Add support for microarch 8855 (#8860)
* accept jobminuarch arg in submit and store in tm_job_min_microarch

* set required_minimum_micro_arch in job JDL

* pylint
belforte authored Dec 11, 2024
1 parent 35d85e4 commit 68ef911
Showing 8 changed files with 78 additions and 48 deletions.
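Taken together, the change threads a new `jobminuarch` argument from the REST `submit` call into the `tm_job_min_microarch` task column, so the value can later be placed in the job JDL as `required_minimum_micro_arch`. As a hedged illustration only — the endpoint URL, proxy paths, and every parameter other than `jobminuarch` are assumptions, not part of this commit — a raw REST submission passing the new argument might look like:

```python
# Sketch of a direct REST call exercising the new parameter; the CRAB client
# normally builds this request. URL, certificate paths, and the parameter
# subset shown are illustrative assumptions.
import requests

params = {
    'workflow': '241211_120000:user_crab_example',  # hypothetical task name
    'jobsw': 'CMSSW_14_0_0',                        # software requirement
    'jobarch': 'el8_amd64_gcc12',                   # SCRAM_ARCH
    'jobminuarch': 'x86-64-v3',                     # new: SCRAM_MIN_SUPPORTED_MICROARCH
    # ... the many other mandatory submit arguments are omitted here
}
response = requests.put(
    'https://cmsweb.cern.ch/crabserver/prod/workflow',  # assumed endpoint
    params=params,
    cert=('/path/to/proxy', '/path/to/proxy'),          # X509 proxy, illustrative
    verify='/etc/grid-security/certificates',
)
response.raise_for_status()
```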
19 changes: 11 additions & 8 deletions etc/updateOracle.sql
@@ -125,22 +125,25 @@
-- ALTER TABLE tasks ADD tm_user_config CLOB;

--allow NULL in soon-to-be-removed columns
alter table filetransfersdb modify TM_REST_URI varchar(1000) NULL;
alter table filetransfersdb modify TM_REST_HOST varchar(1000) NULL;
-- alter table filetransfersdb modify TM_REST_URI varchar(1000) NULL;
-- alter table filetransfersdb modify TM_REST_HOST varchar(1000) NULL;

-- Add new columns to allow RUCIO_Transfers communications with Publisher
ALTER TABLE filetransfersdb ADD tm_dbs_blockname VARCHAR(1000);
ALTER TABLE filetransfersdb ADD tm_block_complete VARCHAR(10);
-- ALTER TABLE filetransfersdb ADD tm_dbs_blockname VARCHAR(1000);
-- ALTER TABLE filetransfersdb ADD tm_block_complete VARCHAR(10);

--Add Rucio ASO's transfer container name and rule id
ALTER TABLE tasks ADD tm_transfer_container VARCHAR(1000);
ALTER TABLE tasks ADD tm_transfer_rule VARCHAR(255);
ALTER TABLE tasks ADD tm_publish_rule VARCHAR(255);
-- ALTER TABLE tasks ADD tm_transfer_container VARCHAR(1000);
-- ALTER TABLE tasks ADD tm_transfer_rule VARCHAR(255);
-- ALTER TABLE tasks ADD tm_publish_rule VARCHAR(255);

--Add Rucio ASO's JSON kv to store container names and their rules.
ALTER TABLE tasks ADD tm_multipub_rule CLOB;
-- ALTER TABLE tasks ADD tm_multipub_rule CLOB;

--Drop unused columns related to "panda", see issue #8418
-- alter table tasks set unused ( PANDA_RESUBMITTED_JOBS, PANDA_JOBSET_ID );
-- ALTER TABLE tasks DROP UNUSED COLUMNS;

--Add support for scram microarchitecture
alter table tasks add tm_job_min_microarch varchar(255) default 'any';
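The statement above is the only active line left in this migration block. A hedged sketch of applying it with python-oracledb (an assumed client, not part of the repo), tolerating re-runs where the column already exists:

```python
# Apply the migration idempotently: ORA-01430 means the column already exists.
# Connection credentials and DSN are placeholders.
import oracledb

conn = oracledb.connect(user="crab", password="***", dsn="dbhost/service")
with conn.cursor() as cur:
    try:
        cur.execute(
            "alter table tasks add tm_job_min_microarch varchar(255) default 'any'"
        )
    except oracledb.DatabaseError as exc:
        error, = exc.args
        if error.code != 1430:  # ORA-01430: column being added already exists
            raise
conn.commit()
```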

1 change: 1 addition & 0 deletions src/python/CRABInterface/DataUserWorkflow.py
@@ -97,6 +97,7 @@ def submit(self, *args, **kwargs):
:arg str jobtype: job type of the workflow, usually Analysis;
:arg str jobsw: software requirement;
:arg str jobarch: software architecture (=SCRAM_ARCH);
:arg str jobminuarch: minimum required microarchitecture (=SCRAM_MIN_SUPPORTED_MICROARCH);
:arg str inputdata: input dataset;
:arg str primarydataset: primary dataset;
:arg str nonvaliddata: allow invalid input dataset;
6 changes: 4 additions & 2 deletions src/python/CRABInterface/DataWorkflow.py
@@ -90,8 +90,8 @@ def output(self, workflow, howmany, jobids):
raise NotImplementedError

@conn_handler(services=['centralconfig'])
def submit(self, workflow, activity, jobtype, jobsw, jobarch, use_parent, secondarydata, generator, events_per_lumi, siteblacklist,
sitewhitelist, splitalgo, algoargs, cachefilename, cacheurl, addoutputfiles,
def submit(self, workflow, activity, jobtype, jobsw, jobarch, jobminuarch, use_parent, secondarydata, generator,
events_per_lumi, siteblacklist, sitewhitelist, splitalgo, algoargs, cachefilename, cacheurl, addoutputfiles,
username, userdn, savelogsflag, publication, publishname, publishname2, asyncdest, dbsurl, publishdbsurl, vorole, vogroup, tfileoutfiles, edmoutfiles,
runs, lumis, totalunits, adduserfiles, oneEventMode=False, maxjobruntime=None, numcores=None, maxmemory=None, priority=None, lfn=None,
ignorelocality=None, saveoutput=None, faillimit=10, userfiles=None, scriptexe=None, scriptargs=None,
@@ -104,6 +104,7 @@ def submit(self, workflow, activity, jobtype, jobsw, jobarch, use_parent, second
:arg str jobtype: job type of the workflow, usually Analysis;
:arg str jobsw: software requirement;
:arg str jobarch: software architecture (=SCRAM_ARCH);
:arg str jobminuarch: minimum required microarchitecture (=SCRAM_MIN_SUPPORTED_MICROARCH);
:arg str inputdata: input dataset;
:arg str primarydataset: primary dataset;
:arg str nonvaliddata: allow invalid input dataset;
@@ -180,6 +181,7 @@ def submit(self, workflow, activity, jobtype, jobsw, jobarch, use_parent, second
task_failure = [''],
job_sw = [jobsw],
job_arch = [jobarch],
job_min_microarch = [jobminuarch],
input_dataset = [inputdata],
primary_dataset = [primarydataset],
nonvalid_data = ['T' if nonvaliddata else 'F'],
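A subtlety in the mapping above: `jobminuarch` is optional at the REST layer, while the new column declares `DEFAULT 'any'`; because the INSERT binds the column explicitly, an omitted argument would store NULL rather than trigger the default. A hypothetical guard (not in this diff) that keeps the stored value well defined:

```python
# Hypothetical normalization, not part of the commit: fall back to the schema
# default 'any' when the client supplied no minimum microarchitecture.
job_min_microarch = [jobminuarch if jobminuarch else 'any']
```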
66 changes: 37 additions & 29 deletions src/python/CRABInterface/RESTUserWorkflow.py
@@ -15,15 +15,16 @@
# CRABServer dependencies here
from CRABInterface.DataUserWorkflow import DataUserWorkflow
from CRABInterface.RESTExtensions import authz_owner_match
from CRABInterface.Regexps import (RX_TASKNAME, RX_ACTIVITY, RX_JOBTYPE, RX_GENERATOR, RX_LUMIEVENTS, RX_CMSSW, RX_ARCH, RX_DATASET,
RX_CMSSITE, RX_SPLIT, RX_CACHENAME, RX_CACHEURL, RX_LFN, RX_USERFILE, RX_VOPARAMS, RX_DBSURL, RX_LFNPRIMDS, RX_OUTFILES,
RX_RUNS, RX_LUMIRANGE, RX_SCRIPTARGS, RX_SCHEDD_NAME, RX_COLLECTOR, RX_SUBRESTAT, RX_JOBID, RX_ADDFILE,
RX_ANYTHING, RX_USERNAME, RX_DATE, RX_MANYLINES_SHORT, RX_CUDA_VERSION, RX_BLOCK, RX_RUCIODID, RX_RUCIOSCOPE)
from CRABInterface.Regexps import (RX_TASKNAME, RX_ACTIVITY, RX_JOBTYPE, RX_GENERATOR, RX_LUMIEVENTS, RX_CMSSW,
RX_ARCH, RX_MICROARCH, RX_DATASET, RX_CMSSITE, RX_SPLIT, RX_CACHENAME,
RX_CACHEURL, RX_LFN, RX_USERFILE, RX_VOPARAMS, RX_DBSURL, RX_LFNPRIMDS, RX_OUTFILES,
RX_RUNS, RX_LUMIRANGE, RX_SCRIPTARGS, RX_SCHEDD_NAME, RX_COLLECTOR, RX_SUBRESTAT,
RX_JOBID, RX_ADDFILE, RX_ANYTHING, RX_USERNAME, RX_DATE, RX_MANYLINES_SHORT,
RX_CUDA_VERSION, RX_BLOCK, RX_RUCIODID, RX_RUCIOSCOPE)
from CRABInterface.Utilities import CMSSitesCache, conn_handler, getDBinstance, validate_dict
from ServerUtilities import checkOutLFN, generateTaskName



class RESTUserWorkflow(RESTEntity):
"""REST entity for workflows from the user point of view and relative subresources"""

@@ -151,17 +152,15 @@ def _checkPublishDataName2(kwargs, outlfn, requestname, username):
#saves that in kwargs since it's what we want
kwargs['publishname2'] = outputDatasetTagToCheck

"""
##Determine if it's a dataset that will go into a group space and therefore the (group)username prefix that will be used
if 'publishgroupname' in kwargs and int(kwargs['publishgroupname']): #the first half of the if is for backward compatibility
if not (outlfn.startswith('/store/group/') and outlfn.split('/')[3]):
msg = "Parameter 'publishgroupname' is True,"
msg += " but parameter 'lfn' does not start with '/store/group/<groupname>'."
raise InvalidParameter(msg)
group_user_prefix = outlfn.split('/')[3]
else:
group_user_prefix = username
"""
# ##Determine if it's a dataset that will go into a group space and therefore the (group)username prefix that will be used
# if 'publishgroupname' in kwargs and int(kwargs['publishgroupname']): #the first half of the if is for backward compatibility
# if not (outlfn.startswith('/store/group/') and outlfn.split('/')[3]):
# msg = "Parameter 'publishgroupname' is True,"
# msg += " but parameter 'lfn' does not start with '/store/group/<groupname>'."
# raise InvalidParameter(msg)
# group_user_prefix = outlfn.split('/')[3]
# else:
# group_user_prefix = username

outputDatasetTagToCheck = "%s-%s" % (username, outputDatasetTagToCheck)
try:
@@ -282,6 +281,7 @@ def validate(self, apiobj, method, api, param, safe): #pylint: disable=unused-ar
validate_str("jobsw", param, safe, RX_CMSSW, optional=False)
validate_num("nonprodsw", param, safe, optional=False)
validate_str("jobarch", param, safe, RX_ARCH, optional=False)
validate_str("jobminuarch", param, safe, RX_MICROARCH, optional=True)
if not safe.kwargs["nonprodsw"]: #if the user wants to allow non-production releases
self._checkReleases(safe.kwargs['jobarch'], safe.kwargs['jobsw'])
validate_num("useparent", param, safe, optional=True)
@@ -513,7 +513,8 @@ def validate(self, apiobj, method, api, param, safe): #pylint: disable=unused-ar

@restcall
#@getUserCert(headers=cherrypy.request.headers)
def put(self, workflow, activity, jobtype, jobsw, jobarch, inputdata, primarydataset, nonvaliddata, useparent, secondarydata, generator, eventsperlumi,
def put(self, workflow, activity, jobtype, jobsw, jobarch, jobminuarch, inputdata, primarydataset, nonvaliddata,
useparent, secondarydata, generator, eventsperlumi,
siteblacklist, sitewhitelist, splitalgo, algoargs, cachefilename, debugfilename, cacheurl, addoutputfiles,
savelogsflag, publication, publishname, publishname2, asyncdest, dbsurl, publishdbsurl, vorole, vogroup,
tfileoutfiles, edmoutfiles, runs, lumis,
@@ -527,6 +528,7 @@ def put(self, workflow, activity, jobtype, jobsw, jobarch, inputdata, primarydat
:arg str jobtype: job type of the workflow, usually Analysis;
:arg str jobsw: software requirement;
:arg str jobarch: software architecture (=SCRAM_ARCH);
:arg str jobminuarch: minimum required microarchitecture (=SCRAM_MIN_SUPPORTED_MICROARCH);
:arg str inputdata: input dataset;
:arg str primarydataset: primary dataset;
:arg str nonvaliddata: allow invalid input dataset;
@@ -587,19 +589,25 @@ def put(self, workflow, activity, jobtype, jobsw, jobarch, inputdata, primarydat
}

return self.userworkflowmgr.submit(workflow=workflow, activity=activity, jobtype=jobtype, jobsw=jobsw, jobarch=jobarch,
inputdata=inputdata, primarydataset=primarydataset, nonvaliddata=nonvaliddata, use_parent=useparent,
jobminuarch=jobminuarch, inputdata=inputdata, primarydataset=primarydataset,
nonvaliddata=nonvaliddata, use_parent=useparent,
secondarydata=secondarydata, generator=generator, events_per_lumi=eventsperlumi,
siteblacklist=siteblacklist, sitewhitelist=sitewhitelist, splitalgo=splitalgo, algoargs=algoargs,
cachefilename=cachefilename, debugfilename=debugfilename, cacheurl=cacheurl,
addoutputfiles=addoutputfiles, userdn=cherrypy.request.user['dn'],
username=cherrypy.request.user['login'], savelogsflag=savelogsflag, vorole=vorole, vogroup=vogroup,
publication=publication, publishname=publishname, publishname2=publishname2, asyncdest=asyncdest,
dbsurl=dbsurl, publishdbsurl=publishdbsurl, tfileoutfiles=tfileoutfiles,
edmoutfiles=edmoutfiles, runs=runs, lumis=lumis, totalunits=totalunits, adduserfiles=adduserfiles, oneEventMode=oneEventMode,
maxjobruntime=maxjobruntime, numcores=numcores, maxmemory=maxmemory, priority=priority, lfn=lfn,
ignorelocality=ignorelocality, saveoutput=saveoutput, faillimit=faillimit, userfiles=userfiles,
scriptexe=scriptexe, scriptargs=scriptargs, scheddname=scheddname, extrajdl=extrajdl, collector=collector, dryrun=dryrun,
submitipaddr=cherrypy.request.headers['X-Forwarded-For'], ignoreglobalblacklist=ignoreglobalblacklist, user_config=user_config)
siteblacklist=siteblacklist, sitewhitelist=sitewhitelist, splitalgo=splitalgo,
algoargs=algoargs, cachefilename=cachefilename, debugfilename=debugfilename,
cacheurl=cacheurl, addoutputfiles=addoutputfiles, userdn=cherrypy.request.user['dn'],
username=cherrypy.request.user['login'], savelogsflag=savelogsflag,
vorole=vorole, vogroup=vogroup,
publication=publication, publishname=publishname, publishname2=publishname2,
asyncdest=asyncdest, lfn=lfn,
dbsurl=dbsurl, publishdbsurl=publishdbsurl,
tfileoutfiles=tfileoutfiles, edmoutfiles=edmoutfiles, runs=runs, lumis=lumis,
totalunits=totalunits, adduserfiles=adduserfiles, oneEventMode=oneEventMode,
maxjobruntime=maxjobruntime, numcores=numcores, maxmemory=maxmemory, priority=priority,
ignorelocality=ignorelocality, saveoutput=saveoutput, faillimit=faillimit,
userfiles=userfiles, scriptexe=scriptexe, scriptargs=scriptargs,
scheddname=scheddname, extrajdl=extrajdl, collector=collector, dryrun=dryrun,
submitipaddr=cherrypy.request.headers['X-Forwarded-For'],
ignoreglobalblacklist=ignoreglobalblacklist, user_config=user_config)

@restcall
def post(self, workflow, subresource, publication, jobids, force, siteblacklist, sitewhitelist, maxjobruntime, maxmemory,
1 change: 1 addition & 0 deletions src/python/CRABInterface/Regexps.py
@@ -41,6 +41,7 @@
RX_LUMIEVENTS = re.compile(r'^\d+$')
RX_CMSSW = re.compile(r"^(?=.{0,255}$)CMSSW[a-zA-Z0-9-_]*$") #using a lookahead (?=.{0,255}$) to check maximum size of the regex
RX_ARCH = re.compile(r"^(?=.{0,255}$)[a-z]+[0-9]{1,2}_[a-z0-9]+_gcc[a-z0-9]+(_[a-z0-9]+)?$")
RX_MICROARCH = re.compile(r"^(?=.{0,255}$)[a-zA-Z0-9\-\_\.]+$")
RX_DATASET = re.compile(DATASET_RE) #See https://github.com/dmwm/WMCore/issues/6054#issuecomment-135475550
RX_LFNPRIMDS = re.compile(r"^%(primDS)s$" % lfnParts)
RX_BLOCK = re.compile(r"^(/[a-zA-Z0-9\.\-_]{1,100}){3}#[a-zA-Z0-9\.\-_]{1,100}$")
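A quick standalone check of what the new pattern accepts — letters, digits, '-', '_' and '.', up to 255 characters in total (the sample values below are assumptions, not from the commit):

```python
# Standalone probe of RX_MICROARCH, copied verbatim from Regexps.py.
import re

RX_MICROARCH = re.compile(r"^(?=.{0,255}$)[a-zA-Z0-9\-\_\.]+$")

for value in ("any", "x86-64-v3", "armv8.1-a", "", "no spaces allowed"):
    print(repr(value), bool(RX_MICROARCH.match(value)))
# 'any' True, 'x86-64-v3' True, 'armv8.1-a' True, '' False, 'no spaces allowed' False
```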
1 change: 1 addition & 0 deletions src/python/Databases/TaskDB/Oracle/Create.py
@@ -41,6 +41,7 @@ def __init__(self, logger=None, dbi=None, param=None):
tm_task_failure CLOB,
tm_job_sw VARCHAR(255) NOT NULL,
tm_job_arch VARCHAR(255),
tm_job_min_microarch VARCHAR(255) DEFAULT 'any',
tm_input_dataset VARCHAR(500),
tm_nonvalid_input_dataset VARCHAR(1) DEFAULT 'T',
tm_use_parent NUMBER(1),
8 changes: 4 additions & 4 deletions src/python/Databases/TaskDB/Oracle/Task/Task.py
@@ -48,7 +48,7 @@ class Task(object):
#New
New_sql = "INSERT INTO tasks ( \
tm_taskname, tm_activity, tm_task_status, tm_task_command, tm_start_time, tm_task_failure, tm_job_sw, \
tm_job_arch, tm_input_dataset, tm_primary_dataset, tm_nonvalid_input_dataset, tm_use_parent, tm_secondary_input_dataset, tm_site_whitelist, tm_site_blacklist, \
tm_job_arch, tm_job_min_microarch, tm_input_dataset, tm_primary_dataset, tm_nonvalid_input_dataset, tm_use_parent, tm_secondary_input_dataset, tm_site_whitelist, tm_site_blacklist, \
tm_split_algo, tm_split_args, tm_totalunits, tm_user_sandbox, tm_debug_files, tm_cache_url, tm_username, tm_user_dn, \
tm_user_vo, tm_user_role, tm_user_group, tm_publish_name, tm_asyncdest, tm_dbs_url, tm_publish_dbs_url, \
tm_publication, tm_outfiles, tm_tfile_outfiles, tm_edm_outfiles, tm_job_type, tm_generator, tm_arguments, \
@@ -57,7 +57,7 @@ class Task(object):
tm_user_files, tm_transfer_outputs, tm_output_lfn, tm_ignore_locality, tm_fail_limit, tm_one_event_mode, tm_submitter_ip_addr, tm_ignore_global_blacklist, \
tm_user_config) \
VALUES (:task_name, :task_activity, upper(:task_status), upper(:task_command), SYS_EXTRACT_UTC(SYSTIMESTAMP), :task_failure, :job_sw, \
:job_arch, :input_dataset, :primary_dataset, :nonvalid_data, :use_parent, :secondary_dataset, :site_whitelist, :site_blacklist, \
:job_arch, :job_min_microarch, :input_dataset, :primary_dataset, :nonvalid_data, :use_parent, :secondary_dataset, :site_whitelist, :site_blacklist, \
:split_algo, :split_args, :total_units, :user_sandbox, :debug_files, :cache_url, :username, :user_dn, \
:user_vo, :user_role, :user_group, :publish_name, :asyncdest, :dbs_url, :publish_dbs_url, \
:publication, :outfiles, :tfile_outfiles, :edm_outfiles, :job_type, :generator, :arguments, \
Expand All @@ -68,7 +68,7 @@ class Task(object):

GetReadyTasks_tuple = namedtuple("GetReadyTasks", ["tm_taskname", "tm_task_status", "tm_task_command", \
"tm_start_time", "tm_start_injection", "tm_end_injection", \
"tm_task_failure", "tm_job_sw", "tm_job_arch", "tm_input_dataset", "tm_DDM_reqid", \
"tm_task_failure", "tm_job_sw", "tm_job_arch", "tm_job_min_microarch", "tm_input_dataset", "tm_DDM_reqid", \
"tm_site_whitelist", "tm_site_blacklist", "tm_split_algo", "tm_split_args", \
"tm_totalunits", "tm_user_sandbox", "tm_debug_files", "tm_cache_url", "tm_username", "tm_user_dn", "tm_user_vo", \
"tm_user_role", "tm_user_group", "tm_publish_name", "tm_asyncdest", "tm_dbs_url", \
@@ -83,7 +83,7 @@ class Task(object):
#GetReadyTasks
GetReadyTasks_sql = """SELECT tm_taskname, tm_task_status, tm_task_command, \
tm_start_time, tm_start_injection, tm_end_injection, \
tm_task_failure, tm_job_sw, tm_job_arch, tm_input_dataset, tm_DDM_reqid, \
tm_task_failure, tm_job_sw, tm_job_arch, tm_job_min_microarch, tm_input_dataset, tm_DDM_reqid, \
tm_site_whitelist, tm_site_blacklist, tm_split_algo, tm_split_args, \
tm_totalunits, tm_user_sandbox, tm_debug_files, tm_cache_url, tm_username, tm_user_dn, tm_user_vo, \
tm_user_role, tm_user_group, tm_publish_name, tm_asyncdest, tm_dbs_url, \
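To exercise the widened statement, every bind variable named in `New_sql` must be supplied. A hedged sketch — oracledb assumed, credentials illustrative, and only a representative subset of binds spelled out — of how the new bind rides along:

```python
# Sketch only: a real insert must provide all binds named in Task.New_sql,
# many of which are elided here, so this would fail NOT NULL checks as-is.
import oracledb
from Databases.TaskDB.Oracle.Task.Task import Task

binds = {
    "task_name": "241211_120000:user_crab_example",  # hypothetical
    "job_sw": "CMSSW_14_0_0",
    "job_arch": "el8_amd64_gcc12",
    "job_min_microarch": "x86-64-v3",  # the bind variable added by this commit
    # ... remaining binds required by New_sql go here
}

conn = oracledb.connect(user="crab", password="***", dsn="dbhost/service")
with conn.cursor() as cur:
    cur.execute(Task.New_sql, binds)
conn.commit()
```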
(The diff of the eighth changed file did not load and is not shown.)
