From 7fbe2cb0e692e15f86fa2e51d397c5351ec8d010 Mon Sep 17 00:00:00 2001 From: chaen Date: Fri, 17 Nov 2023 16:04:09 +0100 Subject: [PATCH] sweep: #7296 fix (ElasticSearchDB): convert exception object to string representation and FTS3 lifetime changes --- src/DIRAC/Core/Utilities/ElasticSearchDB.py | 2 +- src/DIRAC/DataManagementSystem/Agent/FTS3Agent.py | 5 ++++- src/DIRAC/DataManagementSystem/Client/FTS3Job.py | 8 +++----- src/DIRAC/DataManagementSystem/ConfigTemplate.cfg | 4 ++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/DIRAC/Core/Utilities/ElasticSearchDB.py b/src/DIRAC/Core/Utilities/ElasticSearchDB.py index 465b17001a4..4d9897bc921 100644 --- a/src/DIRAC/Core/Utilities/ElasticSearchDB.py +++ b/src/DIRAC/Core/Utilities/ElasticSearchDB.py @@ -503,7 +503,7 @@ def bulk_index(self, indexPrefix, data=None, mapping=None, period="day", withTim res = bulk(client=self.client, index=indexName, actions=generateDocs(data, withTimeStamp)) except (BulkIndexError, RequestError) as e: sLog.exception() - return S_ERROR(e) + return S_ERROR(f"Failed to index by bulk {e!r}") if res[0] == len(data): # we have inserted all documents... diff --git a/src/DIRAC/DataManagementSystem/Agent/FTS3Agent.py b/src/DIRAC/DataManagementSystem/Agent/FTS3Agent.py index 3b59495578b..4151a3fffb8 100644 --- a/src/DIRAC/DataManagementSystem/Agent/FTS3Agent.py +++ b/src/DIRAC/DataManagementSystem/Agent/FTS3Agent.py @@ -48,7 +48,10 @@ AGENT_NAME = "DataManagement/FTS3Agent" # Lifetime in seconds of the proxy we download for submission -PROXY_LIFETIME = 43200 # 12 hours +# Because we force the redelegation if only a third is left, +# and we want to have a quiet night (~12h) +# let's make the lifetime 12*3 hours +PROXY_LIFETIME = 36 * 3600 # 36 hours # Instead of querying many jobs at once, # which maximizes the possibility of race condition diff --git a/src/DIRAC/DataManagementSystem/Client/FTS3Job.py b/src/DIRAC/DataManagementSystem/Client/FTS3Job.py index 5a8645d93e8..94f3e4b021a 100644 --- a/src/DIRAC/DataManagementSystem/Client/FTS3Job.py +++ b/src/DIRAC/DataManagementSystem/Client/FTS3Job.py @@ -763,15 +763,13 @@ def generateContext(ftsServer, ucert, lifetime=25200): # decides that there is not enough timeleft. # At the moment, this is 1 hour, which effectively means that if you do # not submit a job for more than 1h, you have no valid proxy in FTS servers - # anymore. In future release of FTS3, the delegation will be triggered when + # anymore, and all the jobs failed. So we force it when # one third of the lifetime will be left. # Also, the proxy given as parameter might have less than "lifetime" left # since it is cached, but it does not matter, because in the FTS3Agent # we make sure that we renew it often enough - # Finally, FTS3 has an issue with handling the lifetime of the proxy, - # because it does not check all the chain. This is under discussion - # https://its.cern.ch/jira/browse/FTS-1575 - fts3.delegate(context, lifetime=datetime.timedelta(seconds=lifetime)) + td_lifetime = datetime.timedelta(seconds=lifetime) + fts3.delegate(context, lifetime=td_lifetime, delegate_when_lifetime_lt=td_lifetime // 3) return S_OK(context) except FTS3ClientException as e: diff --git a/src/DIRAC/DataManagementSystem/ConfigTemplate.cfg b/src/DIRAC/DataManagementSystem/ConfigTemplate.cfg index f6b84a764a3..ef20117d498 100644 --- a/src/DIRAC/DataManagementSystem/ConfigTemplate.cfg +++ b/src/DIRAC/DataManagementSystem/ConfigTemplate.cfg @@ -144,8 +144,8 @@ Agents KickAssignedHours = 1 # Max number of kicks per cycle KickLimitPerCycle = 100 - # Lifetime in sec of the Proxy we download to delegate to FTS3 (default 12h) - ProxyLifetime = 43200 + # Lifetime in sec of the Proxy we download to delegate to FTS3 (default 36h) + ProxyLifetime = 129600 } ##END FTS3Agent }