Skip to content

Commit

Permalink
linting
Browse files Browse the repository at this point in the history
  • Loading branch information
kpedro88 committed Jan 18, 2025
1 parent 50ac8a7 commit a95e0bd
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions get_files_on_disk.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#!/usr/bin/env python3

import os,sys,getpass,warnings,glob,shlex,subprocess,argparse
"""Returns a list of files from a dataset including only files that are hosted on disk."""

import os,sys,getpass,warnings,glob,shlex,subprocess,argparse # pylint: disable=multiple-imports
from collections import defaultdict

def getOS():
    """Return the OS major version number, queried through the shell.

    The number is taken from ``/etc/redhat-release`` by a ``sed`` expression
    that prints the first run of digits found on a line. The lookup goes
    through the shell because other methods return the host OS when running
    inside a container.
    """
    sed_command = shlex.split(r"sed -nr 's/[^0-9]*([0-9]+).*/\1/p' /etc/redhat-release")
    raw_output = subprocess.check_output(sed_command, encoding="utf-8")
    # drop the trailing newline emitted by sed
    return raw_output.rstrip()

def getHosted(dataset):
"""Gets list of files on disk for a dataset, and list of sites along with how many files each site has"""
osv = getOS()
rucio_path = f'/cvmfs/cms.cern.ch/rucio/x86_64/rhel{osv}/py3/current'
os.environ['RUCIO_HOME'] = rucio_path
Expand All @@ -17,7 +21,7 @@ def getHosted(dataset):
sys.path.insert(0,full_rucio_path+'/site-packages/')

warnings.filterwarnings("ignore", message=".*cryptography.*")
from rucio.client.client import Client
from rucio.client.client import Client # pylint: disable=import-error,import-outside-toplevel
client = Client()

# loop over blocks to avoid timeout error from too-large response
Expand All @@ -27,12 +31,13 @@ def getHosted(dataset):
nblocks = 10
block_groups = [all_blocks[i:i+nblocks] for i in range(0, len(all_blocks), nblocks)]

from rucio.client.replicaclient import ReplicaClient
from rucio.client.replicaclient import ReplicaClient # pylint: disable=import-error,import-outside-toplevel
rep_client = ReplicaClient()

filelist = set()
sitelist = defaultdict(int)
sitecond = lambda site: "_Tape" not in site
def sitecond(site):
return "_Tape" not in site
for block_group in block_groups:
reps = list(rep_client.list_replicas([{'scope': 'cms', 'name': block['name']} for block in block_group]))
for rep in reps:
Expand All @@ -45,15 +50,16 @@ def getHosted(dataset):
return filelist, sitelist

def main(dataset, outfile=None, verbose=False):
    """Print the file list (and optionally the site list) for a dataset.

    Args:
        dataset: dataset name, passed unchanged to ``getHosted``.
        outfile: path of a file to write the file list to; when ``None``
            the list is printed to standard output instead.
        verbose: when true, first print a "Site list:" header followed by
            one ``site: count`` line per entry of the site list.
    """
    filelist, sitelist = getHosted(dataset)

    if verbose:
        print("Site list:")
        print("\n".join(f'{k}: {v}' for k,v in sitelist.items()))

    output = "\n".join(filelist)
    if outfile is None:
        print(output)
    else:
        # the context manager guarantees the handle is closed even if the
        # write fails; the explicit encoding avoids locale-dependent output
        with open(outfile, 'w', encoding="utf-8") as file:
            print(output, file=file)

if __name__=="__main__":
parser = argparse.ArgumentParser(
Expand Down

0 comments on commit a95e0bd

Please sign in to comment.