From 86197dc76ad0a9c22ec180dfe93b0d386dd90fc0 Mon Sep 17 00:00:00 2001 From: Merlin Fisher-Levine Date: Tue, 1 Aug 2023 15:09:05 -0700 Subject: [PATCH] Add docs --- python/lsst/summit/utils/blockUtils.py | 187 +++++++++++++++++++++++-- tests/test_blockUtils.py | 4 +- 2 files changed, 181 insertions(+), 10 deletions(-) diff --git a/python/lsst/summit/utils/blockUtils.py b/python/lsst/summit/utils/blockUtils.py index 6088d44d..6ceeaa85 100644 --- a/python/lsst/summit/utils/blockUtils.py +++ b/python/lsst/summit/utils/blockUtils.py @@ -40,7 +40,25 @@ @dataclass(slots=True, kw_only=True, frozen=True) class BlockInfo: - """The block info relating to a TMAEvent. + """Information about the execution of a "block". + + Each BlockInfo instance contains information about a single block + execution. This is identified by the block number and sequence number, + which, when combined with the dayObs, are exactly degenerate with the + blockId. + + Each BlockInfo instance contains the following information: + * The block ID - this is the primary identifier, as a string, for + example "BL52_20230615_02", which is parsed into: + * The block number, as an integer, for example 52, for "BLOCK-52". + * The dayObs, as an integer, for example 20230615. + * The seqNum - the execution number of that block on that day. + * The begin and end times of the block execution, as astropy.time.Time + * The SAL indices which were involved in the block execution, as a list + * The SITCOM tickets which were involved in the block execution, as a + list of strings, including the SITCOM- prefix. + * The states of the script during the block execution, as a list of + ``ScriptStatePoint`` instances. Parameters ---------- @@ -48,16 +66,24 @@ class BlockInfo: The block number, as an integer. blockId : `str` The block ID, as a string. + dayObs : `int` + The dayObs the block was run on. + seqNum : `int` + The sequence number of the block. + begin : `astropy.time.Time` + The time the block execution began. 
+ end : `astropy.time.Time` + The time the block execution ended. salIndices : `list` of `int` One or more SAL indices, relating to the block. tickets : `list` of `str` One or more SITCOM tickets, relating to the block. - states : `list` of `ScriptStatePoint` + states : `list` of `lsst.summit.utils.blockUtils.ScriptStatePoint` The states of the script during the block. Each element is a ``ScriptStatePoint`` which contains: - the time, as an astropy.time.Time - the state, as a ``ScriptState`` enum - - the reason for state change, as a string + - the reason for state change, as a string, if present """ blockNumber: int blockId: str @@ -76,6 +102,10 @@ def __repr__(self): ) def _ipython_display_(self): + """This is the function which runs when someone executes a cell in a + notebook with just the class instance on its own, without calling + print() or str() on it. + """ print(self.__str__()) def __str__(self): @@ -95,6 +125,17 @@ def __str__(self): @dataclass(slots=True, kw_only=True, frozen=True) class ScriptStatePoint: + """The execution state of a script at a point in time. + + Parameters + ---------- + time : `astropy.time.Time` + The time of the state change. + state : `lsst.summit.utils.enums.ScriptState` + The state of the script at this point in time. + reason : `str` + The reason for the state change, if given. + """ time: Time state: ScriptState reason: str @@ -105,6 +146,10 @@ def __repr__(self): ) def _ipython_display_(self): + """This is the function which runs when someone executes a cell in a + notebook with just the class instance on its own, without calling + print() or str() on it. + """ print(self.__str__()) def __str__(self): @@ -113,10 +158,27 @@ def __str__(self): class BlockParser: + """A class to parse BLOCK data from the EFD. + + Information on executed blocks is stored in the EFD in the + ``lsst.sal.Script.logevent_state`` topic. 
This class parses that topic and + provides methods to get information on the blocks which were run on a given + dayObs. It also provides methods to get the events which occurred during a + given block, and also to get the block in which a specified event occurred, + if any. + + Parameters + ---------- + dayObs : `int` + The dayObs to get the block data for. + client : `lsst_efd_client.efd_client.EfdClient`, optional + The EFD client to use. If not specified, a new one is created. + """ + def __init__(self, dayObs, client=None): - t0 = time.time() - self.client = client self.dayObs = dayObs + + self.client = client if client is None: self.client = makeEfdClient() @@ -129,10 +191,15 @@ def __init__(self, dayObs, client=None): print(f"Parsing data took {(time.time()-t0):.5f} seconds") def getDataForDayObs(self): + """Retrieve the data for the specified dayObs from the EFD. + """ data = getEfdData(self.client, 'lsst.sal.Script.logevent_state', dayObs=self.dayObs) # , prePadding=86400*365) self.data = data def augmentDataSlow(self): + """Parse each row in the data frame individually, pulling the + information out into its own columns. + """ data = self.data blockPattern = r"BLOCK-(\d+)" blockIdPattern = r"BL\d+(?:_\w+)+" @@ -160,6 +227,13 @@ def augmentDataSlow(self): data.loc[index, 'blockSeqNum'] = blockSeqNum def augmentData(self): + """Parse the dataframe using vectorized methods, pulling the + information out into its own columns. + + This method is much faster for large dataframes than augmentDataSlow, + but is also much harder to maintain/debug, as the vectorized regexes + are hard to work with, and to know which row is causing problems. 
+ """ data = self.data blockPattern = r"BLOCK-(\d+)" blockIdPattern = r"(BL\d+(?:_\w+)+)" @@ -176,24 +250,90 @@ def augmentData(self): data['blockSeqNum'] = blockIdSplit[3].astype(float).astype(pd.Int64Dtype()) def _listColumnValues(self, column, removeNone=True): + """Get all the different values for the specified column, as a list. + + Parameters + ---------- + column : `str` + The column to get the values for. + removeNone : `bool` + Whether to remove None from the list of values. + + Returns + ------- + values : `list` + The values for the specified column. + """ values = set(self.data[column].dropna()) if None in values and removeNone: values.remove(None) return sorted(values) - def getBlockNums(self,): + def getBlockNums(self): + """Get the block numbers which were run on the specified dayObs. + + Returns + ------- + blockNums : `list` of `int` + The blocks which were run on the specified dayObs. + """ return self._listColumnValues('blockNum') def getSeqNums(self, block): + """Get the seqNums for the specified block. + + Parameters + ---------- + block : `int` + The block number to get the sequence numbers for. + + Returns + ------- + seqNums : `list` of `int` + The sequence numbers for the specified block. + """ return sorted(set(self.data[self.data['blockNum'] == block]['blockSeqNum'])) def getRows(self, block, seqNum=None): + """Get all rows of data which relate to the specified block. + + If the seqNum is specified, only the rows for that sequence number are + returned, otherwise all the rows relating to any execution of that + block on that day are returned. + + Parameters + ---------- + block : `int` + The block number to get the rows for. + seqNum : `int`, optional + The sequence number, if specified, to get the row data for. If not + specified, all data for the specified block is returned. + + Returns + ------- + data : `pandas.DataFrame` + The row data. 
+ """ rowsForBlock = self.data[self.data['blockNum'] == block] if seqNum is None: return rowsForBlock return rowsForBlock[rowsForBlock['blockSeqNum'] == seqNum] - def getBlockEvolution(self, block, seqNum=None): + def printBlockEvolution(self, block, seqNum=None): + """Display the evolution of the specified block. + + If the seqNum is specified, the evolution of that specific block + execution is displayed, otherwise all executions of that block are + printed. + + Parameters + ---------- + block : `int` + The block number to print the evolution of. + seqNum : `int`, optional + The sequence number, if specified, to print the evolution of. If + not specified, all sequence numbers for the block are printed. + """ if seqNum is None: seqNums = self.getSeqNums(block) else: @@ -204,6 +344,21 @@ def getBlockEvolution(self, block, seqNum=None): print(blockInfo, '\n') def getBlockInfo(self, block, seqNum): + """XXX change this to use the already-parsed columns! + Maybe leave the SITCOM parsing in here though?! + + Parameters + ---------- + block : `int` + The block number. + seqNum : `int` + The sequence number. + + Returns + ------- + blockInfo : `lsst.summit.utils.blockUtils.BlockInfo` + The block info. + """ rows = self.getRows(block, seqNum=seqNum) if rows.empty: print(f'No {seqNum=} on dayObs={self.dayObs} for {block=}') @@ -249,6 +404,22 @@ def getBlockInfo(self, block, seqNum): return blockInfo def getEventsForBlock(self, events, block, seqNum): + """Get the events which occurred during the specified block. + + Parameters + ---------- + events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent` + The list of candidate events. + block : `int` + The block number to get the events for. + seqNum : `int` + The sequence number to get the events for. + + Returns + ------- + events : `list` of `lsst.summit.utils.tmaUtils.TMAEvent` + The events. 
+ """ blockInfo = self.getBlockInfo(block, seqNum) begin = blockInfo.begin end = blockInfo.end @@ -259,6 +430,6 @@ def getEventsForBlock(self, events, block, seqNum): # start/end too return [e for e in events if e.end >= begin and e.begin <= end] - def getBlockForEvent(self, event): + def getBlocksForEvent(self, event): raise NotImplementedError diff --git a/tests/test_blockUtils.py b/tests/test_blockUtils.py index 9d8e1e27..c874431c 100644 --- a/tests/test_blockUtils.py +++ b/tests/test_blockUtils.py @@ -66,14 +66,14 @@ def test_parsing(self): found = self.blockParser.getSeqNums(block) self.assertTrue(all(isinstance(s, int) for s in found)) self.assertEqual(found, seqNums) - self.blockParser.getBlockEvolution(block) + self.blockParser.printBlockEvolution(block) for seqNum in seqNums: data = self.blockParser.getRows(block, seqNum) self.assertIsInstance(data, pd.DataFrame) self.assertGreater(len(data), 0) self.blockParser.getBlockInfo(block=block, seqNum=seqNum) - self.blockParser.getBlockEvolution(block, seqNum=seqNum) + self.blockParser.printBlockEvolution(block, seqNum=seqNum) def test_notFoundBehavior(self): # no block data on this day so check init doesn't raise