Skip to content

Commit

Permalink
merge with main
Browse files: browse the repository at this point in the history
  • Loading branch information
patrick-troy committed Jan 30, 2025
2 parents: b16e77e + 6ba0e99; commit 946fe48
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 20 deletions.
20 changes: 3 additions & 17 deletions liiatools/common/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@ def list_snapshots(self) -> Dict:
"""
List the snapshots in the archive.
"""
log.info("list snapshots method called")
try:
directories = sorted(self.fs.listdir("/"))
log.info(f"first look at directories: {directories}")
except Exception as e:
log.info(f"loading directories error: {e}")
except fs.errors.ResourceNotFound:
log.error(f"Resource not found error when listing snapshots")
return {}
la_snapshots = {}

Expand All @@ -117,22 +115,16 @@ def delete_snapshot(self, *snap_ids: str):
for snap_id in snap_ids:
self.fs.removetree(snap_id)

def current(
self, la_code: str, deduplicate_mode: Literal["E", "A", "N"] = "E"
) -> DataContainer:
def current(self, la_code: str, deduplicate_mode: Literal["E", "A", "N"] = "E") -> DataContainer:
"""
Get the current session as a datacontainer.
"""
log.info("current method called")
try:
directories = self.list_snapshots()
log.info(f"Directories: {directories}")
snap_ids = directories[la_code]
log.info(f"Snap IDs: {snap_ids}")
return self.combine_snapshots(snap_ids, deduplicate_mode)

except KeyError:
log.info("KeyError raised")
return

def load_snapshot(self, snap_id) -> DataContainer:
Expand All @@ -150,11 +142,8 @@ def load_snapshot(self, snap_id) -> DataContainer:
log.info(f"table id match: {table_spec.id}")
with self.fs.open(snap_id, "r") as f:
df = pd.read_csv(f)
log.info("csv read into pandas")
df = _normalise_table(df, table_spec)
log.info("dataframe has been normalised")
data[table_spec.id] = df
log.info("data saved to DataContainer")

return data

Expand All @@ -171,7 +160,6 @@ def combine_snapshots(
"""
assert deduplicate_mode in ["E", "A", "N"]
log.info(f"assert deduplicate mode correct: {deduplicate_mode}")

combined = DataContainer()
for snap_id in snap_ids:
Expand Down Expand Up @@ -222,9 +210,7 @@ def _combine_snapshots(
all_sources = []
for source in sources:
if table_id in source:
log.info(f"table id in source: {table_id}")
all_sources.append(source[table_id])
log.info("source appended to all_sources")

if len(all_sources) == 0:
continue
Expand Down
3 changes: 0 additions & 3 deletions liiatools_pipeline/ops/common_la.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ def create_concatenated_view(current: DataframeArchive, config: CleanConfig):
f"concatenated/{config.dataset}", recreate=True
)
existing_files = concat_folder.listdir("/")
log.info(f"Existing files: {existing_files}")

for la_code in authorities.codes:
la_files_regex = f"{la_code}_{config.dataset}_"
Expand All @@ -256,9 +255,7 @@ def create_concatenated_view(current: DataframeArchive, config: CleanConfig):
log.info(f"Successfully removed files")

if config.dataset == "annex_a":
log.info(f"Concatenating annex a data for {la_code}")
concat_data = current.current(la_code, deduplicate_mode="N")
log.info(f"Data concatenated")
else:
concat_data = current.current(la_code)

Expand Down

0 comments on commit 946fe48

Please sign in to comment.