Skip to content

Commit

Permalink
Add histological structure id saving to binary compressed format, and…
Browse files Browse the repository at this point in the history
… read out, and add indexing on it to the DataFrames.
  • Loading branch information
jimmymathews committed Sep 25, 2023
1 parent f78b46d commit 6d31196
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 10 deletions.
3 changes: 2 additions & 1 deletion spatialprofilingtoolbox/ondemand/compressed_matrix_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def _get_specimens_and_indices(

def _write_data_array_to_file(self, data_array: dict[int, int], filename: str) -> None:
    """Write the entries of `data_array` to `filename` as fixed-width binary records.

    Each item becomes one 16-byte record: the key (the histological structure
    id) encoded as an 8-byte little-endian unsigned integer, immediately
    followed by the value (the integer entry for that structure) in the same
    encoding. Records are written in the dictionary's iteration order, and
    any existing file at `filename` is overwritten.

    Raises:
        OverflowError: raised by `int.to_bytes` if a key or value is negative
            or does not fit in 8 bytes.
    """
    with open(filename, 'wb') as file:
        # Hand the records to the buffered writer in one call; each record is
        # the id bytes directly followed by the entry bytes.
        file.writelines(
            histological_structure_id.to_bytes(8, 'little') + entry.to_bytes(8, 'little')
            for histological_structure_id, entry in data_array.items()
        )

def _report_subsample_for_inspection(self, data_arrays: CompressedDataArrays):
Expand Down
22 changes: 13 additions & 9 deletions spatialprofilingtoolbox/ondemand/providers/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,32 +96,36 @@ def _get_data_array_from_file(
target_index_lookup: dict,
target_by_symbol: dict,
) -> DataFrame:
"""Load data arrays from a precomputed JSON artifact."""
"""Load data arrays from a precomputed binary artifact."""
rows = []
target_index_lookup = cast(dict, target_index_lookup)
target_by_symbol = cast(dict, target_by_symbol)
feature_columns = cls._list_columns(target_index_lookup, target_by_symbol)
size = len(feature_columns)
with open(filename, 'rb') as file:
while True:
buffer = file.read(8)
row = cls._parse_cell_row(buffer, size)
buffer1 = file.read(8)
buffer2 = file.read(8)
row = cls._parse_cell_row(buffer1, buffer2, size)
if row is None:
break
rows.append(row)
return DataFrame(rows, columns=feature_columns + ['integer'])
df = DataFrame(rows, columns=feature_columns + ['integer', 'histological_structure_id'])
df.set_index('histological_structure_id', inplace=True)
return df

@staticmethod
def _parse_cell_row(buffer: bytes, size: int) -> tuple[int, ...] | None:
if buffer == b'':
def _parse_cell_row(buffer1: bytes, buffer2: bytes, size: int) -> tuple[int, ...] | None:
if buffer1 == b'':
return None
binary_expression_64_string = ''.join([
''.join(list(reversed(bin(ii)[2:].rjust(8, '0'))))
for ii in buffer
for ii in buffer2
])
truncated_to_channels = binary_expression_64_string[0:size]
integer = int.from_bytes(buffer, 'little')
return tuple([int(b) for b in list(truncated_to_channels)] + [integer])
integer_phenotypes = int.from_bytes(buffer2, 'little')
integer_id = int.from_bytes(buffer1, 'little')
return tuple([int(b) for b in list(truncated_to_channels)] + [integer_phenotypes] + [integer_id])

@staticmethod
def _add_centroids(
Expand Down

0 comments on commit 6d31196

Please sign in to comment.