Skip to content

Commit

Permalink
Merge pull request #196 from broadinstitute/development
Browse files: browse the repository at this point in the history
Release v1.8.7
  • Loading branch information
knapii-developments authored Dec 17, 2020
2 parents de1a015 + d06465a commit b720fc0
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 16 deletions.
25 changes: 12 additions & 13 deletions ingest/expression_files/mtx.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_gene_expression_data(file_path: str, file_handler):
return p1

@staticmethod
def is_sorted(file_path: str, file_handler):
def is_sorted(file_path: str, file_handler: IO):
"""Checks if a file is sorted by gene index"""
p1 = MTXIngestor.get_gene_expression_data(file_path, file_handler)
try:
Expand Down Expand Up @@ -185,19 +185,21 @@ def get_data_start_line_number(file_handler: IO) -> int:
Parameters:
___________
file_handler (IO): File handler that points to top of MTX file
file_handler (IO): File handler of MTX file that contains headers.
Returns
----------
i (IO): Line number where data starts
count (int): Line number where data starts
"""
for count, line in enumerate(file_handler):
# Move file pointer to top of file
file_handler.seek(0, 0)
for count, line in enumerate(file_handler, start=1):
if not line.startswith("%"):
try:
line_values = line.strip().split()
float(line_values[0]) # Determines if value is numeric
# First line w/o '%' is mtx dimension. So skip this line (+1)
return count + 2
return count + 1
except ValueError:
raise ValueError(
"Only header, comment lines starting with '%', and numeric data allowed in MTX file."
Expand Down Expand Up @@ -246,7 +248,8 @@ def sort_mtx(file_path, mtx_file_handler: IO) -> str:
Returns
----------
new_file_path (str) : Full path of newly sorted MTX file
new_file_path (str) : Full path of newly sorted MTX file. This file does not contain original headers
or MTX dimensions.
"""
file_name = os.path.basename(file_path)
file_name = os.path.splitext(file_name)[0]
Expand Down Expand Up @@ -298,17 +301,13 @@ def execute_ingest(self):
self.study_id, self.study_file_id, self.mongo_connection._client
):
self.is_raw_count = False
# Need fresh mtx file handler for get_data_start_line_number()
fresh_mtx_file_handler = self.resolve_path(self.mtx_path)[0]
if not MTXIngestor.is_sorted(self.mtx_path, fresh_mtx_file_handler):
new_mtx_file_path = MTXIngestor.sort_mtx(
self.mtx_path, fresh_mtx_file_handler
)
if not MTXIngestor.is_sorted(self.mtx_path, self.mtx_file):
new_mtx_file_path = MTXIngestor.sort_mtx(self.mtx_path, self.mtx_file)
# Reset mtx variables to newly sorted file
self.mtx_file, self.mtx_path = self.resolve_path(new_mtx_file_path)
else:
# Cell names are the only data stored for raw counts.
# Therefore, no need to sort files.
# Therefore, no need to determine if file is sorted or sort file.
self.is_raw_count = True
self.transform()

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="scp-ingest-pipeline",
version="1.8.4",
version="1.8.7",
description="ETL pipeline for single-cell RNA-seq data",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11200,7 +11200,6 @@
"FoobarAB2_BazMoo_7DABDADBDBADACDB-1",
"FoobarAB5_BazMoo_2DDBCCDBADBADCBC-1",
"FoobarAB7_BazMoo_7BCABCCCACBAADDC-1",
"FoobarAB2_BazMoo_3BCDCBCCBCCCCBAC-1",
"FoobarAB2_BazMoo_8BDBABBACDCCDDBD-1",
"FoobarAB7_BazMoo_1DBAACCBDDDCBCDB-1",
"FoobarAB3_BazMoo_1DABABDBDCCDBCBA-1",
Expand Down Expand Up @@ -11267,7 +11266,6 @@
2.0,
2.81,
1.58,
4352.0,
2.81,
2.81,
1.0,
Expand Down

0 comments on commit b720fc0

Please sign in to comment.