Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update defaults.py #539

Merged
merged 3 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion isatools/isatab/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def pbar(x):
_RX_I_FILE_NAME = compile(r'i_(.*?)\.txt')
_RX_DATA = compile(r'data\[(.*?)\]')
_RX_COMMENT = compile(r'Comment\[(.*?)\]')
_RX_DOI = compile(r'(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%"#? ])\\S)+)')
_RX_DOI = compile(r'10.\d{4,9}/[-._;()/:a-z0-9A-Z]+')
_RX_PMID = compile(r'[0-9]{8}')
_RX_PMCID = compile(r'PMC[0-9]{8}')
_RX_CHARACTERISTICS = compile(r'Characteristics\[(.*?)\]')
Expand Down
17 changes: 13 additions & 4 deletions tests/convert/test_isatab2w4m.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Test conversion to W4M format

import filecmp
import os
import shutil
import tempfile
Expand All @@ -9,6 +8,16 @@
from isatools.tests import utils


def universal_filecmp(f1, f2, encoding=None):
    """Return True when two text files have the same content, line by line.

    Both files are opened in text mode, so Python's universal-newline
    translation applies: files that differ only in their line-ending
    convention (LF versus CRLF) compare equal, unlike a byte-wise
    comparison.

    :param f1: path of the first file.
    :param f2: path of the second file.
    :param encoding: text encoding handed to ``open`` for both files. The
        default ``None`` keeps the platform default encoding.
    :return: True if every line matches and both files end at the same
        point, False otherwise.
    """
    with open(f1, 'r', encoding=encoding) as fp1, \
            open(f2, 'r', encoding=encoding) as fp2:
        for line1 in fp1:
            # readline() returns '' once fp2 is exhausted, which can never
            # equal a line read from fp1, so a shorter second file fails here.
            if line1 != fp2.readline():
                return False
        # fp1 is exhausted; the files match only if fp2 is exhausted too.
        return not fp2.readline()

# Test presence of data folder
def setUpModule():
if not os.path.exists(utils.DATA_DIR):
Expand Down Expand Up @@ -46,7 +55,7 @@ def plain_test(self, study, test_dir):
output_file = os.path.join(self._tmp_dir, '.'.join(
['-'.join([study, 'w4m', x]), 'tsv']))
self.assertTrue(os.path.exists(output_file))
self.assertTrue(filecmp.cmp(output_file, ref_file, shallow=False),
self.assertTrue(universal_filecmp(output_file, ref_file),
'Output file "{0}" differs from reference file "{1}".'.format(output_file, ref_file))

# Test MTBLS30
Expand Down Expand Up @@ -89,7 +98,7 @@ def na_filtering_test(self, study, test_dir, samp_na_filtering=None,
'sample-metadata', 'variable-metadata', 'sample-variable-matrix']:
self.assertTrue(os.path.exists(output_files[x]))
self.assertTrue(
filecmp.cmp(output_files[x], ref_files[x]),
universal_filecmp(output_files[x], ref_files[x]),
'Output file "{0}" differs from reference file "{1}".'.format(
output_files[x], ref_files[x]))

Expand Down Expand Up @@ -140,5 +149,5 @@ def test_assay_selection(self):
['-'.join([study, 'w4m', x, assay]), 'tsv']))
self.assertTrue(os.path.exists(output_file))
self.assertTrue(
filecmp.cmp(output_file, ref_file),
universal_filecmp(output_file, ref_file),
'Output file "{0}" differs from reference file "{1}".'.format(output_file, ref_file))
26 changes: 15 additions & 11 deletions tests/isatab/test_isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def setUpModule():
"git clone -b tests --single-branch [email protected]:ISA-tools/ISAdatasets {0}"
.format(utils.DATA_DIR))

def replace_windows_newlines(input_string):
    """Return input_string with carriage-return line endings turned into LF.

    The sequences CR CR LF, CR LF and a lone CR are each replaced by a
    single line feed, in that order.
    """
    normalised = input_string
    # Longest sequence first, so a CR CR LF run collapses into one newline
    # before the shorter patterns get a chance to split it.
    for line_ending in ('\r\r\n', '\r\n', '\r'):
        normalised = normalised.replace(line_ending, '\n')
    return normalised


class TestIsaMerge(unittest.TestCase):

Expand Down Expand Up @@ -1069,7 +1072,7 @@ def test_source_protocol_ref_sample(self):
i.studies = [s]
expected = """Source Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1"""
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_source_protocol_ref_sample_x2(self):
i = Investigation()
Expand Down Expand Up @@ -1167,7 +1170,7 @@ def test_source_protocol_ref_sample_with_characteristics(self):
i.studies = [s]
expected = """Source Name\tCharacteristics[reference descriptor]\tProtocol REF\tSample Name\tCharacteristics[organism part]
source1\tnot applicable\tsample collection\tsample1\tliver"""
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_source_protocol_ref_sample_with_parameter_values(self):
i = Investigation()
Expand All @@ -1188,7 +1191,7 @@ def test_source_protocol_ref_sample_with_parameter_values(self):
i.studies = [s]
expected = """Source Name\tProtocol REF\tParameter Value[temperature]\tSample Name
source1\tsample collection\t10\tsample1"""
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_source_protocol_ref_sample_with_factor_values(self):
i = Investigation()
Expand Down Expand Up @@ -1216,11 +1219,11 @@ def test_source_protocol_ref_sample_with_factor_values(self):
s.assays = [a]
expected_study_table = """Source Name\tProtocol REF\tSample Name\tFactor Value[study group]
source1\tsample collection\tsample1\tStudy group 1"""
self.assertIn(expected_study_table, isatab.dumps(i))
self.assertIn(expected_study_table, replace_windows_newlines(isatab.dumps(i)))
expected_assay_table = """Sample Name\tFactor Value[study group]\tProtocol REF
sample1\tStudy group 1\textraction"""
self.assertIn(expected_assay_table,
isatab.dumps(i, write_fvs_in_assay_table=True))
replace_windows_newlines(isatab.dumps(i, write_fvs_in_assay_table=True)))

def test_source_protocol_ref_protocol_ref_sample(self):
i = Investigation()
Expand All @@ -1239,7 +1242,7 @@ def test_source_protocol_ref_protocol_ref_sample(self):
i.studies = [s]
expected = """Source Name\tProtocol REF\tProtocol REF\tSample Name
source1\tsample collection\taliquoting\taliquot1"""
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_source_protocol_ref_sample_protocol_ref_sample(self):
i = Investigation()
Expand All @@ -1261,7 +1264,7 @@ def test_source_protocol_ref_sample_protocol_ref_sample(self):
i.studies = [s]
expected = """Source Name\tProtocol REF\tSample Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1\taliquoting\taliquot1"""
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_sample_protocol_ref_material_protocol_ref_data2(self):
i = Investigation()
Expand Down Expand Up @@ -1295,7 +1298,7 @@ def test_sample_protocol_ref_material_protocol_ref_data2(self):
i.studies = [s]
expected = (f"""Sample Name\tProtocol REF\tExtract Name\tProtocol REF\tAssay Name\tRaw Data File\tComment[checksum type]\tComment[checksum]\n""" +
f"""sample1\textraction\textract1\tnucleic acid sequencing\tassay-1\tdatafile.raw\t{cs_comment1.value}\t{cs_comment2.value}""")
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_sample_protocol_ref_material_protocol_ref_data3(self):
i = Investigation()
Expand Down Expand Up @@ -1334,7 +1337,7 @@ def test_sample_protocol_ref_material_protocol_ref_data3(self):

# self.assertIn(expected_line1, dump_out)
# self.assertIn(expected_line2, dump_out)
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_sample_protocol_ref_material_protocol_ref_data4(self):
i = Investigation()
Expand Down Expand Up @@ -1373,7 +1376,7 @@ def test_sample_protocol_ref_material_protocol_ref_data4(self):

# self.assertIn(expected_line1, dump_out)
# self.assertIn(expected_line2, dump_out)
self.assertIn(expected, isatab.dumps(i))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))

def test_sample_protocol_ref_material_protocol_ref_data_x2(self):
i = Investigation()
Expand Down Expand Up @@ -1710,7 +1713,7 @@ def test_isatab_preprocess_issue235(self):
test_isatab_str = b""""Sample Name" "Protocol REF" "Parameter Value[medium]" "Term Source REF" "Term Accession Number" "Parameter Value[serum]" "Term Source REF" "Term Accession Number" "Parameter Value[serum concentration]" "Unit" "Term Source REF" "Term Accession Number" "Parameter Value[medium volume]" "Unit" "Term Source REF" "Term Accession Number" "Parameter Value[migration modulator]" "Term Source REF" "Term Accession Number" "Parameter Value[modulator concentration]" "Unit" "Term Source REF" "Term Accession Number" "Parameter Value[modulator distribution]" "Term Source REF" "Term Accession Number" "Protocol REF" "Parameter Value[imaging technique]" "Term Source REF" "Term Accession Number" "Parameter Value[imaging technique temporal feature]" "Term Source REF" "Term Accession Number" "Parameter Value[acquisition duration]" "Unit" "Term Source REF" "Term Accession Number" "Parameter Value[time interval]" "Unit" "Term Source REF" "Term Accession Number" "Parameter Value[objective type]" "Term Source REF" "Term Accession Number" "Parameter Value[objective magnification]" "Term Source REF" "Term Accession Number" "Parameter Value[objective numerical aperture]" "Term Source REF" "Term Accession Number" "Parameter Value[acquisition channel count]" "Term Source REF" "Term Accession Number" "Parameter Value[reporter]" "Term Source REF" "Term Accession Number" "Parameter Value[voxel size]" "Unit" "Term Source REF" "Term Accession Number" "Assay Name" "Raw Data File" "Protocol REF" "Parameter Value[software]" "Term Source REF" "Term Accession Number" "Data Transformation Name" "Derived Data File"
"culture1" "migration assay" "RPMI-1640" "" "" "Heat Inactivated Fetal Bovine Serum " "" "" "10" "%" "UO" "http://purl.obolibrary.org/obo/UO_0000165" "300" "microliter" "UO" "http://purl.obolibrary.org/obo/UO_0000101" "" "" "" "" "" "" "" "gradient" "" "" "imaging" "phase-contrast microscopy" "" "" "dynamic" "" "" "6" "hour" "UO" "http://purl.obolibrary.org/obo/UO_0000032" "15" "minute" "UO" "http://purl.obolibrary.org/obo/UO_0000031" "" "" "" "20" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "culture1" "" "data transformation" "CELLMIA" "" "" "" ""
"""
with tempfile.NamedTemporaryFile() as tmp:
with tempfile.NamedTemporaryFile(delete=False) as tmp:
tmp.write(test_isatab_str)
tmp.seek(0)
study_assay_parser = isatab_parser.StudyAssayParser('mock.txt')
Expand All @@ -1719,6 +1722,7 @@ def test_isatab_preprocess_issue235(self):
if """Protocol REF\tData Transformation Name""" in header:
self.fail('Incorrectly inserted Protocol REF before '
'Data Transformation Name')
os.remove(tmp.name)

def test_isatab_factor_value_parsing_issue270(self):
with open(os.path.join(self._tab_data_dir, 'issue270', 'i_matteo.txt'),
Expand Down
4 changes: 2 additions & 2 deletions tests/isatab/validate/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_b_ii_s_3(self):
data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'BII-S-3')
with open(path.join(data_path, 'i_gilbert.txt'), 'r') as data_file:
r = validate(fp=data_file, config_dir=self.default_conf, origin="")
self.assertEqual(len(r['warnings']), 12)
self.assertEqual(len(r['warnings']), 10)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two of the DOIs were actually valid, which is why there are only 10 warnings left instead of 12.


def test_mtbls267(self):
data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'MTBLS267-partial')
Expand Down Expand Up @@ -82,7 +82,7 @@ def is_investigation(investigation_df):
data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'BII-S-3')
with open(path.join(data_path, 'i_gilbert.txt'), 'r') as data_file:
r = validate(data_file, rules=rules)
self.assertEqual(len(r['warnings']), 12)
self.assertEqual(len(r['warnings']), 10)

rule = '12000'
expected_error = {
Expand Down
19 changes: 9 additions & 10 deletions tests/validators/test_validate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import unittest
import pathlib

from jsonschema import Draft4Validator
from jsonschema import RefResolver
Expand Down Expand Up @@ -304,8 +305,8 @@ class TestIsaJsonCreateTestData(unittest.TestCase):

def setUp(self):
self._reporting_level = logging.ERROR
self.v2_create_schemas_path = os.path.join(
os.path.dirname(__file__), '../..', 'isatools', 'resources', 'schemas',
self.v2_create_schemas_path = pathlib.PurePosixPath(
pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas',
'isa_model_version_2_0_schemas', 'create')

def test_validate_testdata_sampleassayplan_json(self):
Expand All @@ -314,10 +315,9 @@ def test_validate_testdata_sampleassayplan_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')) as fp:
sample_assay_plan_schema = json.load(fp)
resolver = RefResolver('file://{}'.format(
os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')),
sample_assay_plan_schema)
res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
'sample_assay_plan_schema.json').as_uri()
resolver = RefResolver(res_path, sample_assay_plan_schema)
validator = Draft4Validator(sample_assay_plan_schema,
resolver=resolver)
validator.validate(json.load(test_case_fp))
Expand All @@ -342,10 +342,9 @@ def test_validate_testdata_treatment_sequence_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'treatment_sequence_schema.json')) as fp:
treatment_sequence_schema = json.load(fp)
resolver = RefResolver('file://{}'.format(
os.path.join(self.v2_create_schemas_path,
'treatment_sequence_schema.json')),
treatment_sequence_schema)
res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
'treatment_sequence_schema.json').as_uri()
resolver = RefResolver(res_path, treatment_sequence_schema)
validator = Draft4Validator(treatment_sequence_schema,
resolver=resolver)
validator.validate(json.load(test_case_fp))
Expand Down
Loading