-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add version metadata to CytoTable Parquet output (#134)
* add version detection utility * manage semver with poetry-dynamic-versioning * comments to further describe what's happening * update github actions workflows and simplify * remove version util and lint * update pre-commit check versions * add docs on semver for release publishing process * move setup-poetry appropriately * correct action location * re-add version getter util and test * add metadata writer * simplify metadata parquet write util * add a test for _write_parquet_table_with_metadata * move to constants module for reuse capabilities * update convert with constants and new writer function * add tool.setuptools_scm to avoid warnings * linting update * move dunamai to dev deps and update try block * Apply suggestions from code review Co-authored-by: Gregory Way <[email protected]> * add additional notes about release drafts * linting * expand docs on kwargs * add colons to docstring --------- Co-authored-by: Gregory Way <[email protected]>
- Loading branch information
Showing
7 changed files
with
221 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
""" | ||
CytoTable: constants - storing various constants to be used throughout cytotable. | ||
""" | ||
|
||
import multiprocessing | ||
import os | ||
from typing import cast | ||
|
||
from cytotable.utils import _get_cytotable_version | ||
|
||
# read max threads from environment if necessary | ||
# max threads will be used with default Parsl config and Duckdb | ||
# Resolved once, when this module is imported: the integer value of the | ||
# CYTOTABLE_MAX_THREADS environment variable when it is set, otherwise | ||
# multiprocessing.cpu_count(). int() raises ValueError if the variable is | ||
# set to text that is not an integer literal. | ||
# NOTE(review): os.environ.get returns a str here, so cast(int, ...) only | ||
# quiets the type checker; the actual conversion is the outer int(). | ||
MAX_THREADS = ( | ||
multiprocessing.cpu_count() | ||
if "CYTOTABLE_MAX_THREADS" not in os.environ | ||
else int(cast(int, os.environ.get("CYTOTABLE_MAX_THREADS"))) | ||
) | ||
|
||
# enables overriding default memory mapping behavior with pyarrow memory mapping | ||
# Boolean flag evaluated once at import time. It is True when the | ||
# CYTOTABLE_ARROW_USE_MEMORY_MAPPING environment variable is unset (the | ||
# fallback is "1") or is exactly the string "1"; every other value, | ||
# including "true" or "yes", produces False. | ||
CYTOTABLE_ARROW_USE_MEMORY_MAPPING = ( | ||
os.environ.get("CYTOTABLE_ARROW_USE_MEMORY_MAPPING", "1") == "1" | ||
) | ||
|
||
# Lookup table from a data type name (key) to a list of synonym names for | ||
# that type (value). All names are lowercase strings. | ||
# NOTE(review): "DDB" presumably stands for DuckDB - confirm against callers. | ||
DDB_DATA_TYPE_SYNONYMS = { | ||
"real": ["float32", "float4", "float"], | ||
"double": ["float64", "float8", "numeric", "decimal"], | ||
"integer": ["int32", "int4", "int", "signed"], | ||
"bigint": ["int64", "int8", "long"], | ||
} | ||
|
||
# A reference dictionary for SQLite affinity and storage class types | ||
# See more here: https://www.sqlite.org/datatype3.html#affinity_name_examples | ||
# Keys are the names "integer", "text", "blob", "real" and "numeric"; each | ||
# value lists the type names grouped under that key. Every entry is a | ||
# lowercase string. | ||
# NOTE(review): callers presumably lowercase a column's declared type before | ||
# looking it up here - confirm. | ||
SQLITE_AFFINITY_DATA_TYPE_SYNONYMS = { | ||
"integer": [ | ||
"int", | ||
"integer", | ||
"tinyint", | ||
"smallint", | ||
"mediumint", | ||
"bigint", | ||
"unsigned big int", | ||
"int2", | ||
"int8", | ||
], | ||
"text": [ | ||
"character", | ||
"varchar", | ||
"varying character", | ||
"nchar", | ||
"native character", | ||
"nvarchar", | ||
"text", | ||
"clob", | ||
], | ||
"blob": ["blob"], | ||
"real": [ | ||
"real", | ||
"double", | ||
"double precision", | ||
"float", | ||
], | ||
"numeric": [ | ||
"numeric", | ||
"decimal", | ||
"boolean", | ||
"date", | ||
"datetime", | ||
], | ||
} | ||
|
||
# Default string-to-string metadata entries: the project URL as the data | ||
# producer, plus the producer version. The version comes from calling | ||
# _get_cytotable_version() once, when this module is imported, and passing | ||
# the result through str(). | ||
# NOTE(review): presumably attached to Parquet files written by CytoTable - | ||
# confirm against the writer utility. | ||
CYTOTABLE_DEFAULT_PARQUET_METADATA = { | ||
"data-producer": "https://github.com/cytomining/CytoTable", | ||
"data-producer-version": str(_get_cytotable_version()), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.