diff --git a/README.md b/README.md index ce9f9eb..c76a80b 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Below we will show the basic BismarkPlot workflow. ### Single sample ```python -import src.bismarkplot.Genome +import src.bsxplorer.Genome import bismarkplot # Firstly, we need to read the regions annotation (e.g. reference genome .gff) @@ -282,7 +282,7 @@ Output for _Brachypodium distachyon_: ```python # For analyzing samples with different reference genomes, we need to initialize several genomes instances -import src.bismarkplot.Genome +import src.bsxplorer.Genome genome_filenames = ["arabidopsis.gff", "brachypodium.gff", "cucumis.gff", "mus.gff"] reports_filenames = ["arabidopsis.txt", "brachypodium.txt", "cucumis.txt", "mus.txt"] @@ -319,7 +319,7 @@ Output: Other genomic regions from .gff can be analyzed too with ```.exon``` or ```.near_tss/.near_tes``` option for ```bismarkplot.Genome``` ```python -import src.bismarkplot.Genome +import src.bsxplorer.Genome exons = [ src.bismarkplot.genome.Genome.from_gff(file).exon(min_length=100) for file in genome_filenames @@ -363,7 +363,7 @@ TSS output: BismarkPlot allows user to visualize chromosome methylation levels across full genome ```python -import src.bismarkplot.ChrLevels +import src.bsxplorer.ChrLevels import bismarkplot chr = src.bismarkplot.levels.ChrLevels.from_bismark( diff --git a/docs/conf.py b/docs/conf.py index 9ea76f1..d0513cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,10 +1,10 @@ import os import sys -project = 'BismarkPlot' +project = 'BSXplorer' copyright = '2023, shitohana' author = 'shitohana' -release = '1.0.0' +release = '1.0.0a0' sys.path.insert(0, os.path.abspath('../src')) sys.path.append(os.path.abspath('.')) @@ -28,7 +28,7 @@ autosummary_generate = True autodoc_member_order = 'bysource' add_module_names = False -html_short_title = 'BismarkPlot Documentation' +html_short_title = 'BSXplorer Documentation' html_theme = 'sphinx_rtd_theme' diff --git a/pyproject.toml b/pyproject.toml index 0b7048c..146721c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bsxplorer" -version = "1.0.0" +version = "1.0.0a0" authors = [ { name="shitohana", email="kyudytskiy@gmail.com" }, ] @@ -40,17 +40,17 @@ classifiers = [ # dynamic = ["version", "description"] [project.urls] -Homepage = "https://github.com/shitohana/BismarkPlot" -Documentation = "https://shitohana.github.io/BismarkPlot/" -"Bug Tracker" = "https://github.com/shitohana/BismarkPlot/issues" +Homepage = "https://github.com/shitohana/BSXplorer" +Documentation = "https://shitohana.github.io/BSXplorer/" +"Bug Tracker" = "https://github.com/shitohana/BSXplorer/issues" [tool.hatch.build] exclude = ["/venv", "/dist", "/test", "/docs"] [tool.hatch.build.targets.wheel] -packages = ["src/bismarkplot"] +packages = ["src/bsxplorer"] [project.scripts] -bsxplorer-metagene = "bismarkplot.templates.cons_MetageneReport:main" -bsxplorer-category = "bismarkplot.templates.cons_CategoryReport:main" -bsxplorer-chr = "bismarkplot.templates.cons_ChrLevelsReport:main" \ No newline at end of file +bsxplorer-metagene = "bsxplorer.templates.cons_MetageneReport:main" +bsxplorer-category = "bsxplorer.templates.cons_CategoryReport:main" +bsxplorer-chr = "bsxplorer.templates.cons_ChrLevelsReport:main" \ No newline at end of file diff --git a/src/bismarkplot/console_chrs.py b/src/bismarkplot/console_chrs.py deleted file mode 100644 index 1136962..0000000 --- a/src/bismarkplot/console_chrs.py +++ /dev/null @@ -1,61 +0,0 @@ -import argparse -import os -import traceback -from datetime import datetime - -parser = argparse.ArgumentParser( - prog='BismarkPlot', - description='Chromosome methylation levels visualization.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter -) - -parser.add_argument('filename', help='path to bismark methylation_extractor file', metavar='path/to/txt') -parser.add_argument('-o', '--out', help='output base name', default="plot", metavar='NAME') -parser.add_argument('-d', '--dir', help='output dir', default=os.path.abspath(os.getcwd()), metavar='DIR') -parser.add_argument('-b', '--batch', help='number of rows to be read from bismark file by batch', type=int, default=10**6, metavar='N') -parser.add_argument('-c', '--cores', help='number of cores to use', type=int, default=None) -parser.add_argument('-w', '--wlength', help='number of windows for chromosome', type=int, default=10**5, metavar='N') -parser.add_argument('-m', '--mlength', help='minimum chromosome length', type=int, default=10**6, metavar='N') -parser.add_argument('-S', '--smooth', help='windows for smoothing (0 - no smoothing, 1 - straight line', type=float, default=50, metavar='FLOAT') -parser.add_argument('-F', '--fmt', help='format of output plots', choices=['png', 'pdf', 'svg'], default='pdf', dest='file_format') -parser.add_argument("--dpi", help="dpi of output plot", type=int, default=200) - - - -def main(): - args = parser.parse_args() - - try: - from src.bismarkplot import ChrLevels - import matplotlib.pyplot as plt - - chr = ChrLevels.from_bismark( - args.filename, - window_length=args.wlength, - chr_min_length=args.mlength, - batch_size=args.batch, - cpu=args.cores - ) - - for strand in ["+", "-"]: - fig, axes = plt.subplots() - - for context in ["CG", "CHG", "CHH"]: - chr.filter(strand=strand, context=context).draw_mpl((fig, axes), smooth=args.smooth, label=context) - - save_path = f"{args.dir}/{args.out}_{strand}.{args.file_format}" - - print(f"Saving to: {save_path}") - - fig.savefig(save_path, dpi=args.dpi) - - except Exception: - filename = f'error{datetime.now().strftime("%m_%d_%H:%M")}.txt' - file_dir = args.dir + '/' + filename - with open(file_dir, 'w') as f: - f.write(traceback.format_exc()) - print(f'Error happened. Please open an issue at GitHub with Traceback from file: {file_dir}') - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/bismarkplot/console_metagene.py b/src/bismarkplot/console_metagene.py deleted file mode 100644 index 2129fd3..0000000 --- a/src/bismarkplot/console_metagene.py +++ /dev/null @@ -1,107 +0,0 @@ -import argparse -import os -import traceback -from datetime import datetime -from matplotlib.pyplot import close - -parser = argparse.ArgumentParser( - prog='BismarkPlot.', - description='Metagene visualizing tool.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter -) -parser.add_argument('filename', help='path to bismark methylation_extractor files', nargs='+') -parser.add_argument('-o', '--out', help='output base name', default="plot", metavar='NAME') -parser.add_argument('--dir', help='output dir', default=os.path.abspath(os.getcwd()), metavar='DIR') -parser.add_argument('-g', '--genome', help='path to GFF genome file') -parser.add_argument('-r', '--region', help='path to GFF genome file', default="gene", choices=["gene", "exon", "tss", "tes"]) -parser.add_argument('-b', '--batch', help='number of rows to be read from bismark file by batch', type=int, default=10**6) -parser.add_argument('-c', '--cores', help='number of cores to use', type=int, default=None) -parser.add_argument('-f', '--flength', help='length in bp of flank regions', type=int, default=2000) -parser.add_argument('-u', '--uwindows', help='number of windows for upstream', type=int, default=50) -parser.add_argument('-d', '--dwindows', help='number of windows for downstream', type=int, default=50) -parser.add_argument('-m', '--mlength', help='minimal length in bp of gene', type=int, default=4000) -parser.add_argument('-w', '--gwindows', help='number of windows for genes', type=int, default=100) - -parser.add_argument('--line', help='line-plot enabled', action='store_true', default=True) -parser.add_argument('--heatmap', help='heat-map enabled', action='store_true', default=True) -parser.add_argument('--box', help='box-plot enabled', action='store_true', default=True) -parser.add_argument('--violin', help='violin-plot enabled', action='store_true', default=True) - -parser.add_argument('-S', '--smooth', help='windows for smoothing', type=float, default=10) -parser.add_argument('-L', '--labels', help='labels for plots', nargs='+') -parser.add_argument('-C', '--confidence', help='probability for confidence bands for line-plot. 0 if disabled', type=float, default=.95) -parser.add_argument('-H', help='vertical resolution for heat-map', type=int, default=100, dest="vresolution") -parser.add_argument('-V', help='vertical resolution for heat-map', type=int, default=100, dest="hresolution") -parser.add_argument("--dpi", help="dpi of output plot", type=int, default=200) - -parser.add_argument('-F', '--format', help='format of output plots', choices=['png', 'pdf', 'svg'], default='pdf', dest='file_format') - - -def main(): - args = parser.parse_args() - if args.genome is None: - print("You need to specify genome path") - exit() - - try: - from .BismarkPlot import MetageneFiles - from src.bismarkplot import Genome - genome = Genome.from_gff( - file=args.genome - ) - if args.region == "tss": - genome = genome.near_TSS(min_length=args.mlength, flank_length=args.flength) - elif args.region == "tes": - genome = genome.near_TES(min_length=args.mlength, flank_length=args.flength) - elif args.region == "exon": - genome = genome.exon(min_length=args.mlength) - else: - genome = genome.gene_body(min_length=args.mlength, flank_length=args.flength) - - bismark = MetageneFiles.from_list( - filenames=args.filename, - genomes=genome, - labels=args.labels, - body_windows=args.gwindows, - up_windows=args.uwindows, - down_windows=args.dwindows, - batch_size=args.batch, - cpu=args.cores - ) - - filename = args.dir + "/" + args.out - print(f"Base name for saving: {filename}_<...>.{args.file_format}") - - for context in ["CG", "CHG", "CHH"]: - for strand in ["+", "-"]: - - filtered = bismark.filter(context=context, strand=strand) - base_name = filename + "_" + context + strand + "_{type}." + args.file_format - - if args.line: - fig = filtered.line_plot().draw_mpl(smooth=args.smooth, confidence=args.confidence) - fig.savefig(base_name.format(type="line-plot"), dpi = args.dpi) - close() - if args.heatmap: - fig = filtered.heat_map(args.hresolution, args.vresolution).draw_mpl() - fig.savefig(base_name.format(type="heat-map"), dpi=args.dpi) - close() - if args.box: - fig = filtered.trim_flank().box_plot() - fig.savefig(base_name.format(type="box-plot"), dpi=args.dpi) - close() - if args.violin: - fig = filtered.trim_flank().violin_plot() - fig.savefig(base_name.format(type="violin-plot"), dpi=args.dpi) - close() - - - except Exception: - filename = args.dir + '/' + f'error{datetime.now().strftime("%m_%d_%H:%M")}.txt' - with open(filename, 'w') as f: - f.write(traceback.format_exc()) - print(f'Error happened. Please open an issue at GitHub with Traceback from file: {filename}') - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/bismarkplot/ArrowReaders.py b/src/bsxplorer/ArrowReaders.py similarity index 99% rename from src/bismarkplot/ArrowReaders.py rename to src/bsxplorer/ArrowReaders.py index b548681..e756f3b 100644 --- a/src/bismarkplot/ArrowReaders.py +++ b/src/bsxplorer/ArrowReaders.py @@ -1,7 +1,6 @@ from __future__ import annotations from pathlib import Path -from abc import ABC, abstractmethod import pyarrow as pa import pyarrow.csv as pcsv diff --git a/src/bismarkplot/Base.py b/src/bsxplorer/Base.py similarity index 100% rename from src/bismarkplot/Base.py rename to src/bsxplorer/Base.py index 9808314..dfb0c5a 100644 --- a/src/bismarkplot/Base.py +++ b/src/bsxplorer/Base.py @@ -9,11 +9,11 @@ import polars as pl import pyarrow as pa from pyreadr import write_rds +from matplotlib.axes import Axes +import plotly.graph_objects as go from .ArrowReaders import CsvReader, ParquetReader, BismarkOptions from .utils import remove_extension, prepare_labels, MetageneSchema, ReportBar -from matplotlib.axes import Axes -import plotly.graph_objects as go class MetageneBase: diff --git a/src/bismarkplot/Binom.py b/src/bsxplorer/Binom.py similarity index 99% rename from src/bismarkplot/Binom.py rename to src/bsxplorer/Binom.py index 26d5026..8d7219d 100644 --- a/src/bismarkplot/Binom.py +++ b/src/bsxplorer/Binom.py @@ -161,7 +161,7 @@ def region_pvalue( -------- If there no preprocessed file: - >>> import bismarkplot as bp + >>> import bsxplorer as bp >>> report_path = "/path/to/report.txt" >>> genome_path = "/path/to/genome.gff" >>> c_binom = bp.BinomialData.preprocess(report_path, report_type="bismark") diff --git a/src/bismarkplot/ChrLevels.py b/src/bsxplorer/ChrLevels.py similarity index 99% rename from src/bismarkplot/ChrLevels.py rename to src/bsxplorer/ChrLevels.py index 04c81c9..08c2559 100644 --- a/src/bismarkplot/ChrLevels.py +++ b/src/bsxplorer/ChrLevels.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -from multiprocessing import cpu_count import numpy as np import polars as pl @@ -16,7 +15,7 @@ from abc import ABC, abstractmethod import warnings -from .utils import approx_batch_num, interval, decompress, ReportBar +from .utils import interval, decompress, ReportBar from .ArrowReaders import CsvReader, BismarkOptions, ParquetReader diff --git a/src/bismarkplot/Clusters.py b/src/bsxplorer/Clusters.py similarity index 98% rename from src/bismarkplot/Clusters.py rename to src/bsxplorer/Clusters.py index 7c20a4e..7781c5d 100644 --- a/src/bismarkplot/Clusters.py +++ b/src/bsxplorer/Clusters.py @@ -12,9 +12,6 @@ import polars as pl from dynamicTreeCut import cutreeHybrid from dynamicTreeCut.dynamicTreeCut import get_heights -from matplotlib import pyplot as plt, colormaps -from matplotlib.figure import Figure -from pyreadr import write_rds from fastcluster import linkage from scipy.cluster.hierarchy import leaves_list, optimal_leaf_ordering from scipy.spatial.distance import pdist @@ -24,7 +21,6 @@ from .Base import MetageneBase, MetageneFilesBase from abc import ABC, abstractmethod -from .utils import prepare_labels, hm_flank_lines class _ClusterBase(ABC): diff --git a/src/bismarkplot/GenomeClass.py b/src/bsxplorer/GenomeClass.py similarity index 99% rename from src/bismarkplot/GenomeClass.py rename to src/bsxplorer/GenomeClass.py index 19b9700..0f4f0d8 100644 --- a/src/bismarkplot/GenomeClass.py +++ b/src/bsxplorer/GenomeClass.py @@ -2,6 +2,7 @@ import polars as pl from pathlib import Path + from .utils import MetageneSchema diff --git a/src/bismarkplot/MetageneClasses.py b/src/bsxplorer/MetageneClasses.py similarity index 99% rename from src/bismarkplot/MetageneClasses.py rename to src/bsxplorer/MetageneClasses.py index 80553e7..a9aeeee 100644 --- a/src/bismarkplot/MetageneClasses.py +++ b/src/bsxplorer/MetageneClasses.py @@ -13,14 +13,13 @@ import seaborn as sns from .Plots import LinePlot, LinePlotFiles, HeatMap, HeatMapFiles -from .ArrowReaders import CsvReader, BismarkOptions, ParquetReader from .SeqMapper import Mapper, Sequence from .Base import ( MetageneBase, MetageneFilesBase, BismarkReportReader, ParquetReportReader, BinomReportReader ) from .Clusters import ClusterSingle, ClusterMany -from .utils import MetageneSchema, ReportBar +from .utils import MetageneSchema from .GenomeClass import Genome pl.enable_string_cache(True) diff --git a/src/bismarkplot/Plots.py b/src/bsxplorer/Plots.py similarity index 99% rename from src/bismarkplot/Plots.py rename to src/bsxplorer/Plots.py index 77b54fe..0747d50 100644 --- a/src/bismarkplot/Plots.py +++ b/src/bsxplorer/Plots.py @@ -2,7 +2,6 @@ import itertools import re -from dataclasses import dataclass import numpy as np import polars as pl @@ -13,7 +12,6 @@ from plotly import graph_objects as go, express as px from pyreadr import write_rds from scipy.signal import savgol_filter -# todo add to dependencies sklearn from sklearn.decomposition import PCA as PCA_sklearn from .Base import PlotBase, MetageneFilesBase diff --git a/src/bismarkplot/SeqMapper.py b/src/bsxplorer/SeqMapper.py similarity index 99% rename from src/bismarkplot/SeqMapper.py rename to src/bsxplorer/SeqMapper.py index b2f765c..2e09005 100644 --- a/src/bismarkplot/SeqMapper.py +++ b/src/bsxplorer/SeqMapper.py @@ -3,7 +3,6 @@ import gc import gzip import io -import multiprocessing import os import shutil import tempfile diff --git a/src/bismarkplot/__init__.py b/src/bsxplorer/__init__.py similarity index 87% rename from src/bismarkplot/__init__.py rename to src/bsxplorer/__init__.py index 63ffba3..57f6d26 100644 --- a/src/bismarkplot/__init__.py +++ b/src/bsxplorer/__init__.py @@ -1,7 +1,7 @@ from .MetageneClasses import Metagene, MetageneFiles -from .Plots import LinePlot, LinePlotFiles, HeatMap, HeatMapFiles +from .Plots import LinePlot, LinePlotFiles, HeatMap, HeatMapFiles, PCA from .Binom import BinomialData, RegionStat from .GenomeClass import Genome from .ChrLevels import ChrLevels -__version__ = '1.0.0' +__version__ = '1.0.0a0' diff --git a/src/bismarkplot/utils.py b/src/bsxplorer/utils.py similarity index 100% rename from src/bismarkplot/utils.py rename to src/bsxplorer/utils.py diff --git a/src/templates/cons_CategoryReport.py b/src/templates/cons_CategoryReport.py index 52edcb2..1f43b44 100644 --- a/src/templates/cons_CategoryReport.py +++ b/src/templates/cons_CategoryReport.py @@ -9,8 +9,8 @@ from matplotlib import pyplot as plt from cons_MetageneReport import get_metagene_parser, parse_config, ReportRow, render_metagene_report -from src.bismarkplot import Genome, Metagene, MetageneFiles, BinomialData -from src.bismarkplot.utils import merge_replicates, decompress +from src.bsxplorer import Genome, Metagene, MetageneFiles, BinomialData +from src.bsxplorer.utils import merge_replicates, decompress from cons_utils import render_template, TemplateMetagenePlot, TemplateMetageneContext, TemplateMetageneBody diff --git a/src/templates/cons_ChrLevelsReport.py b/src/templates/cons_ChrLevelsReport.py index ee5b4e9..b53bd10 100644 --- a/src/templates/cons_ChrLevelsReport.py +++ b/src/templates/cons_ChrLevelsReport.py @@ -11,9 +11,9 @@ import polars as pl from plotly.express.colors import qualitative as PALETTE -from src.bismarkplot import ChrLevels +from src.bsxplorer import ChrLevels from cons_utils import render_template, TemplateMetagenePlot, TemplateMetageneContext, TemplateMetageneBody -from src.bismarkplot.utils import merge_replicates +from src.bsxplorer.utils import merge_replicates def get_chr_parser(): diff --git a/src/templates/cons_MetageneReport.py b/src/templates/cons_MetageneReport.py index 612b860..7ca4382 100644 --- a/src/templates/cons_MetageneReport.py +++ b/src/templates/cons_MetageneReport.py @@ -16,8 +16,8 @@ from matplotlib import pyplot as plt sys.path.insert(0, os.getcwd()) -from src.bismarkplot import Genome, Metagene, MetageneFiles -from src.bismarkplot.Plots import PCA +from src.bsxplorer import Genome, Metagene, MetageneFiles +from src.bsxplorer.Plots import PCA from cons_utils import render_template, TemplateMetagenePlot, TemplateMetageneContext, TemplateMetageneBody # TODO add plot data export option