From 6afbf7a8e547651017a34e309cb80a3e7f49c6e6 Mon Sep 17 00:00:00 2001 From: shitohana Date: Thu, 12 Oct 2023 13:24:11 +0300 Subject: [PATCH] README.md --- README.md | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d8311a6..17b0796 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,14 @@ options: format of output plots (default: pdf) ``` +Example: + +```commandline +bismarkplot-metagene -g path/to/genome.gff -r gene -f 2000 -m 4000 -u 500 -d 500 -w 1000 -b 1000000 --line --heatmap --box --violin --dpi 200 -f pdf -S 50 report1.txt report2.txt report3.txt report4.txt +``` + +[Result](#multiple-samples-same-specie) + ### bismarkplot-chrs ```commandline @@ -90,11 +98,19 @@ options: format of output plots (default: pdf) ``` +Example: + +```commandline +bismarkplot-chrs -b 10000000 -w 10000 -m 1000000 -s 10 -f pdf path/to/CX_report.txt +``` + +[Result](#chromosome-levels) + # Python BismarkPlot provides a large variety of function for manipulating with cytosine methylation data. -## Basic workflow +## Metagene Below we will show the basic BismarkPlot workflow. @@ -124,7 +140,7 @@ lp = filtered.line_plot() # line plot data lp.draw().savefig("path/to/lp.pdf") # matplotlib.Figure hm = filtered.heat_map(ncol=200, nrow=200) -lp.draw().savefig("path/to/hm.pdf") # matplotlib.Figure +hm.draw().savefig("path/to/hm.pdf") # matplotlib.Figure ``` Output: @@ -132,22 +148,159 @@ Output:

+ +### Smoothing the line plot + +Smoothing is very useful when the input signal is very weak (e.g. mammalian non-CpG contexts) + +```python +# mouse CHG methylation example +filtered = metagene.filter(context = "CHG", strand = "+") +lp.draw(smooth = 0).savefig("path/to/lp.pdf") # no smoothing +lp.draw(smooth = 50).savefig("path/to/lp.pdf") # smoothed with window length = 50 +``` + +Output: + +

+ + +

+ ### Multiple samples, same specie ```python # We can initialize genome like in previous example -filenames = ["report1.txt", "report2.txt", "report3.txt"] -metagenes = bismarkplot.MetageneFiles.from_list(filenames, labels = ["rep1", "rep2", "rep3"], ...) # rest of params from previous example +filenames = ["report1.txt", "report2.txt", "report3.txt", "report4.txt"] +metagenes = bismarkplot.MetageneFiles.from_list(filenames, labels = ["1", "2", "3", "4"], ...) # rest of params from previous example # Our metagenes contains all methylation contexts and both strands, so we need to filter it (as in dplyr) filtered = metagenes.filter(context = "CG", strand = "+") # Now we can draw line-plot or heatmap like in previous example, or plot distribution statistics as shown below trimmed = filtered.trim_flank() # we want to analyze only gene bodies -trimmed.box_plot(showfliers=True).savefig(...) +trimmed.box_plot(showfliers=False).savefig(...) trimmed.violin_plot().savefig(...) # If data is technical replicates we can merge them into single DataFrame and analyze as one merged = filtered.merge() -``` \ No newline at end of file +``` + +Output: + +

+ + +

+

+ + +

+ +### Multiple samples, multiple species + +```python +# For analyzing samples with different reference genomes, we need to initialize several genome instances +genome_filenames = ["arabidopsis.gff", "brachypodium.gff", "cucumis.gff", "mus.gff"] +reports_filenames = ["arabidopsis.txt", "brachypodium.txt", "cucumis.txt", "mus.txt"] + +genomes = [ + bismarkplot.Genome.from_gff(file).gene_body(...) for file in genome_filenames +] + +# Now we read reports +metagenes = [] +for report, genome in zip(reports_filenames, genomes): + metagene = bismarkplot.Metagene(report, genome = genome, ...) + metagenes.append(metagene) + +# Initialize MetageneFiles +labels = ["A. thaliana", "B. distachyon", "C. sativus", "M. musculus"] +metagenes = bismarkplot.MetageneFiles(metagenes, labels) +# Now we can plot them like in the previous example +``` + +Output: + +

+ + +

+

+ + +

+ +### Different regions + +Other genomic regions from the .gff file can also be analyzed with the ```.exon``` or ```.near_tss/.near_tes``` methods of ```bismarkplot.Genome``` + +```python +exons = [ + bismarkplot.Genome.from_gff(file).exon(min_length=100) for file in genome_filenames +] +metagenes = [] +for report, exon in zip(reports_filenames, exons): + metagene = bismarkplot.Metagene(report, genome = exon, + upstream_windows = 0, # !!! + downstream_windows = 0, # !!! + ...) + metagenes.append(metagene) +# OR +tss = [ + bismarkplot.Genome.from_gff(file).near_tss(min_length = 2000, flank_length = 2000) for file in genome_filenames +] +metagenes = [] +for report, t in zip(reports_filenames, tss): + metagene = bismarkplot.Metagene(report, genome = t, + upstream_windows = 1000,# same number of windows + gene_windows = 1000, # same number of windows + downstream_windows = 0, # !!! + ...) + metagenes.append(metagene) +``` + +Exon output: + +

+ + +

+ +TSS output: +

+ +

+ +## Chromosome levels + +BismarkPlot allows the user to visualize chromosome methylation levels across the full genome + +```python +import bismarkplot +chr = bismarkplot.ChrLevels.from_file( + "path/to/CX_report.txt", + window_length=10**5, # window length in bp + batch_size=10**7, + chr_min_length = 10**6, # minimum chr length in bp +) +fig, axes = plt.subplots() + +for context in ["CG", "CHG", "CHH"]: + chr.filter(strand="+", context=context).draw( + (fig, axes), # to plot contexts on same axes + smooth=10, # window number for smoothing + label=context # labels for lines + ) + +fig.savefig(f"chrom.pdf", dpi = 200) +``` + +Output for Arabidopsis t.: + + + +Output for Brachypodium d.: + + \ No newline at end of file