From 6afbf7a8e547651017a34e309cb80a3e7f49c6e6 Mon Sep 17 00:00:00 2001
From: shitohana
+ + +
+ ### Multiple samples, same species ```python # We can initialize genome like in previous example -filenames = ["report1.txt", "report2.txt", "report3.txt"] -metagenes = bismarkplot.MetageneFiles.from_list(filenames, labels = ["rep1", "rep2", "rep3"], ...) # rest of params from previous example +filenames = ["report1.txt", "report2.txt", "report3.txt", "report4.txt"] +metagenes = bismarkplot.MetageneFiles.from_list(filenames, labels = ["1", "2", "3", "4"], ...) # rest of params from previous example # Our metagenes contains all methylation contexts and both strands, so we need to filter it (as in dplyr) filtered = metagenes.filter(context = "CG", strand = "+") # Now we can draw line-plot or heatmap like in previous example, or plot distribution statistics as shown below trimmed = filtered.trim_flank() # we want to analyze only gene bodies -trimmed.box_plot(showfliers=True).savefig(...) +trimmed.box_plot(showfliers=False).savefig(...) trimmed.violin_plot().savefig(...) # If data is technical replicates we can merge them into single DataFrame and analyze as one merged = filtered.merge() -``` \ No newline at end of file +``` + +Output: + ++ + +
++ + +
+ +### Multiple samples, multiple species + +```python +# For analyzing samples with different reference genomes, we need to initialize several genome instances +genome_filenames = ["arabidopsis.gff", "brachypodium.gff", "cucumis.gff", "mus.gff"] +reports_filenames = ["arabidopsis.txt", "brachypodium.txt", "cucumis.txt", "mus.txt"] + +genomes = [ + bismarkplot.Genome.from_gff(file).gene_body(...) for file in genome_filenames +] + +# Now we read reports +metagenes = [] +for report, genome in zip(reports_filenames, genomes): + metagene = bismarkplot.Metagene(report, genome = genome, ...) + metagenes.append(metagene) + +# Initialize MetageneFiles +labels = ["A. thaliana", "B. distachyon", "C. sativus", "M. musculus"] +metagenes = bismarkplot.MetageneFiles(metagenes, labels) +# Now we can plot them like in previous example +``` + +Output: + ++ + +
++ + +
+ +### Different regions + +Other genomic regions from the .gff file can also be analyzed with the ```.exon``` or ```.near_tss/.near_tes``` methods of ```bismarkplot.Genome``` + +```python +exons = [ + bismarkplot.Genome.from_gff(file).exon(min_length=100) for file in genome_filenames +] +metagenes = [] +for report, exon in zip(reports_filenames, exons): + metagene = bismarkplot.Metagene(report, genome = exon, + upstream_windows = 0, # !!! + downstream_windows = 0, # !!! + ...) + metagenes.append(metagene) +# OR +tss = [ + bismarkplot.Genome.from_gff(file).near_tss(min_length = 2000, flank_length = 2000) for file in genome_filenames +] +metagenes = [] +for report, t in zip(reports_filenames, tss): + metagene = bismarkplot.Metagene(report, genome = t, + upstream_windows = 1000,# same number of windows + gene_windows = 1000, # same number of windows + downstream_windows = 0, # !!! + ...) + metagenes.append(metagene) +``` + +Exon output: + ++ + +
+ +TSS output: ++ +
+ +## Chromosome levels + +BismarkPlot allows the user to visualize chromosome methylation levels across the full genome + +```python +import bismarkplot +import matplotlib.pyplot as plt +chr = bismarkplot.ChrLevels.from_file( + "path/to/CX_report.txt", + window_length=10**5, # window length in bp + batch_size=10**7, + chr_min_length = 10**6, # minimum chr length in bp +) +fig, axes = plt.subplots() + +for context in ["CG", "CHG", "CHH"]: + chr.filter(strand="+", context=context).draw( + (fig, axes), # to plot contexts on same axes + smooth=10, # window number for smoothing + label=context # labels for lines + ) + +fig.savefig("chrom.pdf", dpi = 200) +``` + +Output for Arabidopsis t.: + + + +Output for Brachypodium d.: + + \ No newline at end of file