diff --git a/Docker/build.sh b/Docker/build.sh
index 6da6c76..c0015e3 100644
--- a/Docker/build.sh
+++ b/Docker/build.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 
-tag="V0.1.1"
+tag="V0.1.2"
 
 docker build --platform linux/amd64 -t wenjin27/methylgrapher:latest -t wenjin27/methylgrapher:$tag ./
 # docker push wenjin27/methylgrapher:latest
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 8f2325c..b5dcb5e 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/filetype.doctree b/docs/build/doctrees/filetype.doctree
index 2910788..c86d598 100644
Binary files a/docs/build/doctrees/filetype.doctree and b/docs/build/doctrees/filetype.doctree differ
diff --git a/docs/build/doctrees/process.doctree b/docs/build/doctrees/process.doctree
index 4a04420..382f1a0 100644
Binary files a/docs/build/doctrees/process.doctree and b/docs/build/doctrees/process.doctree differ
diff --git a/docs/build/html/_sources/filetype.rst.txt b/docs/build/html/_sources/filetype.rst.txt
index ae7878d..65999aa 100644
--- a/docs/build/html/_sources/filetype.rst.txt
+++ b/docs/build/html/_sources/filetype.rst.txt
@@ -5,17 +5,17 @@ File Types
 
 
 GFA: Graphical Fragment Assembly format
-------------------------------
+------------------------------------------------------------
 For detail explanation, refer to https://gfa-spec.github.io/GFA-spec/GFA1.html
 
 
 GAF: Graphical mApping Format
-------------------------------
+------------------------------------------------------------
 For detail explanation, refer to https://github.com/lh3/gfatools/blob/master/doc/rGFA.md
 
 
 methyl: Graph methylation file
-------------------------------
+------------------------------------------------------------
 The methylGrapher extraction output provides a list of each cytosine mapped to the genome graph, specified by its coordinates (segment ID, 0-based position on the segment, and strand relative to the segment).
 For each cytosine, the output includes its context, the number of read-pairs supporting whether the cytosine is methylated or unmethylated, and the coverage, which is the sum of methylated and unmethylated read-pairs.
 The methylation percentage is calculated as the ratio of methylated read-pairs to the total coverage.
diff --git a/docs/build/html/_sources/process.rst.txt b/docs/build/html/_sources/process.rst.txt
index f281181..51f995a 100644
--- a/docs/build/html/_sources/process.rst.txt
+++ b/docs/build/html/_sources/process.rst.txt
@@ -11,9 +11,6 @@ Description
 ~~~~~~~~~~~~~~~~~~~~~~
 This process involves transforming the genome graph from GFA format into two fully converted genome graphs: one depleted of C bases and another depleted of G bases. Additionally, if desired, you may include a spike-in genome in FASTA format to estimate the conversion rate in a single step further.
 
-.. note::
-    It's important to note that in the C-to-T genome graph, if both C and T segments are positioned identically—meaning they share the same parent and child segments, and each has only one parent and one child—the T segments are removed. Additionally, any associated links and paths are redirected to the corresponding C segment. This principle also applies in the G-to-A genome graph.
-
 Example Usage
 ~~~~~~~~~~~~~~~~~~~~~~
 .. code-block:: shell
diff --git a/docs/build/html/process.html b/docs/build/html/process.html
index fa9cc60..4086610 100644
--- a/docs/build/html/process.html
+++ b/docs/build/html/process.html
@@ -118,10 +118,6 @@ <h2>Genome Indexing<a class="headerlink" href="#genome-indexing" title="Link to
 <section id="description">
 <h3>Description<a class="headerlink" href="#description" title="Link to this heading"></a></h3>
 <p>This process involves transforming the genome graph from GFA format into two fully converted genome graphs: one depleted of C bases and another depleted of G bases. Additionally, if desired, you may include a spike-in genome in FASTA format to estimate the conversion rate in a single step further.</p>
-<div class="admonition note">
-<p class="admonition-title">Note</p>
-<p>It’s important to note that in the C-to-T genome graph, if both C and T segments are positioned identically—meaning they share the same parent and child segments, and each has only one parent and one child—the T segments are removed. Additionally, any associated links and paths are redirected to the corresponding C segment. This principle also applies in the G-to-A genome graph.</p>
-</div>
 </section>
 <section id="example-usage">
 <h3>Example Usage<a class="headerlink" href="#example-usage" title="Link to this heading"></a></h3>
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 6dbf2a8..4e9172d 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"Align": [[5, "align"]], "Citation": [[0, "citation"]], "Contact us": [[0, null]], "ConversionRate": [[5, "conversionrate"]], "Deduplication": [[5, "deduplication"]], "Description": [[5, "description"], [5, "id1"], [5, "id5"], [5, "id9"], [5, "id13"]], "Example Usage": [[5, "example-usage"], [5, "id2"], [5, "id6"], [5, "id10"], [5, "id14"]], "File Types": [[2, null]], "GAF: Graphical mApping Format": [[2, "gaf-graphical-mapping-format"]], "GFA: Graphical Fragment Assembly format": [[2, "gfa-graphical-fragment-assembly-format"]], "Genome Indexing": [[5, "genome-indexing"]], "Install via CONDA": [[4, "install-via-conda"]], "Install via PIP": [[4, "install-via-pip"]], "Installation": [[4, null]], "Main": [[5, "main"]], "MethylCall": [[5, "methylcall"]], "Optional arguments": [[5, "optional-arguments"], [5, "id4"], [5, "id8"], [5, "id12"]], "Prerequisites": [[4, "prerequisites"]], "Process": [[5, null]], "Reference:": [[2, "reference"]], "Required arguments": [[5, "required-arguments"], [5, "id3"], [5, "id7"], [5, "id11"], [5, "id15"]], "Source code": [[4, "source-code"]], "Source code and issue tracker": [[0, "source-code-and-issue-tracker"]], "Toy Example": [[1, null]], "Use Docker": [[4, "use-docker"]], "Welcome to methylGrapher\u2019s documentation!": [[3, null]], "methyl: Graph methylation file": [[2, "methyl-graph-methylation-file"]], "methylGrapher: Genome-Graph-Based DNA Methylation Analysis Using Whole Genome Bisulfite Sequencing": [[3, "methylgrapher-genome-graph-based-dna-methylation-analysis-using-whole-genome-bisulfite-sequencing"]]}, "docnames": ["contact", "example", "filetype", "index", "install", "process"], "envversion": {"sphinx": 63, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["contact.rst", "example.rst", "filetype.rst", "index.rst", "install.rst", "process.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": 5, "0": [2, 5], "1": 5, "10": 4, "11": 4, "12": 4, "20": 5, "3": 4, "4096": 5, "9": 4, "A": 5, "For": 2, "If": 0, "It": 5, "The": [1, 2, 4, 5], "activ": 4, "ad": 4, "adapt": 4, "addition": 5, "against": 5, "align": [3, 4], "all": 1, "along": 1, "also": [1, 5], "an": 0, "analyz": 1, "ani": [0, 5], "anoth": 5, "appli": 5, "ar": [1, 4, 5], "assembli": 3, "associ": 5, "base": [2, 5], "bash": 1, "batch_siz": 5, "below": 1, "best": 4, "bioconda": 4, "blob": [1, 2], "both": [4, 5], "c": 5, "calcul": 2, "call": 5, "can": [0, 1], "cd": 1, "child": 5, "citat": 3, "clone": 1, "code": 3, "com": [0, 1, 2], "compris": 5, "conda": 3, "construct": 2, "contact": 3, "context": 2, "convers": 5, "conversionr": 3, "convert": 5, "coordin": 2, "core": 5, "correspond": 5, "coverag": 2, "creat": 4, "cytosin": 2, "data": 1, "dedupl": [3, 4], "default": 5, "deplet": 5, "design": 2, "desir": 5, "detail": [2, 5], "direct": 5, "directori": 5, "discard_multimap": 5, "doc": 2, "docker": 3, "document": 5, "each": [2, 5], "ensur": 4, "essenti": 5, "estim": 5, "exampl": 3, "example_b": 1, "execut": 5, "explan": 2, "extract": [2, 5], "facilit": 5, "fasta": 5, "fastq": [1, 5], "fastq1_file_path": 5, "fastq2_file_path": 5, "fastuniq": [4, 5], "file": [1, 3, 5], "final": 1, "find": 0, "firstli": 5, "folder": 1, "format": [1, 3, 5], "four": 5, "fq1": 5, "fq2": 5, "fragment": 3, "from": 5, "fulli": 5, "further": 5, "g": 5, "gaf": [3, 5], "galor": 4, "gener": [1, 5], "genom": [1, 2], "gfa": [1, 3, 5], "gfa1": 2, "gfa_file_path": 5, "gfatool": 2, "giraff": [4, 5], "git": 1, "github": [0, 1, 2], "glossari": 2, "graph": [1, 5], "graphic": 3, "ha": [4, 5], "have": 0, "here": 0, "how": 1, "html": 2, "http": [0, 1, 2], "hub": 4, "i": [1, 2, 4, 5], "id": 2, "ident": 5, "import": 5, "includ": [2, 5], "incorpor": 1, "index": 3, "index_prefix": 5, "instal": [1, 3], "involv": 5, "io": 2, "issu": 3, "its": 2, "lambda_phage_genome_path": 5, "lambdaphagefastafilepath": 5, "lastli": 5, "latest": 4, "lh3": 2, "link": 5, "list": 2, "lp": 5, "mai": 5, "main": [1, 3], "make_exampl": 1, "map": 3, "master": 2, "md": 2, "mean": 5, "methyl": [1, 5], "methylcal": 3, "methylgraph": [0, 1, 2, 4, 5], "minigraph": 2, "minimum_ident": 5, "minimum_mapq": 5, "more": 5, "must": 4, "n": 4, "next": 5, "note": 5, "number": 2, "one": 5, "onli": 5, "oper": 5, "our": 0, "output": [1, 2], "output_prefix": 5, "outputprefix": 5, "packag": 1, "pair": 2, "pangenom": 2, "parent": 5, "path": [4, 5], "percentag": 2, "perform": 4, "pip": 3, "pipelin": 1, "pleas": [0, 5], "posit": [2, 5], "prefix": 5, "preparegenom": 5, "preparegenomeoutputprefix": 5, "prerequisit": 3, "principl": 5, "process": 3, "provid": [2, 5], "py": 1, "python": 4, "python3": 4, "question": 0, "rate": 5, "ratio": 2, "read": 2, "recommend": 4, "redirect": 5, "refer": [3, 5], "rel": 2, "relat": 2, "remov": 5, "repo": 0, "result": 5, "rgfa": 2, "same": 5, "script": 1, "segment": [2, 5], "sh": 1, "share": 5, "show": 1, "simul": 1, "singl": 5, "sort": 5, "sourc": 3, "spec": 2, "specifi": 2, "spike": 5, "step": 5, "strand": 2, "submit": 0, "subsequ": 5, "sum": 2, "support": 2, "t": 5, "test": 4, "thank": 0, "thei": 5, "theworkingdir": 5, "thi": [1, 5], "threads_us": 5, "threadsus": 5, "toi": 3, "toolkit": 4, "total": 2, "tracker": 3, "transform": 5, "trim": 4, "twlab": [0, 1], "two": 5, "type": 3, "u": 3, "undergo": 5, "unmethyl": 2, "us": 1, "verifi": 1, "version": 4, "vg": [2, 4, 5], "via": 3, "whether": 2, "which": [1, 2], "within": 1, "work": 5, "work_dir": 5, "workflow": 1, "working_directori": 5, "would": 4, "y": 5, "you": [0, 1, 4, 5], "your": 4, "yourgfafilepath": 5}, "titles": ["Contact us", "Toy Example", "File Types", "Welcome to methylGrapher\u2019s documentation!", "Installation", "Process"], "titleterms": {"": 3, "align": 5, "analysi": 3, "argument": 5, "assembli": 2, "base": 3, "bisulfit": 3, "citat": 0, "code": [0, 4], "conda": 4, "contact": 0, "conversionr": 5, "dedupl": 5, "descript": 5, "dna": 3, "docker": 4, "document": 3, "exampl": [1, 5], "file": 2, "format": 2, "fragment": 2, "gaf": 2, "genom": [3, 5], "gfa": 2, "graph": [2, 3], "graphic": 2, "index": 5, "instal": 4, "issu": 0, "main": 5, "map": 2, "methyl": [2, 3], "methylcal": 5, "methylgraph": 3, "option": 5, "pip": 4, "prerequisit": 4, "process": 5, "refer": 2, "requir": 5, "sequenc": 3, "sourc": [0, 4], "toi": 1, "tracker": 0, "type": 2, "u": 0, "us": [3, 4], "usag": 5, "via": 4, "welcom": 3, "whole": 3}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"Align": [[5, "align"]], "Citation": [[0, "citation"]], "Contact us": [[0, null]], "ConversionRate": [[5, "conversionrate"]], "Deduplication": [[5, "deduplication"]], "Description": [[5, "description"], [5, "id1"], [5, "id5"], [5, "id9"], [5, "id13"]], "Example Usage": [[5, "example-usage"], [5, "id2"], [5, "id6"], [5, "id10"], [5, "id14"]], "File Types": [[2, null]], "GAF: Graphical mApping Format": [[2, "gaf-graphical-mapping-format"]], "GFA: Graphical Fragment Assembly format": [[2, "gfa-graphical-fragment-assembly-format"]], "Genome Indexing": [[5, "genome-indexing"]], "Install via CONDA": [[4, "install-via-conda"]], "Install via PIP": [[4, "install-via-pip"]], "Installation": [[4, null]], "Main": [[5, "main"]], "MethylCall": [[5, "methylcall"]], "Optional arguments": [[5, "optional-arguments"], [5, "id4"], [5, "id8"], [5, "id12"]], "Prerequisites": [[4, "prerequisites"]], "Process": [[5, null]], "Reference:": [[2, "reference"]], "Required arguments": [[5, "required-arguments"], [5, "id3"], [5, "id7"], [5, "id11"], [5, "id15"]], "Source code": [[4, "source-code"]], "Source code and issue tracker": [[0, "source-code-and-issue-tracker"]], "Toy Example": [[1, null]], "Use Docker": [[4, "use-docker"]], "Welcome to methylGrapher\u2019s documentation!": [[3, null]], "methyl: Graph methylation file": [[2, "methyl-graph-methylation-file"]], "methylGrapher: Genome-Graph-Based DNA Methylation Analysis Using Whole Genome Bisulfite Sequencing": [[3, "methylgrapher-genome-graph-based-dna-methylation-analysis-using-whole-genome-bisulfite-sequencing"]]}, "docnames": ["contact", "example", "filetype", "index", "install", "process"], "envversion": {"sphinx": 63, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["contact.rst", "example.rst", "filetype.rst", "index.rst", "install.rst", "process.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"0": [2, 5], "1": 5, "10": 4, "11": 4, "12": 4, "20": 5, "3": 4, "4096": 5, "9": 4, "For": 2, "If": 0, "The": [1, 2, 4, 5], "activ": 4, "ad": 4, "adapt": 4, "addition": 5, "against": 5, "align": [3, 4], "all": 1, "along": 1, "also": 1, "an": 0, "analyz": 1, "ani": 0, "anoth": 5, "ar": [1, 4, 5], "assembli": 3, "base": [2, 5], "bash": 1, "batch_siz": 5, "below": 1, "best": 4, "bioconda": 4, "blob": [1, 2], "both": 4, "c": 5, "calcul": 2, "call": 5, "can": [0, 1], "cd": 1, "citat": 3, "clone": 1, "code": 3, "com": [0, 1, 2], "compris": 5, "conda": 3, "construct": 2, "contact": 3, "context": 2, "convers": 5, "conversionr": 3, "convert": 5, "coordin": 2, "core": 5, "coverag": 2, "creat": 4, "cytosin": 2, "data": 1, "dedupl": [3, 4], "default": 5, "deplet": 5, "design": 2, "desir": 5, "detail": [2, 5], "direct": 5, "directori": 5, "discard_multimap": 5, "doc": 2, "docker": 3, "document": 5, "each": 2, "ensur": 4, "essenti": 5, "estim": 5, "exampl": 3, "example_b": 1, "execut": 5, "explan": 2, "extract": [2, 5], "facilit": 5, "fasta": 5, "fastq": [1, 5], "fastq1_file_path": 5, "fastq2_file_path": 5, "fastuniq": [4, 5], "file": [1, 3, 5], "final": 1, "find": 0, "firstli": 5, "folder": 1, "format": [1, 3, 5], "four": 5, "fq1": 5, "fq2": 5, "fragment": 3, "from": 5, "fulli": 5, "further": 5, "g": 5, "gaf": [3, 5], "galor": 4, "gener": [1, 5], "genom": [1, 2], "gfa": [1, 3, 5], "gfa1": 2, "gfa_file_path": 5, "gfatool": 2, "giraff": [4, 5], "git": 1, "github": [0, 1, 2], "glossari": 2, "graph": [1, 5], "graphic": 3, "ha": 4, "have": 0, "here": 0, "how": 1, "html": 2, "http": [0, 1, 2], "hub": 4, "i": [1, 2, 4, 5], "id": 2, "includ": [2, 5], "incorpor": 1, "index": 3, "index_prefix": 5, "instal": [1, 3], "involv": 5, "io": 2, "issu": 3, "its": 2, "lambda_phage_genome_path": 5, "lambdaphagefastafilepath": 5, "lastli": 5, "latest": 4, "lh3": 2, "list": 2, "lp": 5, "mai": 5, "main": [1, 3], "make_exampl": 1, "map": 3, "master": 2, "md": 2, "methyl": [1, 5], "methylcal": 3, "methylgraph": [0, 1, 2, 4, 5], "minigraph": 2, "minimum_ident": 5, "minimum_mapq": 5, "more": 5, "must": 4, "n": 4, "next": 5, "number": 2, "one": 5, "oper": 5, "our": 0, "output": [1, 2], "output_prefix": 5, "outputprefix": 5, "packag": 1, "pair": 2, "pangenom": 2, "path": 4, "percentag": 2, "perform": 4, "pip": 3, "pipelin": 1, "pleas": [0, 5], "posit": 2, "prefix": 5, "preparegenom": 5, "preparegenomeoutputprefix": 5, "prerequisit": 3, "process": 3, "provid": [2, 5], "py": 1, "python": 4, "python3": 4, "question": 0, "rate": 5, "ratio": 2, "read": 2, "recommend": 4, "refer": [3, 5], "rel": 2, "relat": 2, "repo": 0, "result": 5, "rgfa": 2, "script": 1, "segment": 2, "sh": 1, "show": 1, "simul": 1, "singl": 5, "sort": 5, "sourc": 3, "spec": 2, "specifi": 2, "spike": 5, "step": 5, "strand": 2, "submit": 0, "subsequ": 5, "sum": 2, "support": 2, "t": 5, "test": 4, "thank": 0, "theworkingdir": 5, "thi": [1, 5], "threads_us": 5, "threadsus": 5, "toi": 3, "toolkit": 4, "total": 2, "tracker": 3, "transform": 5, "trim": 4, "twlab": [0, 1], "two": 5, "type": 3, "u": 3, "undergo": 5, "unmethyl": 2, "us": 1, "verifi": 1, "version": 4, "vg": [2, 4, 5], "via": 3, "whether": 2, "which": [1, 2], "within": 1, "work": 5, "work_dir": 5, "workflow": 1, "working_directori": 5, "would": 4, "y": 5, "you": [0, 1, 4, 5], "your": 4, "yourgfafilepath": 5}, "titles": ["Contact us", "Toy Example", "File Types", "Welcome to methylGrapher\u2019s documentation!", "Installation", "Process"], "titleterms": {"": 3, "align": 5, "analysi": 3, "argument": 5, "assembli": 2, "base": 3, "bisulfit": 3, "citat": 0, "code": [0, 4], "conda": 4, "contact": 0, "conversionr": 5, "dedupl": 5, "descript": 5, "dna": 3, "docker": 4, "document": 3, "exampl": [1, 5], "file": 2, "format": 2, "fragment": 2, "gaf": 2, "genom": [3, 5], "gfa": 2, "graph": [2, 3], "graphic": 2, "index": 5, "instal": 4, "issu": 0, "main": 5, "map": 2, "methyl": [2, 3], "methylcal": 5, "methylgraph": 3, "option": 5, "pip": 4, "prerequisit": 4, "process": 5, "refer": 2, "requir": 5, "sequenc": 3, "sourc": [0, 4], "toi": 1, "tracker": 0, "type": 2, "u": 0, "us": [3, 4], "usag": 5, "via": 4, "welcom": 3, "whole": 3}})
\ No newline at end of file
diff --git a/docs/source/process.rst b/docs/source/process.rst
index f281181..51f995a 100644
--- a/docs/source/process.rst
+++ b/docs/source/process.rst
@@ -11,9 +11,6 @@ Description
 ~~~~~~~~~~~~~~~~~~~~~~
 This process involves transforming the genome graph from GFA format into two fully converted genome graphs: one depleted of C bases and another depleted of G bases. Additionally, if desired, you may include a spike-in genome in FASTA format to estimate the conversion rate in a single step further.
 
-.. note::
-    It's important to note that in the C-to-T genome graph, if both C and T segments are positioned identically—meaning they share the same parent and child segments, and each has only one parent and one child—the T segments are removed. Additionally, any associated links and paths are redirected to the corresponding C segment. This principle also applies in the G-to-A genome graph.
-
 Example Usage
 ~~~~~~~~~~~~~~~~~~~~~~
 .. code-block:: shell
diff --git a/src/config.ini b/src/config.ini
index f7b5e47..b1c56c1 100644
--- a/src/config.ini
+++ b/src/config.ini
@@ -1,6 +1,7 @@
 [default]
 # Provide the path to the executable
 vg_path = vg
+ga_path = GraphAligner
 fastuniq_path = fastuniq
 
 
diff --git a/src/gfa.py b/src/gfa.py
index cb44b7b..f5c2c7f 100644
--- a/src/gfa.py
+++ b/src/gfa.py
@@ -345,6 +345,32 @@ def get_sequences_by_segment_ID(self, segment_IDs):
         return res
 
 
+# Store segment length in memory
+class GraphicalFragmentAssemblySegmentLengthMemory(object):
+
+    def __init__(self):
+        self.clear()
+
+    def clear(self):
+        self._segment_length = {}
+
+    def parse(self, gfa_file):
+        with open(gfa_file) as gfa_fh:
+            for l in gfa_fh:
+                if l[0] not in "S":
+                    continue
+
+                l = l.strip().split("\t")
+
+                if l[0] == "S":
+                    rt, segID, seq, *tags = l
+                    # sequence, tag, links, parent_links_count
+                    self._segment_length[segID] = len(seq)
+
+
+    def get_sequence_length_by_segment_ID(self, segment_ID):
+        return self._segment_length[segment_ID]
+
 
 
 
diff --git a/src/longread.py b/src/longread.py
new file mode 100644
index 0000000..639bc7d
--- /dev/null
+++ b/src/longread.py
@@ -0,0 +1,642 @@
+
+
+import os
+import re
+import sys
+import time
+import gzip
+import argparse
+
+import gfa
+import utility
+
+
+
+
+
+
+
+
+utl = utility.Utility()
+
+cytosine_methylation_regex = re.compile(r"\+(\(.*\))\-(\(.*\))")
+cigar_regex = re.compile(r"\d+[MID]")
+
+
+def path_to_list(path_str):
+    # Example: <29983488>29983486<29983485<29983484
+    res = []
+    direction = True
+    segment_ID = ""
+    for c in path_str:
+        if c in "<>":
+            if c == "<":
+                direction = False
+            elif c == ">":
+                direction = True
+        else:
+            segment_ID += c
+            continue
+
+        res.append((segment_ID, direction))
+        segment_ID = ""
+
+    res.append((segment_ID, direction))
+    res.pop(0)
+
+    return res
+
+
+def debug_get_fasta_by_read_name(read_name, fasta_fp):
+    with open(fasta_fp) as f:
+        for l in f:
+            if l.startswith(">"):
+                if l[1:].strip() == read_name:
+                    return f.readline().strip()
+    return None
+
+def sam_bam_line_reader(file_path):
+    lower_fp = file_path.lower()
+    if lower_fp.endswith(".sam"):
+        with open(file_path, 'r') as f:
+            for line in f:
+                yield line
+    elif lower_fp.endswith(".bam"):
+        samtool_cmd = utility.SystemExecute()
+        samtool_out, samtool_err = samtool_cmd.execute(f"samtools view -h {file_path}", )
+        for line in samtool_out:
+            yield str(line, 'utf-8')
+        samtool_cmd.wait()
+
+
+
+def bam_sam_to_fasta(bsam_fp, fasta_fp):
+    # Explore sam file
+
+    output_fasta_fh = None
+    if utl.isGzip(fasta_fp):
+        output_fasta_fh = gzip.open(fasta_fp, 'wt')
+    else:
+        output_fasta_fh = open(fasta_fp, 'w')
+
+
+    no_methylation_tag = 0
+    read_line_count = 0
+
+
+    durations = [0] * 10
+    timestamps = [0] * 10
+    next_report = 10
+
+
+    for l in sam_bam_line_reader(bsam_fp):
+        # Skip header
+        if l.startswith('@'):
+            continue
+
+
+        if timestamps[0] == 0:
+            ts = time.time()
+            for i in range(10):
+                timestamps[i] = ts
+
+        durations[0] += time.time() - timestamps[0]
+
+        if sum(durations) > next_report:
+            print(durations)
+            next_report += 10
+
+        line = l.strip().split('\t')
+        basic_info = line[:11]
+        tags_list = line[11:]
+        tags = {}
+        for tag_str in tags_list:
+            tag = [tag_str[:4], tag_str[5:]]
+            if tag_str.startswith("Ml:B:C"):
+                tag = ["Ml:B:C", tag_str[7:]]
+            tags[tag[0]] = tag[1]
+
+        read_name = basic_info[0]
+        read_seq = basic_info[9].upper()
+
+        if 'Mm:Z' not in tags:
+            no_methylation_tag += 1
+            continue
+
+        if "Ml:B:C" not in tags:
+            no_methylation_tag += 1
+            continue
+
+        mm_tag = tags['Mm:Z']
+        ml_tag = tags["Ml:B:C"]
+
+        if mm_tag.endswith(";"):
+            mm_tag = mm_tag[:-1]
+        if ml_tag.endswith(";"):
+            ml_tag = ml_tag[:-1]
+
+        mm_tag = mm_tag.split(",")
+        ml_tag = ml_tag.split(",")
+
+        if mm_tag[0] != 'C+m':
+            # print(mm_tag[0], "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
+            continue
+        else:
+            mm_tag = mm_tag[1:]
+
+        if len(ml_tag) in [0, 1]:
+            continue
+
+        mm_tag = [int(x) for x in mm_tag]
+        ml_tag = [int(x) for x in ml_tag]
+        assert len(mm_tag) == len(ml_tag)
+
+        cg_count = read_seq.count("C")
+
+        cytosines_positions = []
+        cpg_positions = []
+        for i, nuc in enumerate(read_seq):
+            if nuc == "C":
+                cytosines_positions.append(i)
+                if i < len(read_seq) - 1 and read_seq[i + 1] == "G":
+                    cpg_positions.append(i)
+
+        methylated_cytosines = []
+        unmethylated_cytosines = []
+        nth_cytosine = 0
+        for i in range(len(mm_tag)):
+            interval = mm_tag[i] + 1
+            if nth_cytosine == 0:
+                nth_cytosine = interval - 1
+            else:
+                nth_cytosine += interval
+            # print(nth_cytosine, interval)
+
+            methylated = ml_tag[i] > 127
+            if methylated:
+                methylated_cytosines.append(cytosines_positions[nth_cytosine])
+            else:
+                unmethylated_cytosines.append(cytosines_positions[nth_cytosine])
+
+        cpg_covered_count = len(
+            set(cpg_positions).intersection(set(methylated_cytosines).union(set(unmethylated_cytosines))))
+        if cpg_covered_count < len(cpg_positions) - 5:
+            # print(read_line_count, cpg_covered_count, len(cpg_positions))
+            # continue
+            pass
+
+        # print(read_name)
+
+        # print(read_line_count)
+        # print(read_name)
+        # print(l)
+        # print(len(read_seq))
+        # print(line)
+        # print(basic_info[:9])
+        # print(tags)
+        # print(cg_count)
+        # print(len(mm_tag))
+        # print(len(ml_tag))
+
+        # print(mm_tag[:40])
+        # print(cytosines_positions[:80])
+        # print()
+        # print(methylated_cytosines[:20])
+        # print(cpg_positions[:20])
+
+        # print(len(methylated_cytosines), len(cpg_positions))
+        # print(methylated_cytosines[-1], cpg_positions[-1])
+
+        # print('\n')
+
+        methylated_cytosines = [str(x) for x in methylated_cytosines]
+        unmethylated_cytosines = [str(x) for x in unmethylated_cytosines]
+        met_str = f"+({','.join(methylated_cytosines)})-({','.join(unmethylated_cytosines)})"
+        fasta_entry = f">{read_name}{met_str}\n{read_seq}\n\n"
+
+        # print(fasta_entry)
+        output_fasta_fh.write(fasta_entry)
+
+        read_line_count += 1
+
+
+        tnow = time.time()
+        durations[1] += tnow - timestamps[0]
+        timestamps[0] = tnow
+
+
+
+
+    output_fasta_fh.close()
+    return None
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def generate_fp_within_wd(wd, file_name):
+    return os.path.join(wd, file_name)
+
+def generate_fasta_fp(wd):
+    return generate_fp_within_wd(wd, "input.fasta")
+
+
+def extract_methylation_single_thread(gfa_fp, wd, threads=1, debug=False):
+    """
+    Extract methylation information
+    :param wd: working directory
+    :param gfa: genome graph file path in GFA format
+    :return: methylation information
+    """
+
+    counter_pass = 0
+    counter_issue_no_methylation = 0
+    counter_issue = 0
+    counter_total = 0
+
+    methylation_result = {}
+
+    time_start = time.time()
+
+    gfa_instance = gfa.GraphicalFragmentAssemblySegmentLengthMemory()
+    gfa_instance.parse(gfa_fp)
+
+    if debug:
+        # Just in case the sequence is needed for debugging
+        gfa_seq_instance = gfa.GraphicalFragmentAssemblyMemorySegmentOptimized()
+        gfa_seq_instance.parse(gfa_fp)
+
+
+
+    with open(generate_fp_within_wd(wd, "align.gaf")) as f:
+        for l in f:
+
+            l = l.strip().split("\t")
+            # print(l)
+
+            counter_total += 1
+            if counter_total % 1000 == 0:
+                duration = time.time() - time_start
+                # TODO better logging
+                # print(counter_total, counter_pass, counter_issue_no_methylation, counter_issue, duration)
+
+            # Read the graph alignment file (GAF)
+            read_name = l[0]
+
+            query_len = int(l[1])
+            query_start = int(l[2])
+            query_end = int(l[3])
+
+            strand = l[4]
+
+            path_str = l[5]
+            path_list = path_to_list(path_str)
+
+            path_len = int(l[6])
+            path_start = int(l[7])
+            path_end = int(l[8])
+
+            matched_len = int(l[9])
+            alignment_block_len = int(l[10])
+            mapq = int(l[11])
+
+            tag_list = l[12:]
+            tags = {}
+            for tag_str in tag_list:
+                tname, tdt, tv = tag_str.split(":")
+                if tdt == "i":
+                    tv = int(tv)
+                elif tdt == "f":
+                    tv = float(tv)
+                tags[tname] = tv
+                # print(tname, tv)
+
+            # Example: 42M1I5M1D23M1I1M1I1M1I13M1D41M1D35M1D4M1D
+            cigar = []
+            cigar_str = tags["cg"]
+            for c in cigar_regex.findall(cigar_str):
+                cigar.append((int(c[:-1]), c[-1]))
+
+            alignment_block_type = []
+            block_start = 0
+            block_end = 0
+            for length, alignment_type in cigar:
+                block_end += length
+
+                alignment_block_type.append((block_start, block_end, alignment_type))
+                block_start = block_end
+            alignment_block_type.append((block_start, block_end, alignment_type))
+
+
+
+            # Sanity checks
+
+            # make sure there is methylation information here, and there is no conflict
+            mc = cytosine_methylation_regex.findall(read_name)
+            if len(mc) != 1:
+                counter_issue_no_methylation += 1
+                continue
+
+            methylated_cytosines = mc[0][0][1:-1].split(",")
+            unmethylated_cytosines = mc[0][1][1:-1].split(",")
+
+            if len(methylated_cytosines) + len(unmethylated_cytosines) == 0:
+                counter_issue_no_methylation += 1
+                continue
+
+            methylated = list(map(int, list(filter(lambda x: x != "", methylated_cytosines))))
+            unmethylated = list(map(int, list(filter(lambda x: x != "", unmethylated_cytosines))))
+
+
+            # Just to make sure the cigar interpretation is correct
+            cigar_freq = {}
+            for c in cigar:
+                if c[1] not in cigar_freq:
+                    cigar_freq[c[1]] = 0
+                cigar_freq[c[1]] += c[0]
+            assert path_end - path_start == cigar_freq["M"] + cigar_freq.get("D", 0)
+            assert query_end - query_start == cigar_freq["M"] + cigar_freq.get("I", 0)
+
+            reconstructed_cigar = ""
+            for c in cigar:
+                reconstructed_cigar += str(c[0]) + c[1]
+            assert reconstructed_cigar == cigar_str
+
+
+            # if ">" in path_str:
+            #    continue
+
+
+            # Just to make sure the path interpretation is correct
+            path_len_constructed = 0
+            path_segment_length = {}
+            for segID, direction in path_list:
+                seq_length = gfa_instance.get_sequence_length_by_segment_ID(segID)
+                path_len_constructed += seq_length
+                path_segment_length[segID] = seq_length
+            assert path_len_constructed == path_len
+
+
+            # Debugging
+            # query_seq = debug_get_fasta_by_read_name(read_name, "./short.fasta")
+            query_seq = ""
+            path_seq = ""
+
+            """
+            for i, (segID, direction) in enumerate(path_list):
+                seq = gfa_instance.get_sequence_by_segment_ID(segID)
+                if not direction:
+                    seq = seq_reverse_complement(seq)
+                path_seq += seq
+            """
+
+
+            pos_in_read = -1
+            pos_in_path = -1
+            for alignment_block_len, alignment_block_type in cigar:
+
+                for ai in range(alignment_block_len):
+                    if alignment_block_type == "M":
+                        pos_in_read += 1
+                        pos_in_path += 1
+
+                        pos_in_abs_path = path_start + pos_in_path
+                        pos_in_abs_read = query_start + pos_in_read
+
+                        met_flag = None
+                        if pos_in_abs_read in methylated:
+                            met_flag = "M"
+                        elif pos_in_abs_read in unmethylated:
+                            met_flag = "U"
+
+                        if met_flag is not None:
+
+                            corresponding_segment_ID = None
+                            corresponding_segment_pos = pos_in_abs_path
+                            for segID, direction in path_list:
+                                if corresponding_segment_pos < path_segment_length[segID]:
+                                    corresponding_segment_ID = segID
+                                    if not direction:
+                                        # TODO need double check
+                                        corresponding_segment_pos = path_segment_length[segID] - corresponding_segment_pos - 1
+                                    break
+
+                                corresponding_segment_pos -= path_segment_length[segID]
+
+                            if debug:
+                                segment_seq = gfa_seq_instance.get_sequence_by_segment_ID(corresponding_segment_ID)
+                                csp1 = corresponding_segment_pos
+                                csp2 = corresponding_segment_pos + 2
+                                if csp2 > len(segment_seq):
+                                    csp2 = csp1 + 1
+
+                                if not direction:
+                                    csp2 = corresponding_segment_pos
+                                    csp1 = corresponding_segment_pos - 2
+                                    if csp1 < 0:
+                                        csp1 = 0
+
+                                # This is to check whether I am aligning CG within the read to CG in the segment
+                                print(path_seq[pos_in_abs_path: pos_in_abs_path+2], query_seq[pos_in_abs_read:pos_in_abs_read+2], segment_seq[csp1: csp2], met_flag)
+
+                            if corresponding_segment_ID not in methylation_result:
+                                methylation_result[corresponding_segment_ID] = {}
+                            if corresponding_segment_pos not in methylation_result[corresponding_segment_ID]:
+                                dir_str = "-" if direction else "+"
+                                methylation_result[corresponding_segment_ID][corresponding_segment_pos] = [dir_str, 0, 0]
+                            methylation_result[corresponding_segment_ID][corresponding_segment_pos][2] += 1
+                            if met_flag == "M":
+                                methylation_result[corresponding_segment_ID][corresponding_segment_pos][1] += 1
+
+
+                    elif alignment_block_type == "I":
+                        pos_in_read += 1
+                    elif alignment_block_type == "D":
+                        pos_in_path += 1
+                    else:
+                        raise Exception("Unknown alignment block type")
+
+            # print(path_len_constructed, path_len)
+            # print(cigar)
+            # print(alignment_block_type)
+
+            # print(path_start, path_end, path_end - path_start)
+            # print(query_start, query_end, query_end-query_start)
+            # print(cigar_freq)
+
+            # print(path_seq[433:533])
+            # print(query_seq[:100])
+            # print(path_str)
+            counter_pass += 1
+            # break
+
+
+
+    for segment_ID in sorted(methylation_result):
+        for pos in sorted(methylation_result[segment_ID]):
+            strand, met, cov = methylation_result[segment_ID][pos]
+            unmet = cov - met
+            l = f"{segment_ID}\t{pos}\t{strand}\t{met}\t{unmet}\t{cov}"
+            print(l)
+    return None
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def prepare_fasta(basecall, wd, threads=1, debug=False):
+    """
+    Prepare fasta file from long read base call BAM/SAM file
+    :param basecall: long read base call BAM/SAM file
+    :param wd: working directory
+    :return: fasta file
+    """
+
+    bam_sam_to_fasta(basecall, generate_fasta_fp(wd))
+    return None
+
+
+def align(gfa, wd, threads=1, debug=False):
+    """
+    Align long reads to genome graph
+    :param wd: working directory
+    :param gfa: genome graph file path in GFA format
+    """
+
+    # TODO use non-default version of GraphAligner
+    cmd = f"GraphAligner -t {threads} --cigar-match-mismatch -g {gfa} -f {generate_fasta_fp(wd)} -a {generate_fp_within_wd(wd, 'align.gaf')} -x vg"
+    align_exe = utility.SystemExecute()
+
+    # TODO
+    # align_exe.execute(cmd, stdout=generate_fp_within_wd(wd, 'align.log'), stderr=generate_fp_within_wd(wd, 'align.err'))
+    print(cmd)
+
+    return None
+
+def extract_methylation(gfa_fp, wd, threads=1, debug=False):
+    """
+    Extract methylation information
+    :param wd: working directory
+    :param gfa_fp: genome graph file path in GFA format
+    :return: methylation information
+    """
+
+    extract_methylation_single_thread(gfa_fp, wd, debug=debug)
+
+    return None
+
+
+def main(basecall, gfa_fp, wd, threads=1, debug=False):
+    """
+    One step to run all
+    :param basecall: long read base call BAM/SAM file
+    :param gfa_fp: genome graph file path in GFA format
+    :param wd: working directory
+    :return: methylation information
+    """
+
+    prepare_fasta(basecall, wd, threads=threads, debug=debug)
+    align(gfa_fp, wd, threads=threads, debug=debug)
+    extract_methylation(gfa_fp, wd, threads=threads, debug=debug)
+
+    return None
+
+
+
+
+
+if __name__ == "__main__":
+
+    pass
diff --git a/src/main.py b/src/main.py
index 55eb3f9..2b48405 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,7 +4,7 @@
 __copyright__ = "Copyright 2023-2024, Ting Wang Lab"
 __credits__ = ["Juan Macias"]
 __license__ = "MIT"
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 __maintainer__ = "Wenjin Zhang"
 __email__ = "wenjin@wustl.edu"
 
@@ -98,18 +98,23 @@
         else:
             freport.write(f"Insert lambda phage genome into genome graph as segment: {lambda_segment_id}\n")
 
-        graph_trim_flag = kvargs.get("trim", "Y").lower() in "yestrue"
-        g = gfa.GraphicalFragmentAssemblyMemory()
-        g.parse(original_gfa_with_lambda, keep_link=True)
-        g.write_converted(gfa_c2t, "C", "T", SNV_trim=graph_trim_flag)
+        graph_trim_flag = kvargs.get("trim", "N").lower() in "yestrue"
 
-        del g
 
-        g = gfa.GraphicalFragmentAssemblyMemory()
-        g.parse(original_gfa_with_lambda, keep_link=True)
-        g.write_converted(gfa_g2a, "G", "A", SNV_trim=graph_trim_flag)
+        if graph_trim_flag:
+            g = gfa.GraphicalFragmentAssemblyMemory()
+            g.parse(original_gfa_with_lambda, keep_link=True)
+            g.write_converted(gfa_c2t, "C", "T", SNV_trim=graph_trim_flag)
 
-        del g
+            del g
+
+            g = gfa.GraphicalFragmentAssemblyMemory()
+            g.parse(original_gfa_with_lambda, keep_link=True)
+            g.write_converted(gfa_g2a, "G", "A", SNV_trim=graph_trim_flag)
+
+            del g
+        else:
+            utility.gfa_converter(original_gfa_with_lambda, prefix+".wl", compress=False)
 
         for gfa_file_path in [gfa_c2t, gfa_g2a]:
             index_prefix = gfa_file_path[:-4]
diff --git a/src/mainL.py b/src/mainL.py
new file mode 100644
index 0000000..2637bc1
--- /dev/null
+++ b/src/mainL.py
@@ -0,0 +1,167 @@
+
+
+
+import os
+import re
+import sys
+import argparse
+
+
+import longread
+
+
+
+
+
+
+
+
+if __name__ == "__main__":
+
+    # Arugment parser
+    parser = argparse.ArgumentParser(description='methylGrapher for long reads')
+
+    subparsers = parser.add_subparsers(dest='command', help='Sub-command help')
+
+
+    # main
+    parser_main = subparsers.add_parser('main', help='One step to run all')
+    parser_main.add_argument('-basecall', help='Long read base call BAM/SAM file', required=True)
+    parser_main.add_argument('-gfa', help='Genome graph file path in GFA format', required=True)
+    parser_main.add_argument('-t', help='number of threads', default=1, type=int)
+    parser_main.add_argument('-debug', help='debug mode')
+    parser_main.add_argument('-wd', help='working directory', default="./")
+
+
+    # 1 prepare fasta
+    parser_prepare_fasta = subparsers.add_parser('prepare_fasta', help='Prepare fasta file')
+    parser_prepare_fasta.add_argument('-basecall', help='Long read base call BAM/SAM file', required=True)
+    parser_prepare_fasta.add_argument('-wd', help='working directory', default="./")
+    parser_prepare_fasta.add_argument('-t', help='number of threads', default=1, type=int)
+    parser_prepare_fasta.add_argument('-debug', help='debug mode')
+
+
+    # 2 align
+    parser_align = subparsers.add_parser('align', help='Align long reads to genome graph')
+    parser_align.add_argument('-gfa', help='Genome graph file path in GFA format', required=True)
+    parser_align.add_argument('-wd', help='Long read fasta file', default="./")
+    parser_align.add_argument('-t', help='number of threads', default=1, type=int)
+    parser_align.add_argument('-debug', help='debug mode')
+
+    # 3 methylation extraction
+    parser_extraction = subparsers.add_parser('extraction', help='Extract methylation information')
+    parser_extraction.add_argument('-gfa', help='Genome graph file path in GFA format', required=True)
+    parser_extraction.add_argument('-wd', help='working directory', default="./")
+    parser_extraction.add_argument('-t', help='number of threads', default=1, type=int)
+    parser_extraction.add_argument('-debug', help='debug mode')
+
+
+    # TODO add function execution for subcommands
+
+
+
+    args = parser.parse_args()
+
+    if args.command == 'main':
+        print("Running methylGrapherL with main function", file=sys.stderr)
+
+
+        wd = "./"
+        thread = 1
+        debug = False
+        basecall_fp = args.basecall
+        gfa_fp = args.gfa
+
+        if args.wd:
+            wd = args.wd
+        if args.t:
+            thread = int(args.t)
+        if args.debug:
+            debug = True
+
+
+
+        # Log parameters
+        print("Long read base call file path: ", basecall_fp, file=sys.stderr)
+        print("Genome graph file path: ", gfa_fp, file=sys.stderr)
+        print("Working directory: ", wd, file=sys.stderr)
+        print("Number of threads: ", thread, file=sys.stderr)
+        print("Debug mode: ", debug, file=sys.stderr)
+
+        # Run main function
+        longread.main(basecall_fp, gfa_fp, wd)
+
+
+    elif args.command == 'prepare_fasta':
+        print("Prepare fasta", file=sys.stderr)
+
+        wd = "./"
+        thread = 1
+        debug = False
+        basecall_fp = args.basecall
+
+        if args.wd:
+            wd = args.wd
+        if args.t:
+            thread = int(args.t)
+        if args.debug:
+            debug = True
+
+        # Log parameters
+        print("Long read base call file path: ", basecall_fp, file=sys.stderr)
+        print("Working directory: ", wd, file=sys.stderr)
+        print("Number of threads: ", thread, file=sys.stderr)
+        print("Debug mode: ", debug, file=sys.stderr)
+
+        # Run prepare fasta function
+        longread.prepare_fasta(basecall_fp, wd)
+
+
+
+    elif args.command == 'align':
+        print("Align")
+
+        wd = "./"
+        thread = 1
+        debug = False
+        gfa_fp = args.gfa
+
+        if args.wd:
+            wd = args.wd
+        if args.t:
+            thread = int(args.t)
+        if args.debug:
+            debug = True
+
+
+        # Log parameters
+        print("Genome graph file path: ", gfa_fp, file=sys.stderr)
+        print("Working directory: ", wd, file=sys.stderr)
+        print("Number of threads: ", thread, file=sys.stderr)
+        print("Debug mode: ", debug, file=sys.stderr)
+
+
+
+    elif args.command == 'extraction':
+        wd = "./"
+        thread = 1
+        debug = False
+        gfa_fp = args.gfa
+
+        if args.wd:
+            wd = args.wd
+        if args.t:
+            thread = int(args.t)
+        if args.debug:
+            debug = True
+
+        # Log parameters
+        print("Genome graph file path: ", gfa_fp, file=sys.stderr)
+        print("Working directory: ", wd, file=sys.stderr)
+        print("Number of threads: ", thread, file=sys.stderr)
+        print("Debug mode: ", debug, file=sys.stderr)
+
+    else:
+        print('No command provided')
+
+
diff --git a/src/mainLOLD.py b/src/mainLOLD.py
new file mode 100644
index 0000000..b8b1b4a
--- /dev/null
+++ b/src/mainLOLD.py
@@ -0,0 +1,578 @@
+
+
+
+import os
+import re
+import sys
+import time
+
+import gfa
+import mcall
+import utility
+
+
+
+
+class tmpHelp:
+    def __init__(self):
+        self._help_text = """
+        Usage: main.py <command> [options]
+
+        Commands:
+        prepareGenome  Prepare genome for methylation calling
+        callMethylation  Call methylation from nanopore reads
+
+        Options:
+        -h, --help  Show this help message and exit
+        -t, --thread  Number of threads to use
+        """
+
+    def help_text(self):
+        return self._help_text
+
+
+help = tmpHelp()
+
+
+utl = utility.Utility()
+
+
+
+
+
+
+# Step1: SAM/BAM file to fasta file
+# Step2: Alignment
+# Step3: Methylation extraction
+
+
+
+
+
+cytosine_methylation_regex = re.compile(r"\+(\(.*\))\-(\(.*\))")
+
+
+def path_to_list(path_str):
+    # Example: <29983488>29983486<29983485<29983484
+    res = []
+    direction = True
+    segment_ID = ""
+    for c in path_str:
+        if c in "<>":
+            if c == "<":
+                direction = False
+            elif c == ">":
+                direction = True
+        else:
+            segment_ID += c
+            continue
+
+        res.append((segment_ID, direction))
+        segment_ID = ""
+
+    res.append((segment_ID, direction))
+    res.pop(0)
+
+    return res
+
+
+def debug_get_fasta_by_read_name(read_name, fasta_fp):
+    with open(fasta_fp) as f:
+        for l in f:
+            if l.startswith(">"):
+                if l[1:].strip() == read_name:
+                    return f.readline().strip()
+    return None
+
+
+def seq_reverse_complement(seq):
+    return seq.translate(str.maketrans("ATCG", "TAGC"))[::-1]
+
+
+gfa_fp = f"/scratch/wzhang/ref/graph/wgbs_bspg/bspg.wl.gfa"
+
+
+# Store entire GFA in memory
+class GraphicalFragmentAssemblyMemory(object):
+
+    def __init__(self):
+        self.clear()
+
+    def clear(self):
+        self._header = ""
+        self._segment = {}
+        self._walk = {}
+
+        self._original_gfa_path = ""
+
+    def parse(self, gfa_file):
+        self._original_gfa_path = gfa_file
+        # TODO when not keep_tag, keep_link, forbid to write GFA and access those two
+        for l in open(gfa_file):
+            if l[0] not in "HSL":
+                continue
+
+            l = l.strip().split("\t")
+
+            if l[0] == "H":
+                self._header = "\t".join(l[1:])
+
+            if l[0] == "S":
+                rt, segID, seq, *tags = l
+                # sequence, tag, links, parent_links_count
+                self._segment[segID] = seq
+
+            else:
+                continue
+
+    def get_sequence_by_segment_ID(self, segment_ID):
+        return self._segment[segment_ID]
+
+    def get_sequence_length_by_segment_ID(self, segment_ID):
+        return len(self._segment[segment_ID])
+
+    def get_sequences_by_segment_ID(self, segment_IDs):
+        res = {}
+        for sID in segment_IDs:
+            res[sID] = self.get_sequence_by_segment_ID(sID)
+        return res
+
+    def get_tag_by_segment_ID(self, segment_ID):
+        raise NotImplementedError
+
+    def get_parent_link_count(self, segment_ID):
+        return self._segment[segment_ID][3]
+
+    def get_parent_links(self, segment_ID):
+        raise NotImplementedError
+
+    def get_child_links(self, segment_ID):
+        return self._segment[segment_ID][2]
+
+    def all_segment_IDs(self):
+        return self._segment.keys()
+
+
+gfa_instance = GraphicalFragmentAssemblyMemory()
+gfa_instance.parse(gfa_fp)
+
+
+
+
+
+
+
+
+
+
+
+
+
+def bsam_to_fasta(bsam_fp, fasta_fp):
+    # Explore sam file
+
+    output_fasta = open(fasta_fp, "w")
+
+    no_methylation_tag = 0
+    with open(bsam_fp) as f:
+        read_line_count = 0
+        for l in f:
+            # Skip header
+            if l.startswith('@'):
+                continue
+
+            line = l.strip().split('\t')
+            basic_info = line[:11]
+            tags_list = line[11:]
+            tags = {}
+            for tag_str in tags_list:
+                tag = [tag_str[:4], tag_str[5:]]
+                if tag_str.startswith("Ml:B:C"):
+                    tag = ["Ml:B:C", tag_str[7:]]
+                tags[tag[0]] = tag[1]
+
+            read_name = basic_info[0]
+            read_seq = basic_info[9].upper()
+
+            if 'Mm:Z' not in tags:
+                no_methylation_tag += 1
+                continue
+
+            if "Ml:B:C" not in tags:
+                no_methylation_tag += 1
+                continue
+
+            mm_tag = tags['Mm:Z']
+            ml_tag = tags["Ml:B:C"]
+
+            if mm_tag.endswith(";"):
+                mm_tag = mm_tag[:-1]
+            if ml_tag.endswith(";"):
+                ml_tag = ml_tag[:-1]
+
+            mm_tag = mm_tag.split(",")
+            ml_tag = ml_tag.split(",")
+
+            if mm_tag[0] != 'C+m':
+                # print(mm_tag[0], "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
+                continue
+            else:
+                mm_tag = mm_tag[1:]
+
+            if len(ml_tag) in [0, 1]:
+                continue
+
+            mm_tag = [int(x) for x in mm_tag]
+            ml_tag = [int(x) for x in ml_tag]
+            assert len(mm_tag) == len(ml_tag)
+
+            cg_count = read_seq.count("C")
+
+            cytosines_positions = []
+            cpg_positions = []
+            for i, nuc in enumerate(read_seq):
+                if nuc == "C":
+                    cytosines_positions.append(i)
+                    if i < len(read_seq) - 1 and read_seq[i + 1] == "G":
+                        cpg_positions.append(i)
+
+            methylated_cytosines = []
+            unmethylated_cytosines = []
+            nth_cytosine = 0
+            for i in range(len(mm_tag)):
+                interval = mm_tag[i] + 1
+                if nth_cytosine == 0:
+                    nth_cytosine = interval - 1
+                else:
+                    nth_cytosine += interval
+                # print(nth_cytosine, interval)
+
+                methylated = ml_tag[i] > 127
+                if methylated:
+                    methylated_cytosines.append(cytosines_positions[nth_cytosine])
+                else:
+                    unmethylated_cytosines.append(cytosines_positions[nth_cytosine])
+
+            cpg_covered_count = len(
+                set(cpg_positions).intersection(set(methylated_cytosines).union(set(unmethylated_cytosines))))
+            if cpg_covered_count < len(cpg_positions) - 5:
+                print(read_line_count, cpg_covered_count, len(cpg_positions))
+                continue
+            # print(read_name)
+
+            # print(read_line_count)
+            # print(read_name)
+            # print(l)
+            # print(len(read_seq))
+            # print(line)
+            # print(basic_info[:9])
+            # print(tags)
+            # print(cg_count)
+            # print(len(mm_tag))
+            # print(len(ml_tag))
+
+            # print(mm_tag[:40])
+            # print(cytosines_positions[:80])
+            # print()
+            # print(methylated_cytosines[:20])
+            # print(cpg_positions[:20])
+
+            # print(len(methylated_cytosines), len(cpg_positions))
+            # print(methylated_cytosines[-1], cpg_positions[-1])
+
+            # print('\n')
+
+            methylated_cytosines = [str(x) for x in methylated_cytosines]
+            unmethylated_cytosines = [str(x) for x in unmethylated_cytosines]
+            met_str = f"+({','.join(methylated_cytosines)})-({','.join(unmethylated_cytosines)})"
+            fasta_entry = f">{read_name}{met_str}\n{read_seq}\n\n"
+
+            # print(fasta_entry)
+            output_fasta.write(fasta_entry)
+
+            read_line_count += 1
+            if read_line_count > 10:
+                pass
+
+    output_fasta.close()
+    return None
+
+
+
+
+
+counter_pass = 0
+counter_issue_no_methylation = 0
+counter_issue = 0
+counter_total = 0
+
+methylation_result = {}
+
+time_start = time.time()
+with open(output_fp) as f:
+    for l in f:
+
+        l = l.strip().split("\t")
+        # print(l)
+
+        counter_total += 1
+        if counter_total % 1000 == 0:
+            duration = time.time() - time_start
+            print(counter_total, counter_pass, counter_issue_no_methylation, counter_issue, duration)
+
+        read_name = l[0]
+
+        query_len = int(l[1])
+        query_start = int(l[2])
+        query_end = int(l[3])
+
+        strand = l[4]
+
+        path_str = l[5]
+        # if ">" in path_str:
+        #    continue
+
+        path_len = int(l[6])
+        path_start = int(l[7])
+        path_end = int(l[8])
+
+        matched_len = int(l[9])
+        alignment_block_len = int(l[10])
+        mapq = int(l[11])
+
+        tag_list = l[12:]
+        tags = {}
+
+        # print(tag_list)
+
+        for tag_str in tag_list:
+            tname, tdt, tv = tag_str.split(":")
+            if tdt == "i":
+                tv = int(tv)
+            elif tdt == "f":
+                tv = float(tv)
+            tags[tname] = tv
+            # print(tname, tv)
+
+        mc = cytosine_methylation_regex.findall(read_name)
+        if len(mc) != 1:
+            counter_issue_no_methylation += 1
+            continue
+
+        methylated_cytosines = mc[0][0][1:-1].split(",")
+        unmethylated_cytosines = mc[0][1][1:-1].split(",")
+
+        if len(methylated_cytosines) + len(unmethylated_cytosines) == 0:
+            counter_issue_no_methylation += 1
+            continue
+
+        methylated = list(map(int, list(filter(lambda x: x != "", methylated_cytosines))))
+        unmethylated = list(map(int, list(filter(lambda x: x != "", unmethylated_cytosines))))
+
+        # Example: 42M1I5M1D23M1I1M1I1M1I13M1D41M1D35M1D4M1D
+        cigar_str = tags["cg"]
+        cigar = []
+        for c in re.findall(r"\d+[MID]", cigar_str):
+            cigar.append((int(c[:-1]), c[-1]))
+
+        alignment_block_type = []
+        block_start = 0
+        block_end = 0
+        for length, alignment_type in cigar:
+            block_end += length
+
+            alignment_block_type.append((block_start, block_end, alignment_type))
+            block_start = block_end
+        alignment_block_type.append((block_start, block_end, alignment_type))
+
+        """     
+        TODO uncomment
+        cigar_freq = {}
+        for c in cigar:
+            if c[1] not in cigar_freq:
+                cigar_freq[c[1]] = 0
+            cigar_freq[c[1]] += c[0]
+
+        assert path_end - path_start == cigar_freq["M"] + cigar_freq.get("D", 0)
+        assert query_end - query_start == cigar_freq["M"] + cigar_freq.get("I", 0)
+
+
+        # Just to make sure the cigar interpretation is correct
+        reconstructed_cigar = ""
+        for c in cigar:
+            reconstructed_cigar += str(c[0]) + c[1]
+        assert reconstructed_cigar == cigar_str
+        """
+
+        # Just to make sure the path interpretation is correct
+        path_len_constructed = 0
+        path_list = path_to_list(path_str)
+        for segID, direction in path_list:
+            seq = gfa_instance.get_sequence_by_segment_ID(segID)
+            # print(segID, len(seq))
+            path_len_constructed += len(seq)
+        # print(path_str)
+        assert path_len_constructed == path_len
+
+        # query_seq = debug_get_fasta_by_read_name(read_name, "./short.fasta")
+        query_seq = ""
+        path_seq = ""
+
+        """
+        for i, (segID, direction) in enumerate(path_list):
+            seq = gfa_instance.get_sequence_by_segment_ID(segID)
+            if not direction:
+                seq = seq_reverse_complement(seq)
+            path_seq += seq
+        """
+
+        path_segment_length = {}
+        for i, (segID, direction) in enumerate(path_list):
+            seql = gfa_instance.get_sequence_length_by_segment_ID(segID)
+            path_segment_length[segID] = seql
+
+        pos_in_read = -1
+        pos_in_path = -1
+        for alignment_block_len, alignment_block_type in cigar:
+
+            for ai in range(alignment_block_len):
+                if alignment_block_type == "M":
+                    pos_in_read += 1
+                    pos_in_path += 1
+
+                    pos_in_abs_path = path_start + pos_in_path
+                    pos_in_abs_read = query_start + pos_in_read
+
+                    met_flag = None
+                    if pos_in_abs_read in methylated:
+                        met_flag = "M"
+                    elif pos_in_abs_read in unmethylated:
+                        met_flag = "U"
+
+                    if met_flag is not None:
+
+                        corresponding_segment_ID = None
+                        corresponding_segment_pos = pos_in_abs_path
+                        for segID, direction in path_list:
+                            if corresponding_segment_pos < path_segment_length[segID]:
+                                corresponding_segment_ID = segID
+                                if not direction:
+                                    # TODO need double check
+                                    corresponding_segment_pos = path_segment_length[
+                                                                    segID] - corresponding_segment_pos - 1
+                                break
+
+                            corresponding_segment_pos -= path_segment_length[segID]
+
+                        segment_seq = gfa_instance.get_sequence_by_segment_ID(corresponding_segment_ID)
+                        csp1 = corresponding_segment_pos
+                        csp2 = corresponding_segment_pos + 2
+                        if csp2 > len(segment_seq):
+                            csp2 = csp1 + 1
+
+                        if not direction:
+                            csp2 = corresponding_segment_pos
+                            csp1 = corresponding_segment_pos - 2
+                            if csp1 < 0:
+                                csp1 = 0
+                        # print(path_seq[pos_in_abs_path: pos_in_abs_path+2], query_seq[pos_in_abs_read:pos_in_abs_read+2], segment_seq[csp1: csp2], met_flag)
+
+                        if corresponding_segment_ID not in methylation_result:
+                            methylation_result[corresponding_segment_ID] = {}
+                        if corresponding_segment_pos not in methylation_result[corresponding_segment_ID]:
+                            dir_str = "-" if direction else "+"
+                            methylation_result[corresponding_segment_ID][corresponding_segment_pos] = [dir_str, 0, 0]
+                        methylation_result[corresponding_segment_ID][corresponding_segment_pos][2] += 1
+                        if met_flag == "M":
+                            methylation_result[corresponding_segment_ID][corresponding_segment_pos][1] += 1
+
+
+
+
+                elif alignment_block_type == "I":
+                    pos_in_read += 1
+                elif alignment_block_type == "D":
+                    pos_in_path += 1
+                else:
+                    raise Exception("Unknown alignment block type")
+
+        # print(path_len_constructed, path_len)
+        # print(cigar)
+        # print(alignment_block_type)
+
+        # print(path_start, path_end, path_end - path_start)
+        # print(query_start, query_end, query_end-query_start)
+        # print(cigar_freq)
+
+        # print(path_seq[433:533])
+        # print(query_seq[:100])
+        # print(path_str)
+        counter_pass += 1
+        # break
+
+
+
+
+if __name__ == "__main__":
+    args = sys.argv
+    args.pop(0)
+
+    ga_path = "GraphAligner"
+    thread = 1
+
+    # Find config file
+    try:
+        config = utility.ConfigParser("config.ini")
+        vg_path = config.get("default", "ga_path")
+        thread = config.get("default", "thread")
+    except:
+        pass
+
+    try:
+        thread = int(thread)
+    except:
+        thread = 1
+
+    if len(args) == 0:
+        print("No command specified. Use 'help' for more information.")
+        print(help.help_text())
+        sys.exit(0)
+
+    command = args.pop(0)
+    command = command.lower()
+    if command not in ["help", "-h", "--help", "main"]:
+        print(f"Unknown command: {command}")
+        sys.exit(1)
+
+    kvargs = {}
+    while len(args) > 0:
+        arg = args.pop(0)
+        if arg.startswith("-"):
+            key = arg[1:]
+            value = args.pop(0)
+            kvargs[key] = value
+        else:
+            print(f"Unknown argument: {arg}\n\n")
+            print(help.help_text())
+            sys.exit(1)
+
+    if "thread" in kvargs:
+        thread = int(kvargs["thread"])
+        del kvargs["thread"]
+    if "t" in kvargs:
+        thread = int(kvargs["t"])
+        del kvargs["t"]
+
+
+    if command in ["help", "-h", "--help"]:
+        print(help.help_text())
+        sys.exit(0)
+
+
+
+    if command == "preparegenome":
+        original_gfa_file_path = kvargs["gfa"]
+        prefix = kvargs["prefix"]
+
+        fn_report = prefix + ".prepare.genome.report.txt"
+        freport = open(fn_report, "w")
\ No newline at end of file
diff --git a/src/mgmp.py b/src/mgmp.py
new file mode 100644
index 0000000..650c187
--- /dev/null
+++ b/src/mgmp.py
@@ -0,0 +1,361 @@
+
+
+import os
+import sys
+import time
+import multiprocessing
+
+
+
+
+# write a framework for both single threaded and multi-threaded execution
+
+# 3 main class
+# 1. Functions to execute
+# 2. Worker class
+# 3. Task class
+
+
+
+
+# The single threaded execution is just one function after another
+# The multi-processed execution is more complex
+
+
+
+def test_step1(iter):
+
+    i = 0
+    while i < 10000:
+        i += 1
+        yield i
+
+
+def test_step2(iter):
+
+    for i in iter:
+        time.sleep(0.0001)
+        yield i**2
+
+
+
+
+
+def worker_function(pid, name, function, input_queue, output_queue, batch_size=1024):
+    print(f"{name} worker {pid} started")
+    while True:
+        try:
+            tasks = input_queue.get()
+            if tasks is None:
+                break
+
+            # print(pid, "tasks")
+
+            result = []
+            for r in function(tasks):
+                result.append(r)
+
+                if len(result) >= batch_size:
+                    output_queue.put(result)
+                    result = []
+
+            if len(result) > 0:
+                output_queue.put(result)
+
+        except Exception as e:
+            print(e)
+            break
+
+    print(f"{name} worker {pid} finished")
+
+
+def end_function(pid, last_queue):
+    print(f"End function {pid} started")
+    while True:
+        try:
+            tasks = last_queue.get()
+            if tasks is None:
+                break
+
+            # print(pid, len(tasks))
+        except Exception as e:
+            print(e)
+            break
+    print(f"End function {pid} finished")
+
+
+
+class TaskOLD:
+
+    def __init__(self):
+        pass
+
+    def run(self, threads=1):
+        maxsize = 100
+        batch_size = 256
+
+        q0 = multiprocessing.Queue(maxsize=maxsize)
+        q1 = multiprocessing.Queue(maxsize=maxsize)
+        q2 = multiprocessing.Queue(maxsize=maxsize)
+
+
+
+        pool1 = []
+        for i in range(1):
+            p = multiprocessing.Process(
+                name=f"MGWorke1-{i}",
+                target=worker_function,
+                args=(i, "1", test_step1, q0, q1),
+                kwargs={"batch_size": batch_size}
+            )
+            p.start()
+            pool1.append(p)
+
+        pool2 = []
+        for i in range(threads):
+            p = multiprocessing.Process(
+                name=f"MGWorker2-{i}",
+                target=worker_function,
+                args=(i, "2", test_step2, q1, q2),
+                kwargs={"batch_size": batch_size}
+            )
+            p.start()
+            pool2.append(p)
+
+        p = multiprocessing.Process(
+            name=f"MGEnd",
+            target=end_function,
+            args=(0, q2)
+        )
+        p.start()
+
+
+
+
+        q0.put(1)
+        q0.put(None)
+
+        for p in pool1:
+            p.join()
+
+        for p in pool2:
+            q1.put(None)
+
+        for p in pool2:
+            p.join()
+
+        q2.put(None)
+
+
+
+        print("Finished")
+
+        return None
+
+
+
+
+# TODO how to pass parameters
+class Task:
+
+    def __init__(self, steps, batch_size=1024, queue_max_size=100):
+
+        # Example steps
+        steps_example = [
+            {"name": "step1", "function": test_step1, "args": [], "kwargs": {}, "threads": 1},
+            {"name": "step2", "function": test_step2, "args": [], "kwargs": {}, "threads": 2},
+        ]
+
+        self._steps = steps
+        self._batch_size = batch_size
+        self._queue_max_size = queue_max_size
+
+        return
+
+    def run(self):
+        threads = 10
+        maxsize = 100
+        batch_size = 256
+
+
+        # N steps need N queues, but for formatting purposes, it need N+1 queues
+        q0 = multiprocessing.Queue(maxsize=self._queue_max_size)
+        q1 = multiprocessing.Queue(maxsize=self._queue_max_size)
+        q2 = multiprocessing.Queue(maxsize=self._queue_max_size)
+
+
+        # N steps need N pools
+        pool1 = []
+        for i in range(1):
+            p = multiprocessing.Process(
+                name=f"MGWorke1-{i}",
+                target=worker_function,
+                args=(i, "1", test_step1, q0, q1),
+                kwargs={"batch_size": batch_size}
+            )
+            p.start()
+            pool1.append(p)
+
+        pool2 = []
+        for i in range(3):
+            p = multiprocessing.Process(
+                name=f"MGWorker2-{i}",
+                target=worker_function,
+                args=(i, "2", test_step2, q1, q2),
+                kwargs={"batch_size": batch_size}
+            )
+            p.start()
+            pool2.append(p)
+
+        p = multiprocessing.Process(
+            name=f"MGEnd",
+            target=end_function,
+            args=(0, q2)
+        )
+        p.start()
+
+
+
+
+
+        # Let the steps begin
+        q0.put([1])
+        q0.put(None)
+
+        for p in pool1:
+            p.join()
+
+        for p in pool2:
+            q1.put(None)
+
+        for p in pool2:
+            p.join()
+
+        q2.put(None)
+
+
+
+        print("Finished")
+
+        return None
+
+
+
+def myfunc(x, y, z):
+    print(x)
+    print(y)
+    print(z)
+    print(x,y,z)
+    return x + y + z
+
+
+def test_args(a, b, c, *args, kw=3, **kwargs):
+    print(a)
+    print(b)
+    print(c)
+    print(kw)
+    print(args)
+    print(kwargs)
+    myfunc(*args)
+    return None
+
+
+
+
+def to_worker_function(function, pid, name, input_queue, output_queue, *args, **kwargs):
+
+
+    def wrapper(function, pid, name, input_queue, output_queue, *args, **kwargs):
+        batch_size = kwargs.get("batch_size", 1024)
+        print(f"{name} worker {pid} started")
+        while True:
+            try:
+                tasks = input_queue.get()
+                if tasks is None:
+                    break
+
+                # print(pid, "tasks")
+
+                result = []
+                for r in function(tasks):
+                    result.append(r)
+
+                    if len(result) >= batch_size:
+                        output_queue.put(result)
+                        result = []
+
+                if len(result) > 0:
+                    output_queue.put(result)
+
+            except Exception as e:
+                print(e)
+                break
+
+        print(f"{name} worker {pid} finished")
+
+    return wrapper
+
+
+
+
+
+test_args(1, 2, 3, 100, 200, 300, verbose=True, silent=False)
+
+
+sys.exit(3)
+
+if __name__ == "__main__":
+
+
+    # Single threaded execution
+
+    ts1 = time.time()
+    s1_gen = test_step1(lambda x: x)
+    s2_gen = test_step2(s1_gen)
+    for i in s2_gen:
+        pass
+    duration1 = time.time() - ts1
+
+
+    # Multi-threaded execution
+
+    ts2 = time.time()
+    task = Task(1)
+    task.run()
+    duration2 = time.time() - ts2
+
+    ts5 = time.time()
+    task = Task(1)
+    task.run()
+    duration5 = time.time() - ts5
+
+    ts10 = time.time()
+    task = Task(1)
+    task.run()
+    duration10 = time.time() - ts10
+
+    print(duration1, duration2, duration5, duration10)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/utility.py b/src/utility.py
index 0432a2c..c0d417f 100644
--- a/src/utility.py
+++ b/src/utility.py
@@ -131,8 +131,8 @@ def gfa_converter(input_gfa, output_file_prefix, compress=True, thread=1):
             l = l.strip().split('\t')
 
             if l[0] == 'S':
-                l[2] = l[2].replace(conversion[0], conversion[1])
-                l[2] = l[2].replace(conversion[0].lower(), conversion[1].lower())
+                sc = l[2].upper().replace(conversion[0], conversion[1])
+                l[2] = sc
 
             newl = '\t'.join(l) + '\n'
 
@@ -631,7 +631,7 @@ def header(self):
 """.strip()
 
     def version(self):
-        return "0.1.1"
+        return "0.1.2"
 
     def help_text_raw(self):
         return """