diff --git a/.github/workflows/build-and-test-MACS3-macos.yml b/.github/workflows/build-and-test-MACS3-macos.yml
index 2c45f9eb..794a84e2 100644
--- a/.github/workflows/build-and-test-MACS3-macos.yml
+++ b/.github/workflows/build-and-test-MACS3-macos.yml
@@ -24,7 +24,7 @@ jobs:
     strategy:
       matrix:
         os: [macos-13]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
     runs-on: ${{ matrix.os }}
     name: Build on ${{ matrix.os }} with Python ${{ matrix.python-version }}
 
diff --git a/.github/workflows/build-and-test-MACS3-x64.yml b/.github/workflows/build-and-test-MACS3-x64.yml
index 2f7c7db5..e272f7f7 100644
--- a/.github/workflows/build-and-test-MACS3-x64.yml
+++ b/.github/workflows/build-and-test-MACS3-x64.yml
@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         arch: ['x64']
     name: Build on x64 with Python ${{ matrix.python-version }}
     steps:
diff --git a/.github/workflows/publish-to-anaconda.yml b/.github/workflows/publish-to-anaconda.yml
index 48477be4..55c5863a 100644
--- a/.github/workflows/publish-to-anaconda.yml
+++ b/.github/workflows/publish-to-anaconda.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy: 
       matrix: 
-        python-version: ['3.11']
+        python-version: ['3.12']
         arch: ['x64']
     name: Build conda package in x64 under Python ${{ matrix.python-version }}
     steps:
diff --git a/.github/workflows/publish-to-gh-with-sphinx.yml b/.github/workflows/publish-to-gh-with-sphinx.yml
index 63bf77ae..37c4c6b1 100644
--- a/.github/workflows/publish-to-gh-with-sphinx.yml
+++ b/.github/workflows/publish-to-gh-with-sphinx.yml
@@ -27,7 +27,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.11'  # Set up the Python version you need
+        python-version: '3.12'  # Set up the Python version you need
 
     - name: Install dependencies
       run: |
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
index fea7e87d..ee6eb2db 100644
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Set up Python 
         uses: actions/setup-python@v5 
         with: 
-          python-version: '3.11'
+          python-version: '3.12'
 
       # Here we build sdist in this job. If we plan to build wheels
       # and upload to PyPI, we need another job to build wheels, and
diff --git a/ChangeLog b/ChangeLog
index 7d00be76..31b2c22f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,5 @@
-2024-10-08  Tao Liu  <vladimir.liu@gmail.com>
-	MACS 3.0.3b
+2024-11-30  Tao Liu  <vladimir.liu@gmail.com>
+	MACS 3.0.3b1
 
 	* Features added
 
@@ -10,7 +10,8 @@
 	including the barcodes and counts information. In the `PETrackII`
 	class, we are able to extract a subset using a list of barcodes,
 	which enables us to call peaks only on a pool (pseudo-bulk) of
-	cells.
+	cells. Functions such as `callpeak`, `pileup` and `hmmratac` will
+	support this FRAG format in the future.
 
 	2) We extensively rewrote the `pyx` codes into `py` codes. In
 	another words, we now apply the 'pure python style' with PEP-484
@@ -27,6 +28,13 @@
 	alignment records that overlap with givin genomic
 	coordinates using BAM/BAI files.
 
+	2) `hmmratac`: wrong class name used while saving digested signals
+	in BedGraph files. Multiple other issues related to output
+	filenames. #682
+
+	3) `predictd` and `filterdup`: wrong variable name used while
+	reading multiple pe/frag files.
+
 	* Doc
 
 	1) Explanation on the filtering criteria on SAM/BAM/BAMPE files.
diff --git a/MACS3/Commands/callpeak_cmd.py b/MACS3/Commands/callpeak_cmd.py
index 024f539e..00954226 100644
--- a/MACS3/Commands/callpeak_cmd.py
+++ b/MACS3/Commands/callpeak_cmd.py
@@ -1,4 +1,4 @@
-# Time-stamp: <2024-10-02 15:58:36 Tao Liu>
+# Time-stamp: <2024-11-29 21:43:31 Tao Liu>
 
 """Description: MACS 3 call peak main executable
 
@@ -57,7 +57,7 @@ def run(args):
 
     # 0 output arguments
     info("\n"+options.argtxt)
-    options.PE_MODE = options.format in ('BAMPE', 'BEDPE')
+    options.PE_MODE = options.format in ('BAMPE', 'BEDPE', 'FRAG')
     if options.PE_MODE:
         tag = 'fragment'        # call things fragments not tags
     else:
diff --git a/MACS3/Commands/hmmratac_cmd.py b/MACS3/Commands/hmmratac_cmd.py
index 57ab2f4f..846edbde 100644
--- a/MACS3/Commands/hmmratac_cmd.py
+++ b/MACS3/Commands/hmmratac_cmd.py
@@ -1,5 +1,4 @@
-# Time-stamp: <2024-10-08 10:53:48 Tao Liu>
-
+# Time-stamp: <2025-02-05 12:38:55 Tao Liu>
 
 """Description: Main HMMR command
 
@@ -24,7 +23,7 @@
 # from MACS3.Utilities.Constants import *
 from MACS3.Utilities.OptValidator import opt_validate_hmmratac
 from MACS3.IO.PeakIO import PeakIO
-from MACS3.IO.Parser import BAMPEParser, BEDPEParser  # BAMaccessor
+from MACS3.IO.Parser import BAMPEParser, BEDPEParser, FragParser  # BAMaccessor
 from MACS3.Signal.HMMR_EM import HMMR_EM
 from MACS3.Signal.HMMR_Signal_Processing import (generate_weight_mapping,
                                                  generate_digested_signals,
@@ -103,6 +102,9 @@ def run(args):
     elif options.format == "BEDPE":
         options.info("#1 Read fragments from BEDPE file...")
         parser = BEDPEParser
+    elif options.format == "FRAG":
+        options.info("#1 Read fragments from FRAG file...")
+        parser = FragParser
     else:
         raise Exception("wrong format")
 
@@ -204,7 +206,7 @@ def run(args):
         fc_bdg = digested_atac_signals[0]
     else:
         options.info("#  Pile up all fragments")
-        fc_bdg = petrack.pileup_bdg([1.0, ], baseline_value=0)
+        fc_bdg = petrack.pileup_bdg(baseline_value=0)
     (sum_v, n_v, max_v, min_v, mean_v, std_v) = fc_bdg.summary()
     options.info("#  Convert pileup to fold-change over average signal")
     fc_bdg.apply_func(lambda x: x/mean_v)
@@ -464,14 +466,8 @@ def run(args):
     # First, the likelihoods for each of the three states in a bedGraph
     if options.save_likelihoods:
         options.info(f"# Write the likelihoods for each states into three bedGraph files {options.name}_open.bdg, {options.name}_nuc.bdg, and {options.name}_bg.bdg")
-        open_state_bdg_fhd = open(open_state_bdgfile, "w")
-        nuc_state_bdg_fhd = open(nuc_state_bdgfile, "w")
-        bg_state_bdg_fhd = open(bg_state_bdgfile, "w")
-        save_proba_to_bedGraph(predicted_proba_file, options.hmm_binsize, open_state_bdg_fhd, nuc_state_bdg_fhd, bg_state_bdg_fhd, i_open_region, i_nucleosomal_region, i_background_region)
+        save_proba_to_bedGraph(predicted_proba_file, options.hmm_binsize, open_state_bdgfile, nuc_state_bdgfile, bg_state_bdgfile, i_open_region, i_nucleosomal_region, i_background_region)
         predicted_proba_file.seek(0)  # reset
-        open_state_bdg_fhd.close()
-        nuc_state_bdg_fhd.close()
-        bg_state_bdg_fhd.close()
         options.info("# finished writing proba_to_bedgraph")
 
     # # Generate states path:
diff --git a/MACS3/Commands/pileup_cmd.py b/MACS3/Commands/pileup_cmd.py
index 933100b9..bdbcaa34 100644
--- a/MACS3/Commands/pileup_cmd.py
+++ b/MACS3/Commands/pileup_cmd.py
@@ -4,7 +4,8 @@
 under the terms of the BSD License (see the file LICENSE included with
 the distribution).
 """
-# Time-stamp: <2024-10-02 16:51:15 Tao Liu>
+# Time-stamp: <2025-02-05 12:39:14 Tao Liu>
+
 # ------------------------------------
 # python modules
 # ------------------------------------
@@ -16,7 +17,8 @@
 # ------------------------------------
 # from MACS3.Utilities.Constants import *
 from MACS3.Utilities.OptValidator import opt_validate_pileup
-from MACS3.Signal.Pileup import pileup_and_write_se, pileup_and_write_pe
+from MACS3.Signal.Pileup import pileup_and_write_se
+from MACS3.IO.BedGraphIO import bedGraphIO
 # ------------------------------------
 # Main function
 # ------------------------------------
@@ -35,10 +37,10 @@ def run(o_options):
     # error = options.error
 
     # 0 output arguments
-    options.PE_MODE = options.format in ('BAMPE', 'BEDPE')
+    options.PE_MODE = options.format in ('BAMPE', 'BEDPE', 'FRAG')
 
     # 0 prepare output file
-    outfile = os.path.join(options.outdir, options.outputfile).encode()
+    outfile = os.path.join(options.outdir, options.outputfile)
     if os.path.isfile(outfile):
         info("# Existing file %s will be replaced!" % outfile)
         os.unlink(outfile)
@@ -51,8 +53,9 @@ def run(o_options):
         t0 = treat.total                          # total fragments
         info("# total fragments/pairs in alignment file: %d" % (t0))
         info("# Pileup paired-end alignment file.")
-        pileup_and_write_pe(treat, outfile)
-
+        bdg = treat.pileup_bdg()
+        bdgio = bedGraphIO(outfile, data=bdg)
+        bdgio.write_bedGraph(trackline=False)
     else:
         (tsize, treat) = load_tag_files_options(options)
 
@@ -63,10 +66,10 @@ def run(o_options):
 
         if options.bothdirection:
             info("# Pileup alignment file, extend each read towards up/downstream direction with %d bps" % options.extsize)
-            pileup_and_write_se(treat, outfile, options.extsize * 2, 1, directional=False, halfextension=False)
+            pileup_and_write_se(treat, outfile.encode(), options.extsize * 2, 1, directional=False, halfextension=False)
         else:
             info("# Pileup alignment file, extend each read towards downstream direction with %d bps" % options.extsize)
-            pileup_and_write_se(treat, outfile, options.extsize, 1, directional=True, halfextension=False)
+            pileup_and_write_se(treat, outfile.encode(), options.extsize, 1, directional=True, halfextension=False)
 
     info("# Done! Check %s" % options.outputfile)
 
diff --git a/MACS3/IO/BedGraphIO.py b/MACS3/IO/BedGraphIO.py
index 6fdd6b50..9914c37d 100644
--- a/MACS3/IO/BedGraphIO.py
+++ b/MACS3/IO/BedGraphIO.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-08 10:07:47 Tao Liu>
+# Time-stamp: <2025-02-05 12:38:24 Tao Liu>
 
 """Module Description:  IO Module for bedGraph file
 
diff --git a/MACS3/IO/Parser.py b/MACS3/IO/Parser.py
index 49954944..bb31a18a 100644
--- a/MACS3/IO/Parser.py
+++ b/MACS3/IO/Parser.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # cython: profile=True
 # cython: linetrace=True
-# Time-stamp: <2024-10-22 10:25:23 Tao Liu>
+# Time-stamp: <2024-11-29 23:30:03 Tao Liu>
 
 """Module for all MACS Parser classes for input. Please note that the
 parsers are for reading the alignment files ONLY.
@@ -1528,6 +1528,10 @@ def pe_parse_line(self, thisline: bytes):
             raise Exception("Less than 5 columns found at this line: %s\n" %
                             thisline)
 
+    @cython.ccall
+    def build_petrack(self):
+        return self.build_petrack2()
+
     @cython.ccall
     def build_petrack2(self):
         """Build PETrackII from all lines.
diff --git a/MACS3/IO/PeakIO.py b/MACS3/IO/PeakIO.py
index 0ad8f36c..4993bd07 100644
--- a/MACS3/IO/PeakIO.py
+++ b/MACS3/IO/PeakIO.py
@@ -1046,7 +1046,7 @@ class BroadPeakIO:
     """IO for broad peak information.
 
     """
-    peaks: dict
+    peaks = cython.declare(dict, visibility="public")
 
     def __init__(self):
         self.peaks = {}
diff --git a/MACS3/Signal/CallPeakUnit.py b/MACS3/Signal/CallPeakUnit.py
index 2e99613b..d5288ff7 100644
--- a/MACS3/Signal/CallPeakUnit.py
+++ b/MACS3/Signal/CallPeakUnit.py
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # cython: profile=True
 # cython: linetrace=True
-# Time-stamp: <2024-10-22 11:42:37 Tao Liu>
+# Time-stamp: <2025-02-05 12:40:36 Tao Liu>
 
 """Module for Calculate Scores.
 
@@ -40,7 +40,7 @@
 from MACS3.Signal.SignalProcessing import maxima, enforce_peakyness
 from MACS3.IO.PeakIO import PeakIO, BroadPeakIO
 from MACS3.Signal.FixWidthTrack import FWTrack
-from MACS3.Signal.PairedEndTrack import PETrackI
+from MACS3.Signal.PairedEndTrack import PETrackI, PETrackII
 from MACS3.Signal.Prob import poisson_cdf
 from MACS3.Utilities.Logger import logging
 
@@ -346,8 +346,8 @@ class CallerFromAlignments:
     It will compute for each chromosome separately in order to save
     memory usage.
     """
-    treat: object            # FWTrack or PETrackI object for ChIP
-    ctrl: object             # FWTrack or PETrackI object for Control
+    treat: object            # FWTrack or PETrackI/II object for ChIP
+    ctrl: object             # FWTrack or PETrackI/II object for Control
 
     d: cython.int                           # extension size for ChIP
     # extension sizes for Control. Can be multiple values
@@ -458,8 +458,10 @@ def __init__(self,
             self.PE_mode = False
         elif isinstance(treat, PETrackI):
             self.PE_mode = True
+        elif isinstance(treat, PETrackII):
+            self.PE_mode = True
         else:
-            raise Exception("Should be FWTrack or PETrackI object!")
+            raise Exception("Should be FWTrack or PETrackI/II object!")
         # decide if there is control
         self.treat = treat
         if ctrl:
@@ -565,12 +567,12 @@ def pileup_treat_ctrl_a_chromosome(self, chrom: bytes):
 
         if self.PE_mode:
             treat_pv = self.treat.pileup_a_chromosome(chrom,
-                                                      [self.treat_scaling_factor,],
+                                                      self.treat_scaling_factor,
                                                       baseline_value=0.0)
         else:
             treat_pv = self.treat.pileup_a_chromosome(chrom,
-                                                      [self.d,],
-                                                      [self.treat_scaling_factor,],
+                                                      self.d,
+                                                      self.treat_scaling_factor,
                                                       baseline_value=0.0,
                                                       directional=True,
                                                       end_shift=self.end_shift)
@@ -585,11 +587,11 @@ def pileup_treat_ctrl_a_chromosome(self, chrom: bytes):
                                                           self.ctrl_scaling_factor_s,
                                                           baseline_value=self.lambda_bg)
             else:
-                ctrl_pv = self.ctrl.pileup_a_chromosome(chrom,
-                                                        self.ctrl_d_s,
-                                                        self.ctrl_scaling_factor_s,
-                                                        baseline_value=self.lambda_bg,
-                                                        directional=False)
+                ctrl_pv = self.ctrl.pileup_a_chromosome_c(chrom,
+                                                          self.ctrl_d_s,
+                                                          self.ctrl_scaling_factor_s,
+                                                          baseline_value=self.lambda_bg,
+                                                          directional=False)
         else:
             # a: set global lambda
             ctrl_pv = [treat_pv[0][-1:], np.array([self.lambda_bg,],
@@ -814,7 +816,7 @@ def __cal_pvalue_qvalue_table(self):
             if q <= 0:
                 q = 0
                 break
-            #q = max(0,min(pre_q,q))           # make q-score monotonic
+            # q = max(0,min(pre_q,q))           # make q-score monotonic
             self.pqtable[v] = q
             pre_q = q
             k += l
@@ -828,8 +830,9 @@ def __cal_pvalue_qvalue_table(self):
     def __pre_computes(self,
                        max_gap: cython.int = 50,
                        min_length: cython.int = 200):
-        """After this function is called, self.pqtable and self.pvalue_length is built. All
-        chromosomes will be iterated. So it will take some time.
+        """After this function is called, self.pqtable and
+        self.pvalue_length is built. All chromosomes will be
+        iterated. So it will take some time.
 
         """
         chrom: bytes
@@ -1049,7 +1052,6 @@ def call_peaks(self,
             else:
                 self.__cal_pvalue_qvalue_table()
 
-
         # prepare bedGraph file
         if self.save_bedGraph:
             self.bedGraph_treat_f = fopen(self.bedGraph_treat_filename, "w")
diff --git a/MACS3/Signal/FixWidthTrack.py b/MACS3/Signal/FixWidthTrack.py
index 9774c236..9fe05a7d 100644
--- a/MACS3/Signal/FixWidthTrack.py
+++ b/MACS3/Signal/FixWidthTrack.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-14 14:53:06 Tao Liu>
+# Time-stamp: <2025-02-05 10:23:13 Tao Liu>
 
 """Module for FWTrack classes.
 
@@ -587,14 +587,73 @@ def compute_region_tags_from_peaks(self, peaks: PeakIO,
         return retval
 
     @cython.ccall
-    def pileup_a_chromosome(self, chrom: bytes, ds: list,
-                            scale_factor_s: list,
+    def pileup_a_chromosome(self, chrom: bytes,
+                            d: cython.long,
+                            scale_factor: cython.float = 1.0,
                             baseline_value: cython.float = 0.0,
                             directional: bool = True,
                             end_shift: cython.int = 0) -> list:
         """pileup a certain chromosome, return [p,v] (end position and
         value) list.
 
+        d : tag will be extended to this value to 3' direction,
+            unless directional is False.
+
+        scale_factor : linearly scale the pileup value.
+
+        baseline_value : a value to be filled for missing values, and
+                         will be the minimum pileup.
+
+        directional : if False, the strand or direction of tag will be
+                      ignored, so that extension will be both sides
+                      with d/2.
+
+        end_shift : move cutting ends towards 5->3 direction if value
+                    is positive, or towards 3->5 direction if
+                    negative. Default is 0 -- no shift at all.
+
+        p and v are numpy.ndarray objects.
+        """
+        five_shift: cython.long
+        # adjustment to 5' end and 3' end positions to make a fragment
+        three_shift: cython.long
+        rlength: cython.long
+        chrlengths: dict
+        tmp_pileup: list
+
+        chrlengths = self.get_rlengths()
+        rlength = chrlengths[chrom]
+
+        # adjust extension length according to 'directional' and
+        # 'halfextension' setting.
+        if directional:
+            # only extend to 3' side
+            five_shift = - end_shift
+            three_shift = end_shift + d
+        else:
+            # both sides
+            five_shift = d//2 - end_shift
+            three_shift = end_shift + d - d//2
+
+        tmp_pileup = se_all_in_one_pileup(self.locations[chrom][0],
+                                          self.locations[chrom][1],
+                                          five_shift,
+                                          three_shift,
+                                          rlength,
+                                          scale_factor,
+                                          baseline_value)
+        return tmp_pileup
+
+    @cython.ccall
+    def pileup_a_chromosome_c(self, chrom: bytes, ds: list,
+                              scale_factor_s: list,
+                              baseline_value: cython.float = 0.0,
+                              directional: bool = True,
+                              end_shift: cython.int = 0) -> list:
+        """pileup a certain chromosome, return [p,v] (end position and
+        value) list. This is for control data for which we have
+        multiple different `d` and `scale_factor`.
+
         ds : tag will be extended to this value to 3' direction,
              unless directional is False. Can contain multiple
              extension values. Final pileup will the maximum.
@@ -615,6 +674,7 @@ def pileup_a_chromosome(self, chrom: bytes, ds: list,
                     negative. Default is 0 -- no shift at all.
 
         p and v are numpy.ndarray objects.
+
         """
         d: cython.long
         five_shift: cython.long
diff --git a/MACS3/Signal/PairedEndTrack.py b/MACS3/Signal/PairedEndTrack.py
index 72c39d91..b438b1d3 100644
--- a/MACS3/Signal/PairedEndTrack.py
+++ b/MACS3/Signal/PairedEndTrack.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-15 15:56:00 Tao Liu>
+# Time-stamp: <2025-02-05 12:40:47 Tao Liu>
 
 """Module for filter duplicate tags from paired-end data
 
@@ -475,41 +475,24 @@ def print_to_bed(self, fhd=None):
     @cython.ccall
     def pileup_a_chromosome(self,
                             chrom: bytes,
-                            scale_factor_s: list,
+                            scale_factor: cython.float = 1.0,
                             baseline_value: cython.float = 0.0) -> list:
         """pileup a certain chromosome, return [p,v] (end position and
         value) list.
 
-        scale_factor_s : linearly scale the pileup value applied to
-                         each d in ds. The list should have the same
-                         length as ds.
+        scale_factor : linearly scale the pileup value.
 
         baseline_value : a value to be filled for missing values, and
                          will be the minimum pileup.
 
         """
         tmp_pileup: list
-        prev_pileup: list
-        scale_factor: cython.float
-
-        prev_pileup = None
 
-        for i in range(len(scale_factor_s)):
-            scale_factor = scale_factor_s[i]
-
-            # Can't directly pass partial nparray there since that will mess up with pointer calculation.
-            tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
-                                      np.sort(self.locations[chrom]['r']),
-                                      scale_factor, baseline_value)
-
-            if prev_pileup:
-                prev_pileup = over_two_pv_array(prev_pileup,
-                                                tmp_pileup,
-                                                func="max")
-            else:
-                prev_pileup = tmp_pileup
+        tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
+                                  np.sort(self.locations[chrom]['r']),
+                                  scale_factor, baseline_value)
 
-        return prev_pileup
+        return tmp_pileup
 
     @cython.ccall
     def pileup_a_chromosome_c(self,
@@ -574,48 +557,31 @@ def pileup_a_chromosome_c(self,
 
     @cython.ccall
     def pileup_bdg(self,
-                   scale_factor_s: list,
+                   scale_factor: cython.float = 1.0,
                    baseline_value: cython.float = 0.0):
         """pileup all chromosomes, and return a bedGraphTrackI object.
 
-        scale_factor_s : linearly scale the pileup value applied to
-                         each d in ds. The list should have the same
-                         length as ds.
-
+        scale_factor : a value to scale the pileup values.
         baseline_value : a value to be filled for missing values, and
                          will be the minimum pileup.
 
         """
         tmp_pileup: list
-        prev_pileup: list
-        scale_factor: cython.float
         chrom: bytes
         bdg: bedGraphTrackI
 
         bdg = bedGraphTrackI(baseline_value=baseline_value)
 
         for chrom in sorted(self.get_chr_names()):
-            prev_pileup = None
-            for i in range(len(scale_factor_s)):
-                scale_factor = scale_factor_s[i]
-
-                # Can't directly pass partial nparray there since that
-                # will mess up with pointer calculation.
-                tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
-                                          np.sort(self.locations[chrom]['r']),
-                                          scale_factor,
-                                          baseline_value)
-
-                if prev_pileup:
-                    prev_pileup = over_two_pv_array(prev_pileup,
-                                                    tmp_pileup,
-                                                    func="max")
-                else:
-                    prev_pileup = tmp_pileup
+            tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
+                                      np.sort(self.locations[chrom]['r']),
+                                      scale_factor,
+                                      baseline_value)
+
             # save to bedGraph
             bdg.add_chrom_data(chrom,
-                               pyarray('i', prev_pileup[0]),
-                               pyarray('f', prev_pileup[1]))
+                               pyarray('i', tmp_pileup[0]),
+                               pyarray('f', tmp_pileup[1]))
         return bdg
 
     @cython.ccall
@@ -749,6 +715,8 @@ def add_loc(self,
                 self.buf_size[chromosome] += self.buffer_size
                 self.locations[chromosome].resize((self.buf_size[chromosome]),
                                                   refcheck=False)
+                self.barcodes[chromosome].resize((self.buf_size[chromosome]),
+                                                 refcheck=False)                
             self.locations[chromosome][i] = (start, end, count)
             self.barcodes[chromosome][i] = bn
             self.size[chromosome] = i + 1
diff --git a/MACS3/Signal/PeakDetect.py b/MACS3/Signal/PeakDetect.py
index cea6f442..e9f1caa9 100644
--- a/MACS3/Signal/PeakDetect.py
+++ b/MACS3/Signal/PeakDetect.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-15 10:38:40 Tao Liu>
+# Time-stamp: <2024-10-24 15:20:26 Tao Liu>
 
 """Module Description: Detect peaks, main module
 
@@ -31,10 +31,6 @@ def subpeak_letters(i: cython.short) -> bytes:
 class PeakDetect:
     """Class to do the peak calling.
 
-    e.g
-    >>> from MACS3.cPeakDetect import cPeakDetect
-    >>> pd = PeakDetect(treat=treatdata, control=controldata, pvalue=pvalue_cutoff, d=100, gsize=3000000000)
-    >>> pd.call_peaks()
     """
     def __init__(self,
                  opt=None,
diff --git a/MACS3/Signal/Pileup.py b/MACS3/Signal/Pileup.py
index 074d14a6..6497facd 100644
--- a/MACS3/Signal/Pileup.py
+++ b/MACS3/Signal/Pileup.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-22 10:35:32 Tao Liu>
+# Time-stamp: <2025-02-05 12:41:01 Tao Liu>
 
 """Module Description: For pileup functions.
 
@@ -59,7 +59,8 @@ def fix_coordinates(poss: cnp.ndarray, rlength: cython.int) -> cnp.ndarray:
     """Fix the coordinates.
     """
     i: cython.long
-    ptr: cython.pointer(cython.int) = cython.cast(cython.pointer(cython.int), poss.data)  # pointer
+    ptr: cython.pointer(cython.int) = cython.cast(cython.pointer(cython.int),
+                                                  poss.data)  # pointer
 
     # fix those negative coordinates
     for i in range(poss.shape[0]):
@@ -106,7 +107,7 @@ def pileup_and_write_se(trackI,
     three_shift: cython.long
     i: cython.long
     rlength: cython.long
-    l_data: cython.long
+    l_data: cython.long = 0
     chroms: list
     n_chroms: cython.int
     chrom: bytes
@@ -152,12 +153,26 @@ def pileup_and_write_se(trackI,
         plus_tags_pos = cython.cast(cython.p_int, plus_tags.data)
         minus_tags_pos = cython.cast(cython.p_int, minus_tags.data)
 
-        _data = c_single_end_pileup(plus_tags_pos, plus_tags.shape[0], minus_tags_pos, minus_tags.shape[0], five_shift, three_shift, 0, rlength, scale_factor, baseline_value, cython.address(l_data))
+        _data = c_single_end_pileup(plus_tags_pos,
+                                    plus_tags.shape[0],
+                                    minus_tags_pos,
+                                    minus_tags.shape[0],
+                                    five_shift,
+                                    three_shift,
+                                    0,
+                                    rlength,
+                                    scale_factor,
+                                    baseline_value,
+                                    cython.address(l_data))
 
         # write
         py_bytes = chrom
         chrom_char = py_bytes
-        c_write_pv_array_to_bedGraph(_data, l_data, chrom_char, output_filename, 1)
+        c_write_pv_array_to_bedGraph(_data,
+                                     l_data,
+                                     chrom_char,
+                                     output_filename,
+                                     1)
 
         # clean
         free(_data)
@@ -192,7 +207,7 @@ def pileup_and_write_pe(petrackI,
     py_bytes: bytes
     chrom_char: cython.pointer(cython.char)
     _data: cython.pointer(PosVal)
-    l_data: cython.long
+    l_data: cython.long = 0
 
     chroms = list(chrlengths.keys())
     n_chroms = len(chroms)
@@ -201,7 +216,7 @@ def pileup_and_write_pe(petrackI,
     fh.write("")
     fh.close()
 
-    for i in range( n_chroms ):
+    for i in range(n_chroms):
         chrom = chroms[i]
         locs = petrackI.get_locations_by_chr(chrom)
 
@@ -210,12 +225,21 @@ def pileup_and_write_pe(petrackI,
         start_pos = cython.cast(cython.p_int, locs0.data)  # <int *> locs0.data
         end_pos = cython.cast(cython.p_int, locs1.data)  # <int *> locs1.data
 
-        _data = c_quick_pileup(start_pos, end_pos, locs0.shape[0], scale_factor, baseline_value, cython.address(l_data))
+        _data = c_quick_pileup(start_pos,
+                               end_pos,
+                               locs0.shape[0],
+                               scale_factor,
+                               baseline_value,
+                               cython.address(l_data))
 
         # write
         py_bytes = chrom
         chrom_char = py_bytes
-        c_write_pv_array_to_bedGraph(_data, l_data, chrom_char, output_filename, 1)
+        c_write_pv_array_to_bedGraph(_data,
+                                     l_data,
+                                     chrom_char,
+                                     output_filename,
+                                     1)
 
         # clean
         free(_data)
@@ -235,7 +259,7 @@ def se_all_in_one_pileup(plus_tags: cnp.ndarray,
                          three_shift: cython.long,
                          rlength: cython.int,
                          scale_factor: cython.float,
-                         baseline_value: cython.float ) -> list:
+                         baseline_value: cython.float) -> list:
     """Return pileup given 5' end of fragment at plus or minus strand
     separately, and given shift at both direction to recover a
     fragment. This function is for single end sequencing library
@@ -291,8 +315,10 @@ def se_all_in_one_pileup(plus_tags: cnp.ndarray,
 
     lx = start_poss.shape[0]
 
-    start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data)  # <int32_t *> start_poss.data
-    end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data)  # <int32_t *> end_poss.data
+    start_poss_ptr = cython.cast(cython.pointer(cython.int),
+                                 start_poss.data)  # <int32_t *> start_poss.data
+    end_poss_ptr = cython.cast(cython.pointer(cython.int),
+                               end_poss.data)  # <int32_t *> end_poss.data
 
     ret_p = np.zeros(2 * lx, dtype="i4")
     ret_v = np.zeros(2 * lx, dtype="f4")
@@ -423,8 +449,10 @@ def quick_pileup(start_poss: cnp.ndarray,
     ret_p_ptr: cython.pointer(cython.int)
     ret_v_ptr: cython.pointer(cython.float)
 
-    start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data)  # <int32_t *> start_poss.data
-    end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data)  # <int32_t *> end_poss.data
+    start_poss_ptr = cython.cast(cython.pointer(cython.int),
+                                 start_poss.data)  # <int32_t *> start_poss.data
+    end_poss_ptr = cython.cast(cython.pointer(cython.int),
+                               end_poss.data)  # <int32_t *> end_poss.data
 
     ret_p = np.zeros(l, dtype="i4")
     ret_v = np.zeros(l, dtype="f4")
@@ -506,10 +534,12 @@ def quick_pileup(start_poss: cnp.ndarray,
 
 @cython.ccall
 def naive_quick_pileup(sorted_poss: cnp.ndarray, extension: int) -> list:
-    """Simple pileup, every tag will be extended left and right with length `extension`.
+    """Simple pileup, every tag will be extended left and right with
+    length `extension`.
 
     Note: Assumption is that `poss` has to be pre-sorted! There is no
     check on whether it's sorted.
+
     """
     p: cython.int
     pre_p: cython.int
@@ -536,8 +566,10 @@ def naive_quick_pileup(sorted_poss: cnp.ndarray, extension: int) -> list:
     start_poss[start_poss < 0] = 0
     end_poss = sorted_poss + extension
 
-    start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data)  # <int32_t *> start_poss.data
-    end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data)  # <int32_t *> end_poss.data
+    start_poss_ptr = cython.cast(cython.pointer(cython.int),
+                                 start_poss.data)  # <int32_t *> start_poss.data
+    end_poss_ptr = cython.cast(cython.pointer(cython.int),
+                               end_poss.data)  # <int32_t *> end_poss.data
 
     ret_p = np.zeros(2*l, dtype="i4")
     ret_v = np.zeros(2*l, dtype="f4")
@@ -614,7 +646,9 @@ def naive_quick_pileup(sorted_poss: cnp.ndarray, extension: int) -> list:
 
 
 @cython.ccall
-def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> list:
+def over_two_pv_array(pv_array1: list,
+                      pv_array2: list,
+                      func: str = "max") -> list:
     """Merge two position-value arrays. For intersection regions, take
     the maximum value within region.
 
@@ -671,7 +705,8 @@ def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> li
     l1 = a1_pos.shape[0]
     l2 = a2_pos.shape[0]
 
-    # pre_p = 0                   # remember the previous position in the new bedGraphTrackI object ret
+    # pre_p = 0
+    # remember the previous position in the new bedGraphTrackI object ret
 
     while i1 < l1 and i2 < l2:
         ret_v_ptr[0] = f(a1_v_ptr[0], a2_v_ptr[0])
@@ -717,7 +752,8 @@ def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> li
 
 @cython.ccall
 def naive_call_peaks(pv_array: list, min_v: cython.float,
-                     max_v: cython.float = 1e30, max_gap: cython.int = 50,
+                     max_v: cython.float = 1e30,
+                     max_gap: cython.int = 50,
                      min_length: cython.int = 200):
 
     pre_p: cython.int
@@ -730,20 +766,20 @@ def naive_call_peaks(pv_array: list, min_v: cython.float,
 
     peak_content = []
     (ps, vs) = pv_array
-    psn = iter(ps).__next__         # assign the next function to a viable to speed up
+    psn = iter(ps).__next__  # assign the next function to a viable to speed up
     vsn = iter(vs).__next__
     x = 0
     pre_p = 0                   # remember previous position
     while True:
         # find the first region above min_v
-        try:                    # try to read the first data range for this chrom
+        try:         # try to read the first data range for this chrom
             p = psn()
             v = vsn()
         except Exception:
             break
         x += 1                  # index for the next point
         if v > min_v:
-            peak_content = [(pre_p , p, v),]
+            peak_content = [(pre_p, p, v),]
             pre_p = p
             break               # found the first range above min_v
         else:
@@ -771,13 +807,16 @@ def naive_call_peaks(pv_array: list, min_v: cython.float,
 
     # save the last peak
     if peak_content:
-        if peak_content[-1][1] - peak_content[0][0] >= min_length:       
+        if peak_content[-1][1] - peak_content[0][0] >= min_length:
             __close_peak(peak_content, ret_peaks, max_v, min_length)
     return ret_peaks
 
 
 @cython.cfunc
-def __close_peak(peak_content, peaks, max_v: cython.float, min_length: cython.int):
+def __close_peak(peak_content,
+                 peaks,
+                 max_v: cython.float,
+                 min_length: cython.int):
     """Internal function to find the summit and height
 
     If the height is larger than max_v, skip
diff --git a/MACS3/Signal/PileupV2.py b/MACS3/Signal/PileupV2.py
index 299ecf6e..3aaf36c4 100644
--- a/MACS3/Signal/PileupV2.py
+++ b/MACS3/Signal/PileupV2.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-14 21:19:00 Tao Liu>
+# Time-stamp: <2025-02-05 11:55:08 Tao Liu>
 
 """Module Description:
 
@@ -197,7 +197,7 @@ def pileup_PV(PV_array: cnp.ndarray) -> cnp.ndarray:
     i: cython.ulong
     c: cython.ulong
     # this is in bedGraph style as in Pileup.pyx, p is the end of a
-    # region, and v is the pileup value. It's 
+    # region, and v is the pileup value.
     pileup_PV: cnp.ndarray
     z = 0
     pre_z = -10000
diff --git a/MACS3/Utilities/Constants.py b/MACS3/Utilities/Constants.py
index 8e64282e..b9343b23 100644
--- a/MACS3/Utilities/Constants.py
+++ b/MACS3/Utilities/Constants.py
@@ -1,4 +1,4 @@
-MACS_VERSION = "3.0.3b"
+MACS_VERSION = "3.0.3b1"
 MAX_PAIRNUM = 1000
 MAX_LAMBDA = 100000
 FESTEP = 20
diff --git a/MACS3/Utilities/OptValidator.py b/MACS3/Utilities/OptValidator.py
index 451fe0b4..1ae98597 100644
--- a/MACS3/Utilities/OptValidator.py
+++ b/MACS3/Utilities/OptValidator.py
@@ -1,4 +1,4 @@
-# Time-stamp: <2024-10-02 19:47:03 Tao Liu>
+# Time-stamp: <2025-02-05 12:41:15 Tao Liu>
 """Module Description
 
 This code is free software; you can redistribute it and/or modify it
@@ -19,7 +19,9 @@
 from MACS3.IO.Parser import (BEDParser, ELANDResultParser,
                              ELANDMultiParser, ELANDExportParser,
                              SAMParser, BAMParser, BAMPEParser,
-                             BEDPEParser, BowtieParser,  guess_parser)
+                             BEDPEParser, BowtieParser,
+                             FragParser,
+                             guess_parser)
 
 from MACS3.Utilities.Constants import EFFECTIVEGS as efgsize
 
@@ -76,6 +78,8 @@ def opt_validate_callpeak(options):
     elif options.format == "BAM":
         options.parser = BAMParser
         options.gzip_flag = True
+    elif options.format == "BOWTIE":
+        options.parser = BowtieParser
     elif options.format == "BAMPE":
         options.parser = BAMPEParser
         options.gzip_flag = True
@@ -83,8 +87,9 @@ def opt_validate_callpeak(options):
     elif options.format == "BEDPE":
         options.parser = BEDPEParser
         options.nomodel = True
-    elif options.format == "BOWTIE":
-        options.parser = BowtieParser
+    elif options.format == "FRAG":
+        options.parser = FragParser
+        options.nomodel = True        
     elif options.format == "AUTO":
         options.parser = guess_parser
     else:
@@ -96,6 +101,11 @@ def opt_validate_callpeak(options):
         if not options.keepduplicates.isdigit():
             logger.error("--keep-dup should be 'auto', 'all' or an integer!")
             sys.exit(1)
+    # for duplicate reads filter, if format is FRAG, we turn it off by
+    # setting it as 'all'
+    if options.format == 'FRAG' and options.keepduplicates != "all":
+        logger.warning("Since the format is 'FRAG', `--keep-dup` will be set as 'all'.")
+        options.keepduplicates = "all"
 
     if options.extsize < 1:
         logger.error("--extsize must >= 1!")
@@ -222,7 +232,7 @@ def opt_validate_callpeak(options):
     if options.fecutoff != 1.0:
         options.argtxt += "# Additional cutoff on fold-enrichment is: %.2f\n" % (options.fecutoff)
 
-    if options.format in ["BAMPE", "BEDPE"]:
+    if options.format in ["BAMPE", "BEDPE", "FRAG"]:
         # neutralize SHIFT
         options.shift = 0
         options.argtxt += "# Paired-End mode is on\n"
@@ -528,6 +538,8 @@ def opt_validate_pileup(options):
         options.gzip_flag = True
     elif options.format == "BEDPE":
         options.parser = BEDPEParser
+    elif options.format == "FRAG":
+        options.parser = FragParser
     else:
         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
         sys.exit(1)
diff --git a/bin/macs3 b/bin/macs3
index 0df21cf8..824be5f7 100644
--- a/bin/macs3
+++ b/bin/macs3
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Time-stamp: <2024-10-02 12:29:58 Tao Liu>
+# Time-stamp: <2025-02-05 12:43:00 Tao Liu>
 
 """Description: MACS v3 main executable.
 
@@ -192,8 +192,13 @@ def add_callpeak_parser(subparsers):
     $ macs3 callpeak -f BAMPE -t ATAC.bam -g hs -n test -B -q 0.01
 4. Peak calling on ATAC-seq (focusing on insertion sites, and using single-end mode):
     $ macs3 callpeak -f BAM -t ATAC.bam -g hs -n test -B -q 0.01 --shift -50 --extension 100
+5. Peak calling on scATAC-seq (paired-end mode):
+    $ macs3 callpeak -f BEDPE -t scATAC.fragments.tsv.gz -g hs -n test -B -q 0.01 -n test
 """)
 
+#6. Peak calling on scATAC-seq (paired-end mode) and only for given barcodes:
+#    $ macs3 callpeak -f FRAG -t scATAC.fragments.tsv.gz -g hs -n test -B -q 0.01 -n test --barcodes barcodes.txt
+
     # group for input files
     group_input = argparser_callpeak.add_argument_group("Input files arguments")
     group_input.add_argument("-t", "--treatment", dest="tfile", type=str, required=True, nargs="+",
@@ -201,17 +206,24 @@ def add_callpeak_parser(subparsers):
     group_input.add_argument("-c", "--control", dest="cfile", type=str, nargs="*",
                              help="Control file. If multiple files are given as '-c A B C', they will be pooled to estimate ChIP-seq background noise.")
     group_input.add_argument("-f", "--format", dest="format", type=str,
+                             #choices=("AUTO", "BAM", "SAM", "BED", "ELAND",
+                             #         "ELANDMULTI", "ELANDEXPORT", "BOWTIE",
+                             #         "BAMPE", "BEDPE", "FRAG"),
+                             #help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\" or \"BAMPE\" or \"BEDPE\" or \"FRAG\". The default AUTO option will let MACS decide which format (except for BAMPE, BEDPE, and FRAG which should be implicitly set) the file is. Please check the definition in README. Please note that if the format is set as BAMPE, BEDPE or FRAG, MACS3 will call its special Paired-end mode to call peaks by piling up the actual ChIPed fragments defined by both aligned ends, instead of predicting the fragment size first and extending reads. Also please note that the BEDPE only contains three columns, and is NOT the same BEDPE format used by BEDTOOLS. The FRAG format is for single-cell ATAC-seq fragment file which is a five columns BEDPE file with extra barcode and fragment count column. DEFAULT: \"AUTO\"",
                              choices=("AUTO", "BAM", "SAM", "BED", "ELAND",
                                       "ELANDMULTI", "ELANDEXPORT", "BOWTIE",
                                       "BAMPE", "BEDPE"),
-                             help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\" or \"BAMPE\" or \"BEDPE\". The default AUTO option will let MACS decide which format (except for BAMPE and BEDPE which should be implicitly set) the file is. Please check the definition in README. Please note that if the format is set as BAMPE or BEDPE, MACS3 will call its special Paired-end mode to call peaks by piling up the actual ChIPed fragments defined by both aligned ends, instead of predicting the fragment size first and extending reads. Also please note that the BEDPE only contains three columns, and is NOT the same BEDPE format used by BEDTOOLS. DEFAULT: \"AUTO\"",
+                             help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\" or \"BAMPE\" or \"BEDPE\". The default AUTO option will let MACS decide which format (except for BAMPE and BEDPEwhich should be implicitly set) the file is. Please check the definition in README. Please note that if the format is set as BAMPE, or BEDPE, MACS3 will call its special Paired-end mode to call peaks by piling up the actual ChIPed fragments defined by both aligned ends, instead of predicting the fragment size first and extending reads. Also please note that the BEDPE only contains three columns, and is NOT the same BEDPE format used by BEDTOOLS. DEFAULT: \"AUTO\"",                             
                              default="AUTO")
     group_input.add_argument("-g", "--gsize", dest="gsize", type=str, default="hs",
                              help="Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2,913,022,398), 'mm' for mouse (2,652,783,500), 'ce' for C. elegans (100,286,401) and 'dm' for fruitfly (142,573,017), Default:hs. The effective genome size numbers for the above four species are collected from Deeptools https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html Please refer to deeptools to define the best genome size you plan to use.")
     group_input.add_argument("-s", "--tsize",  dest="tsize", type=int, default=None,
                              help="Tag size/read length. This will override the auto detected tag size. DEFAULT: Not set")
     group_input.add_argument("--keep-dup", dest="keepduplicates", type=str, default="1",
+                             #help="It controls the  behavior towards duplicate tags at the exact same location -- the same coordination and the same strand. The 'auto' option makes MACS calculate the maximum tags at the exact same location based on binomal distribution using 1e-5 as pvalue cutoff; and the 'all' option keeps every tags. If an integer is given, at most this number of tags will be kept at the same location. Note, if you've used samtools or picard to flag reads as 'PCR/Optical duplicate' in bit 1024, MACS3 will still read them although the reads may be decided by MACS3 as duplicate later. If you plan to rely on samtools/picard/any other tool to filter duplicates, please remove those duplicate reads and save a new alignment file then ask MACS3 to keep all by '--keep-dup all'. If the format is FRAG, this option will be ignored and MACS3 will behave like '--keep-dup all'. The default is to keep one tag at the same location. Default: 1")
                              help="It controls the  behavior towards duplicate tags at the exact same location -- the same coordination and the same strand. The 'auto' option makes MACS calculate the maximum tags at the exact same location based on binomal distribution using 1e-5 as pvalue cutoff; and the 'all' option keeps every tags. If an integer is given, at most this number of tags will be kept at the same location. Note, if you've used samtools or picard to flag reads as 'PCR/Optical duplicate' in bit 1024, MACS3 will still read them although the reads may be decided by MACS3 as duplicate later. If you plan to rely on samtools/picard/any other tool to filter duplicates, please remove those duplicate reads and save a new alignment file then ask MACS3 to keep all by '--keep-dup all'. The default is to keep one tag at the same location. Default: 1")
+    #group_input.add_argument("--barcodes", dest="barcodefile", type=str, default="",
+    #                         help="A plain text file containing the barcodes for the fragment file while the format is 'FRAG'. Won't have any effect if the fromat is not 'FRAG'. Each row in the file should only have the barcode string. MACS3 will extract only the fragments for the specified barcodes.")
 
     # group for output files
     group_output = argparser_callpeak.add_argument_group("Output arguments")
@@ -314,7 +326,7 @@ def add_filterdup_parser(subparsers):
                                      help="Alignment file. If multiple files are given as '-t A B C', then they will all be read and combined. REQUIRED.")
     argparser_filterdup.add_argument("-f", "--format", dest="format", type=str,
                                      choices=("AUTO", "BAM", "SAM", "BED", "ELAND", "ELANDMULTI", "ELANDEXPORT", "BOWTIE", "BAMPE", "BEDPE"),
-                                     help="Format of tag file, \"AUTO\", \"BED\" or \"ELAND\" or \"ELANDMULTI\" or \"ELANDEXPORT\" or \"SAM\" or \"BAM\" or \"BOWTIE\" or \"BAMPE\" or \"BEDPE\". The default AUTO option will let '%(prog)s' decide which format the file is. Please check the definition in README file if you choose ELAND/ELANDMULTI/ELANDEXPORT/SAM/BAM/BOWTIE or BAMPE/BEDPE. DEFAULT: \"AUTO\"",
+                                     help="Format of tag file, \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\", \"BOWTIE\", \"BAMPE\", or \"BEDPE\". The default AUTO option will let MACS3 guess which format the file is. Please check the definition in README file for each specific format. DEFAULT: \"AUTO\"",
                                      default="AUTO")
     argparser_filterdup.add_argument("-g", "--gsize", dest="gsize", type=str, default="hs",
                                      help="Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2,913,022,398), 'mm' for mouse (2,652,783,500), 'ce' for C. elegans (100,286,401) and 'dm' for fruitfly (142,573,017), Default:hs. The effective genome size numbers for the above four species are collected from Deeptools https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html Please refer to deeptools to define the best genome size you plan to use.")
@@ -336,6 +348,7 @@ def add_filterdup_parser(subparsers):
                                      help="When set, filterdup will only output numbers instead of writing output files, including maximum allowable duplicates, total number of reads before filtering, total number of reads after filtering, and redundant rate. Default: not set")
     return
 
+
 def add_bdgpeakcall_parser(subparsers):
     """Add function 'peak calling on bedGraph' argument parsers.
     """
@@ -368,6 +381,7 @@ def add_bdgpeakcall_parser(subparsers):
 
     return
 
+
 def add_bdgbroadcall_parser(subparsers):
     """Add function 'broad peak calling on bedGraph' argument parsers.
     """
@@ -394,6 +408,7 @@ def add_bdgbroadcall_parser(subparsers):
     add_output_group(argparser_bdgbroadcall)
     return
 
+
 def add_bdgcmp_parser(subparsers):
     """Add function 'peak calling on bedGraph' argument parsers.
     """
@@ -422,6 +437,7 @@ def add_bdgcmp_parser(subparsers):
                               help="Output filename. Mutually exclusive with --o-prefix. The number and the order of arguments for --ofile must be the same as for -m.")
     return
 
+
 def add_bdgopt_parser(subparsers):
     """Add function 'operations on score column of bedGraph' argument parsers.
     """
@@ -526,6 +542,7 @@ def add_bdgdiff_parser(subparsers):
 
     return
 
+
 def add_refinepeak_parser(subparsers):
     argparser_refinepeak = subparsers.add_parser("refinepeak",
                                                  help="Take raw reads alignment, refine peak summits. Inspired by SPP.")
@@ -549,7 +566,6 @@ def add_refinepeak_parser(subparsers):
 
     add_outdir_option(argparser_refinepeak)
     add_output_group(argparser_refinepeak)
-
     return
 
 
@@ -586,7 +602,6 @@ def add_predictd_parser(subparsers):
 
     argparser_predictd.add_argument("--verbose", dest="verbose", type=int, default=2,
                                     help="Set verbose level of runtime message. 0: only show critical message, 1: show additional warning message, 2: show process information, 3: show debug messages. DEFAULT:2")
-
     return
 
 
@@ -599,8 +614,10 @@ def add_pileup_parser(subparsers):
                                   help="Output bedGraph file name. If not specified, will write to standard output. REQUIRED.")
     add_outdir_option(argparser_pileup)
     argparser_pileup.add_argument("-f", "--format", dest="format", type=str,
+                                  #choices=("AUTO", "BAM", "SAM", "BED", "ELAND", "ELANDMULTI", "ELANDEXPORT", "BOWTIE", "BAMPE", "BEDPE", "FRAG"),
+                                  #help="Format of tag file, \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\", \"BOWTIE\", \"BAMPE\", \"BEDPE\", or \"FRAG\". The default AUTO option will let '%(prog)s' decide which format the file is. DEFAULT: \"AUTO\", MACS3 will pick a format from \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\" and \"BOWTIE\". If the format is BAMPE, BEDPE or FRAG, please specify it explicitly. Please note that when the format is BAMPE, BEDPE or FRAG, the -B and --extsize options would be ignored, and MACS3 will process the input in Paired-End mode.",
                                   choices=("AUTO", "BAM", "SAM", "BED", "ELAND", "ELANDMULTI", "ELANDEXPORT", "BOWTIE", "BAMPE", "BEDPE"),
-                                  help="Format of tag file, \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\", \"BOWTIE\", \"BAMPE\", or \"BEDPE\". The default AUTO option will let '%(prog)s' decide which format the file is. DEFAULT: \"AUTO\", MACS3 will pick a format from \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\" and \"BOWTIE\". If the format is BAMPE or BEDPE, please specify it explicitly. Please note that when the format is BAMPE or BEDPE, the -B and --extsize options would be ignored.",
+                                  help="Format of tag file, \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\", \"BOWTIE\", \"BAMPE\", or \"BEDPE\". The default AUTO option will let '%(prog)s' decide which format the file is. DEFAULT: \"AUTO\", MACS3 will pick a format from \"AUTO\", \"BED\", \"ELAND\", \"ELANDMULTI\", \"ELANDEXPORT\", \"SAM\", \"BAM\" and \"BOWTIE\". If the format is BAMPE or BEDPE, please specify it explicitly. Please note that when the format is BAMPE or BEDPE, the -B and --extsize options would be ignored, and MACS3 will process the input in Paired-End mode.",                                  
                                   default="AUTO")
     argparser_pileup.add_argument("-B", "--both-direction", dest="bothdirection", action="store_true", default=False,
                                   help="By default, any read will be extended towards downstream direction by extension size. So it's [0,size-1] (1-based index system) for plus strand read and [-size+1,0] for minus strand read where position 0 is 5' end of the aligned read. Default behavior can simulate MACS3 way of piling up ChIP sample reads where extension size is set as fragment size/d. If this option is set as on, aligned reads will be extended in both upstream and downstream directions by extension size. It means [-size,size] where 0 is the 5' end of a aligned read. It can partially simulate MACS3 way of piling up control reads. However MACS3 local bias is calculated by maximizing the expected pileup over a ChIP fragment size/d estimated from 10kb, 1kb, d and whole genome background. This option will be ignored when the format is set as BAMPE or BEDPE. DEFAULT: False")
@@ -857,6 +874,8 @@ plus an extra option for the HMM model file like `macs3 hmmratac
     group_input.add_argument("-i", "--input", dest="input_file", type=str, required=True, nargs="+",
                              help="Input files containing the aligment results for ATAC-seq paired end reads. If multiple files are given as '-t A B C', then they will all be read and pooled together. The file should be in BAMPE or BEDPE format (aligned in paired end mode). Files can be gzipped. Note: all files should be in the same format! REQUIRED.")
     group_input.add_argument("-f", "--format", dest="format", type=str,
+                             #choices=("BAMPE", "BEDPE", "FRAG"),
+                             #help="Format of input files, \"BAMPE\", \"BEDPE\", or \"FRAG\". If there are multiple files, they should be in the same format -- either BAMPE, BEDPE or FRAG. Please check the definition in README. Also please note that the BEDPE only contains three columns -- chromosome, left position of the whole pair, right position of the whole pair-- and is NOT the same BEDPE format used by BEDTOOLS. To convert BAMPE to BEDPE, you can use this command `macs3 filterdup --keep-dup all -f BAMPE -i input.bam -o output.bedpe`. And the FRAG format is a five columns BEDPE with extra barcode and fragment count columns. DEFAULT: \"BAMPE\"",
                              choices=("BAMPE", "BEDPE"),
                              help="Format of input files, \"BAMPE\" or \"BEDPE\". If there are multiple files, they should be in the same format -- either BAMPE or BEDPE. Please check the definition in README. Also please note that the BEDPE only contains three columns -- chromosome, left position of the whole pair, right position of the whole pair-- and is NOT the same BEDPE format used by BEDTOOLS. To convert BAMPE to BEDPE, you can use this command `macs3 filterdup --keep-dup all -f BAMPE -i input.bam -o output.bedpe`. DEFAULT: \"BAMPE\"",
                              default="BAMPE")
diff --git a/docs/source/docs/INSTALL.md b/docs/source/docs/INSTALL.md
index b189f0cc..277c9895 100644
--- a/docs/source/docs/INSTALL.md
+++ b/docs/source/docs/INSTALL.md
@@ -126,8 +126,8 @@ and activated beforehand for a smooth installation process.
 
 MACS uses `pip` for source code installations. To install a source
 distribution of MACS, unpack the distribution tarball, or clone Git
-repository with `git clone --recurse-submodules
-git@github.com:macs3-project/MACS.git`.  Go to the directory where you
+repository with `git clone --recurse-submodules 
+https://github.com/macs3-project/MACS.git`.  Go to the directory where you
 cloned MACS from github or unpacked from tarball, and simply run the
 install command:
 
diff --git a/docs/source/docs/hmmratac.md b/docs/source/docs/hmmratac.md
index 2c90204f..9e2b9c42 100644
--- a/docs/source/docs/hmmratac.md
+++ b/docs/source/docs/hmmratac.md
@@ -54,14 +54,14 @@ columns are:
  2. start position of the accessible region
  3. end position of the accesssible region
  4. peak name
- 5. peak score. The score is the maximum foldchange (signal/average
-    signal) within the peak. By default, the signal is the total
-    pileup of all types of fragments from short to tri-nuc size
-    fragments.
- 6. Not used
+ 5. peak score. The score is the 10times the maximum foldchange 
+    (signal/average signal) within the peak. By default, the 'signal'
+    used to calculate foldchange is the total pileup of all types of
+    fragments from short to tri-nuc size fragments.
  7. Not used
  8. Not used
- 9. peak summit position. It's the relative position from the start
+ 9. Not used
+ 10. peak summit position. It's the relative position from the start
     position to the peak summit which is defined as the position with
     the maximum foldchange score.
 
@@ -93,39 +93,52 @@ If specified all output files will be written to that
 directory. Default: the current working directory 
 
 ### `-n NAME`/ `--name NAME`
+
 Name for this experiment, which will be used as a prefix to generate
 output file names. DEFAULT: "NA" 
 
+### `-e BLACKLIST`/`--blacklist BLACKLIST`
+
+Filename of blacklisted regions to exclude from training and peak
+detection. An example of such file can be found from [ENCODE
+project](https://github.com/Boyle-Lab/Blacklist/). By default, there
+is no blacklist file.
+
 ### `--modelonly`
- This option will only generate the HMM model as a JSON file and
- quit. This model can then be applied using the `--model`
- option. Default: False 
+
+This option will only generate the HMM model as a JSON file and
+quit. This model can then be applied using the `--model`
+option. Default: False
 
 ### `--model`
- If provided, HMM training will be skipped and a JSON file generated
- from a previous HMMRATAC run will be used instead of creating new
- one. Default: NA 
+
+If provided, HMM training will be skipped and a JSON file generated
+from a previous HMMRATAC run will be used instead of creating new
+one. Default: NA 
    
 ### `-t HMM_TRAINING_REGIONS` / `--training HMM_TRAINING_REGIONS`
- Customized training regions can be provided through this option. `-t`
- takes the filename of training regions (previously was BED_file) to
- use for training HMM, instead of using foldchange settings to
- select. Default: NA 
+
+Customized training regions can be provided through this option. `-t`
+takes the filename of training regions (previously was BED_file) to
+use for training HMM, instead of using foldchange settings to
+select. Default: NA 
 
 ### `--min-frag-p MIN_FRAG_P`
- We will exclude the abnormal fragments that can't be assigned to any
- of the four signal tracks. After we use EM to find the means and
- stddevs of the four distributions, we will calculate the likelihood
- that a given fragment length fit any of the four using normal
- distribution. The criteria we will use is that if a fragment length
- has less than MIN_FRAG_P probability to be like either of short,
- mono, di, or tri-nuc fragment, we will exclude it while generating
- the four signal tracks for later HMM training and prediction. The
- value should be between 0 and 1. Larger the value, more abnormal
- fragments will be allowed. So if you want to include more 'ideal'
- fragments, make this value smaller. Default = 0.001 
+
+We will exclude the abnormal fragments that can't be assigned to any
+of the four signal tracks. After we use EM to find the means and
+stddevs of the four distributions, we will calculate the likelihood
+that a given fragment length fit any of the four using normal
+distribution. The criteria we will use is that if a fragment length
+has less than MIN_FRAG_P probability to be like either of short, mono,
+di, or tri-nuc fragment, we will exclude it while generating the four
+signal tracks for later HMM training and prediction. The value should
+be between 0 and 1. Larger the value, more abnormal fragments will be
+allowed. So if you want to include more 'ideal' fragments, make this
+value smaller. Default = 0.001
 
 ### `--cutoff-analysis-only`
+
 Only run the cutoff analysis and output a report. After generating the
 report, the whole process will stop. By default, the cutoff analysis
 will be included in the whole process, but won't quit after the report
@@ -138,12 +151,14 @@ controlled by `--cutoff-analysis-max` and `--cutoff-analysis-steps`
 options.
 
 ### `--cutoff-analysis-max`
+
 The maximum cutoff score for performing cutoff analysis. Together with
 `--cutoff-analysis-steps`, the resolution in the final report can be
 controlled. Please check the description in `--cutoff-analysis-steps`
 for detail. The default value is 100.
 
 ### `--cutoff-analysis-steps`
+
 Steps for performing cutoff analysis. It will be used to decide which
 cutoff value should be included in the final report. Larger the value,
 higher resolution the cutoff analysis can be. The cutoff analysis
@@ -181,6 +196,7 @@ cutoff analysis result that can capture some (typically hundreds of)
 extremely high enrichment and unusually wide peaks. Default: 20
 
 ### `-l HMM_LOWER` / `--lower HMM_LOWER`
+
 Lower limit on fold change range for choosing training sites. This is
 an important parameter for training so please read. The purpose of
 this parameter is to ONLY INCLUDE those chromatin regions having
diff --git a/test/cmdlinetest b/test/cmdlinetest
index 568f954f..c2129861 100755
--- a/test/cmdlinetest
+++ b/test/cmdlinetest
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Time-stamp: <2024-02-15 12:30:51 Tao Liu>
+# Time-stamp: <2025-02-05 12:12:52 Tao Liu>
 
 # integrative subcmds testing
 
@@ -42,6 +42,8 @@ CALLVARPEAK=callvar_testing.narrowPeak
 ATACSEQBAM=yeast_500k_SRR1822137.bam
 ATACSEQBED=yeast_500k_SRR1822137.bedpe.gz
 
+FRAGFILE=test.fragments.tsv.gz
+
 # callpeak
 echo "1. callpeak"
 echo "1.1 callpeak narrow"
@@ -216,9 +218,9 @@ mkdir ${OUTPUTDIR_PREFIX}_run_hmmratac
 # check default (no hmm-type specified) this is technically the same as --hmm-type gaussian (can include if preferred)
 # macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
 
-# check both gaussian and poisson hmm models, save training data
-macs3 hmmratac -i $ATACSEQBAM --hmm-type gaussian -n hmmratac_yeast500k --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
-macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_poisson.log
+# check both gaussian and poisson hmm models, save everything
+macs3 hmmratac -i $ATACSEQBAM --hmm-type gaussian -n hmmratac_yeast500k --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
+macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_poisson.log
 
 # check bedpe for both gaussian, poisson
 macs3 hmmratac -i $ATACSEQBED -n hmmratac_yeast500k_bedpe -f BEDPE --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe.log
diff --git a/test/test_PairedEndTrack.py b/test/test_PairedEndTrack.py
index 3723c944..9779feb3 100644
--- a/test/test_PairedEndTrack.py
+++ b/test/test_PairedEndTrack.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Time-stamp: <2024-10-15 16:07:27 Tao Liu>
+# Time-stamp: <2025-02-05 09:28:39 Tao Liu>
 
 import unittest
 from MACS3.Signal.PairedEndTrack import PETrackI, PETrackII
@@ -74,6 +74,12 @@ def test_sample_percent(self):
         pe.sample_percent(0.5)
         self.assertEqual(pe.total, 8)
 
+    def test_pileupbdg(self):
+        pe = PETrackI()
+        for (c, l, r) in self.input_regions:
+            pe.add_loc(c, l, r)
+        pe.finalize()
+        pe.pileup_bdg()
 
 class Test_PETrackII(unittest.TestCase):
     def setUp(self):