diff --git a/peptide_statistics_hpp/binding.xml b/peptide_statistics_hpp/binding.xml index 18189bb..12722e8 100644 --- a/peptide_statistics_hpp/binding.xml +++ b/peptide_statistics_hpp/binding.xml @@ -29,6 +29,9 @@ + + + @@ -54,6 +57,7 @@ + @@ -61,6 +65,7 @@ + @@ -87,7 +92,9 @@ + + diff --git a/peptide_statistics_hpp/flow.xml b/peptide_statistics_hpp/flow.xml index efff10a..63e744e 100644 --- a/peptide_statistics_hpp/flow.xml +++ b/peptide_statistics_hpp/flow.xml @@ -13,6 +13,7 @@ + @@ -41,6 +42,8 @@ + + @@ -60,7 +63,9 @@ - + + + @@ -68,8 +73,9 @@ - - + + + @@ -111,8 +117,10 @@ + - + + diff --git a/peptide_statistics_hpp/input.xml b/peptide_statistics_hpp/input.xml index f30a264..64dae31 100644 --- a/peptide_statistics_hpp/input.xml +++ b/peptide_statistics_hpp/input.xml @@ -13,6 +13,10 @@ + + + + @@ -41,8 +45,8 @@ - - + + @@ -80,22 +84,40 @@ - - + + + + + + + + + + + + + + + @@ -113,7 +135,7 @@ - + @@ -227,6 +249,16 @@ Proteome DB + + + + + + + Proteome DB - Contaminants + @@ -363,14 +395,14 @@ - + @@ -382,6 +414,24 @@ + + + + + + + + + + + + + + diff --git a/peptide_statistics_hpp/result.xml b/peptide_statistics_hpp/result.xml index 81de135..fbcf4ac 100644 --- a/peptide_statistics_hpp/result.xml +++ b/peptide_statistics_hpp/result.xml @@ -35,14 +35,15 @@ + + - - - - - - + + + + + @@ -54,7 +55,7 @@ - + @@ -73,7 +74,7 @@ - + @@ -83,7 +84,7 @@ - + @@ -94,7 +95,7 @@ - + @@ -147,7 +148,7 @@ - + + @@ -216,13 +218,13 @@ - + - + @@ -245,9 +247,15 @@ - + + + + + + + @@ -408,8 +416,8 @@ - - + + diff --git a/peptide_statistics_hpp/tool.xml b/peptide_statistics_hpp/tool.xml index bbe1f8e..7d30b04 100644 --- a/peptide_statistics_hpp/tool.xml +++ b/peptide_statistics_hpp/tool.xml @@ -68,7 +68,8 @@ - + + @@ -76,6 +77,7 @@ + @@ -85,11 +87,13 @@ + + @@ -137,10 +141,12 @@ + + @@ -157,10 +163,12 @@ - + + + @@ -181,7 +189,7 @@ - + @@ -189,7 +197,8 @@ - + + diff --git a/tools/peptide_statistics_hpp/cosine_to_synthetics.py b/tools/peptide_statistics_hpp/cosine_to_synthetics.py index aa4496c..9f970fc 100644 --- a/tools/peptide_statistics_hpp/cosine_to_synthetics.py +++ b/tools/peptide_statistics_hpp/cosine_to_synthetics.py @@ -94,8 +94,11 @@ def extract_annotated_peaks(spectrum, fragment_tolerance, low_mass_filter, min_s fragment_tolerance, precursor_filter_window=1.5, low_mass_filter=low_mass_filter, - isobaric_tag_type=None, - min_snr=min_snr + min_snr=0, + window_filter_size = 50, + window_filter_top_peaks = 8, + num_top_unannotated_envelopes_to_remove=2, + isobaric_tag_type='TMT 16-plex' #remove these peaks for all jobs ) ion_vector = spectrum._replace(peaks = ion_vector) ion_vector = processing.normalize_spectrum(ion_vector) @@ -107,16 +110,16 @@ def find_ei_and_intensity(spectrum, psm, synthetic_scans, tol, low_mass_filter, charge = psm['charge'] tolerance = psm['tolerance'] if psm['tolerance'] else tol spectrum = spectrum._replace(precursor_z = int(charge), annotation = processing.Annotation(sequence, None)) - matching_synthetics = synthetic_scans.get((sequence.replace('+229.163','').replace('+229.162932',''),charge),[]) + matching_synthetics = synthetic_scans.get((''.join([s for s in sequence if s.isalpha()]),charge),[]) spectrum_ei, spectrum_ion_vector = extract_annotated_peaks(spectrum, tolerance, low_mass_filter, min_snr) - for synthetic_filescan, synthetic_ion_vector in matching_synthetics: + for synthetic_filescan, synthetic_ion_vector, synthetic_peptide in matching_synthetics: cosine = processing.match_peaks(spectrum_ion_vector, synthetic_ion_vector, tolerance) if not best_cosine or cosine > 
best_cosine[0]: - best_cosine = (cosine,synthetic_filescan) + best_cosine = (cosine,synthetic_filescan,synthetic_peptide) return spectrum_ei, best_cosine def process_spectrum(psms_to_consider, filename, synthetic_scans, tol, low_mass_filter, min_snr, threshold, peaks_obj, spectrum_select_func): - cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'))) + cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'),'N/A')) explained_intensity_per_spectrum = {} for scan in psms_to_consider[filename].keys(): spectrum = spectrum_select_func(peaks_obj,scan) @@ -128,7 +131,7 @@ def process_spectrum(psms_to_consider, filename, synthetic_scans, tol, low_mass_ def process_spectrum_read_file(psms_to_consider, filename, synthetic_scans, tol, low_mass_filter, min_snr, threshold, reader, spectrum_select_func,read_scan): start_time = datetime.now() - cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'))) + cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'),'N/A')) explained_intensity_per_spectrum = {} all_spectra = [] for i,s in enumerate(reader): @@ -172,7 +175,7 @@ def main(): r = csv.DictReader(f, delimiter='\t') psms_header = r.fieldnames for l in r: - synthetic_keys.add((l['sequence'].replace('+229.163','').replace('+229.162932',''),l['charge'])) + synthetic_keys.add((''.join([a for a in l['sequence'] if a.isalpha()]),l['charge'])) all_psms.append(l) try: tolerance = float(l['frag_tol']) @@ -195,9 +198,10 @@ def main(): with mgf.read(synthetics_file) as reader: for i,s in enumerate(reader): peptide = s['params'].get('seq') + sequence = ''.join([a for a in peptide if a.isalpha()]) charge = int(s['params'].get('charge')[0]) # print(peptide, str(charge)) - if (peptide, str(charge)) in synthetic_keys: + if (sequence, str(charge)) in synthetic_keys: synthetics_loaded += 1 filename = s['params'].get('originalfile_filename',s['params'].get('provenance_filename')) scan = s['params'].get('originalfile_scan',s['params'].get('provenance_scan')) @@ -214,12 +218,12 @@ def main(): ) synthetic_low_mass, synthetic_min_snr = (args.low_mass_filter, args.min_snr) if args.filter_synthetics == 'Yes' else (0,0) _, synthetic_ion_vector = extract_annotated_peaks(spectrum, 0.05, synthetic_low_mass, synthetic_min_snr) - synthetic_scans[(peptide, str(charge))].append(((filename,scan),synthetic_ion_vector)) + synthetic_scans[(sequence, str(charge))].append(((filename,scan),synthetic_ion_vector,peptide)) print("{}: Loaded {} synthetics".format(datetime.now().strftime("%H:%M:%S"),synthetics_loaded)) else: print("Not loading synthetics") - cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'))) + cosine_to_synthetic = defaultdict(lambda: (-1,('N/A','N/A'),'N/A')) explained_intensity_per_spectrum = {} min_spectra_to_load_file = 20 @@ -313,9 +317,9 @@ def main(): for scan in psms_to_consider[filename].keys(): sequence = psms_to_consider[filename][scan]['sequence'] charge = psms_to_consider[filename][scan]['charge'] - matching_synthetics = synthetic_scans.get((sequence.replace('+229.163','').replace('+229.162932',''),charge),[]) - for synthetic_filescan, _ in matching_synthetics: - cosine_to_synthetic[(filename,scan)] = (0,synthetic_filescan) + matching_synthetics = synthetic_scans.get((''.join([s for s in sequence if s.isalpha()]),charge),[]) + for synthetic_filescan, _ , synthetic_peptide in matching_synthetics: + cosine_to_synthetic[(filename,scan)] = (0,synthetic_filescan,synthetic_peptide) print("{}: About to write out PSMs".format(datetime.now().strftime("%H:%M:%S"))) @@ -324,7 +328,7 @@ def main(): 
w_psm = csv.DictWriter(fw_psm, delimiter = '\t', fieldnames = header) w_psm.writeheader() for psm in all_psms: - cosine, best_synthetic = cosine_to_synthetic[(psm['filename'],psm['scan'])] + cosine, best_synthetic, synthetic_peptide = cosine_to_synthetic[(psm['filename'],psm['scan'])] if best_synthetic[0] != 'N/A': synthetic_filename = 'f.' + best_synthetic[0].replace('/data/massive/','') synthetic_scan = best_synthetic[1] @@ -334,7 +338,8 @@ def main(): psm['usi'] = make_usi(psm['filename'], psm['scan'], psm['sequence'], psm['charge']) psm['synthetic_filename'] = synthetic_filename psm['synthetic_scan'] = synthetic_scan - psm['synthetic_usi'] = make_usi(synthetic_filename, synthetic_scan, psm['sequence'].replace('+229.163','').replace('+229.162932',''), psm['charge']) + psm['synthetic_usi'] = make_usi(synthetic_filename, synthetic_scan, synthetic_peptide, psm['charge']) + psm['synthetic_sequence'] = synthetic_peptide psm['cosine'] = cosine ei, num_matched_peaks = explained_intensity_per_spectrum.get((psm['filename'],psm['scan']),(0,0)) psm['explained_intensity'] = ei diff --git a/tools/peptide_statistics_hpp/download_latest_kb.py b/tools/peptide_statistics_hpp/download_latest_kb.py index db56a11..d3457a8 100644 --- a/tools/peptide_statistics_hpp/download_latest_kb.py +++ b/tools/peptide_statistics_hpp/download_latest_kb.py @@ -10,7 +10,8 @@ def arguments(): parser = argparse.ArgumentParser(description='mzTab to list of peptides') parser.add_argument('-p','--params', type = str, help='Input Parameters') parser.add_argument('-c','--comparisons', type = Path, help='Comparison Jobs') - parser.add_argument('-f','--proteome_fasta', type = Path, help='FASTA File') + parser.add_argument('-f','--proteome_fasta', type = Path, help='Input FASTA Protein Database') + parser.add_argument('-m','--contaminants_fasta', type = Path, help='Input FASTA Protein Contaminants Database') parser.add_argument('-b','--backup_kb_pep', type = Path, help='Backup KB Peptides') parser.add_argument('-t','--use_job_level_thresholds', type = bool, help='Use job level thresholds',default=True) parser.add_argument('-k','--kb_pep', type = Path, help='Output KB Peptides') @@ -65,7 +66,7 @@ def main(): try: - proteome = mapping.add_decoys(mapping.read_uniprot(args.proteome_fasta)) + proteome = mapping.add_decoys(mapping.merge_proteomes([mapping.read_uniprot(args.proteome_fasta),mapping.read_fasta(args.contaminants_fasta)])) with open(args.kb_pep, 'w') as w: r = csv.DictWriter(w, delimiter = '\t', fieldnames = header) diff --git a/tools/peptide_statistics_hpp/map_peptides.py b/tools/peptide_statistics_hpp/map_peptides.py index 86e3308..40d298e 100755 --- a/tools/peptide_statistics_hpp/map_peptides.py +++ b/tools/peptide_statistics_hpp/map_peptides.py @@ -9,6 +9,7 @@ def arguments(): parser = argparse.ArgumentParser(description='Map Peptides') parser.add_argument('-f','--proteome_fasta', type = Path, help='Input FASTA Protein Database') + parser.add_argument('-c','--contaminants_fasta', type = Path, help='Input FASTA Protein Contaminants Database') parser.add_argument('-e','--exon_fasta', type = Path, help='Input FASTA Exon Mapping Database') parser.add_argument('-p','--peptide_list', type = Path, help='Peptide List') parser.add_argument('-o','--output_folder', type = Path, help='Output Folder') @@ -28,7 +29,7 @@ def load_peptide_list(input_peptide_filename): def main(): args = arguments() - proteome = mapping.read_uniprot(args.proteome_fasta) + proteome = 
mapping.merge_proteomes([mapping.read_uniprot(args.proteome_fasta),mapping.read_fasta(args.contaminants_fasta)]) proteome_with_decoys = mapping.add_decoys(proteome) peptide_list = load_peptide_list(args.peptide_list) diff --git a/tools/peptide_statistics_hpp/peptide_protein_cosine.py b/tools/peptide_statistics_hpp/peptide_protein_cosine.py index 50cb0e0..762af4b 100755 --- a/tools/peptide_statistics_hpp/peptide_protein_cosine.py +++ b/tools/peptide_statistics_hpp/peptide_protein_cosine.py @@ -16,7 +16,8 @@ def arguments(): parser = argparse.ArgumentParser(description='mzTab to list of peptides') parser.add_argument('--comparison_pep', type = Path, help='Peptides to Compare') - parser.add_argument('--fasta', type = Path, help='Input FASTA') + parser.add_argument('--proteome_fasta', type = Path, help='Input Proteome FASTA') + parser.add_argument('--contaminants_fasta', type = Path, help='Input Contaminants FASTA') parser.add_argument('--input_psms', type = Path, help='Input PSMs') parser.add_argument('--input_psms_external', type = Path, help='Input PSMs (External)') parser.add_argument('--input_peptides', type = Path, help='Input PSMs (External)') @@ -32,6 +33,7 @@ def arguments(): parser.add_argument('--output_task_proteins_all', type = Path, help='Output Task Proteins (All)') parser.add_argument('--protein_coverage', type = Path, help='Added Protein Coverage') parser.add_argument('--protein_coverage_external', type = Path, help='Added Protein Coverage (External)') + parser.add_argument('--protein_coverage_external_compare', type = Path, help='Added Protein Coverage (External) - Comparison') parser.add_argument('--cosine_cutoff', type = float, help='Cosine Cutoff') parser.add_argument('--explained_intensity_cutoff', type = float, help='Explained Intensity Cutoff') parser.add_argument('--annotated_ions_cutoff', type = float, help='Annotated Ion Cutoff') @@ -50,6 +52,9 @@ def arguments(): parser.add_argument('--library_name', type = int, help='Library Name') parser.add_argument('--export_explorers', type = int, help='Export Explorer Tables (0/1)') parser.add_argument('--explorers_output', type = Path, help='Tables for Explorers') + parser.add_argument('--variant_output', type = int, help='Variant level outputs',default=0) + parser.add_argument('--hpp_protein_score_aggregation', type = str, help='HPP Protein Aggregation (max or sum') + if len(sys.argv) < 4: parser.print_help() sys.exit(1) @@ -67,6 +72,16 @@ def unit_delta(aa1, aa2): no_mod_il = lambda pep: ''.join([p.replace('I','L') for p in pep if p.isalpha()]) +def seq_theoretical_mass(sequence): + aa = ''.join([a for a in sequence if a.isalpha()]) + mods = ''.join([m for m in sequence if not m.isalpha()]) + if len(mods) > 0: + mods = eval(mods) + else: + mods = 0 + return (theoretical_mass(aa) + mods + 1.007276035) + + def theoretical_mz(sequence,charge): aa = ''.join([a for a in sequence if a.isalpha()]) mods = ''.join([m for m in sequence if not m.isalpha()]) @@ -76,6 +91,10 @@ def theoretical_mz(sequence,charge): mods = 0 return (theoretical_mass(aa) + mods + (int(charge)*1.007276035))/int(charge) +def integer_mod_mass(sequence): + mods = ''.join([m for m in sequence if not m.isalpha()]) + mods = [int(round(float(m))) if i == 0 else -int(round(float(m))) for ms in mods.split('+') for i,m in enumerate(ms.split('-')) if m != '' ] + return sum(mods) def add_brackets(pep): aa_breakpoints = [] @@ -92,7 +111,7 @@ def add_brackets(pep): pep = pep[:breakpoint] + end_bracket + pep[breakpoint:] return pep -protein_type = lambda protein, 
proteome: 'TrEMBL' if proteome.proteins[protein].db == 'tr' else ('Canonical' if proteome.proteins[protein].iso == None else 'Isoform') +protein_type = lambda protein, proteome: 'Contaminant' if proteome.proteins[protein].db == 'con' else ('TrEMBL' if proteome.proteins[protein].db == 'tr' else ('Canonical' if proteome.proteins[protein].iso == None else 'Isoform')) def msv_to_pxd(msv, msv_mapping): output_mapping = msv_mapping.get(msv,{}).get('px_accession') @@ -163,9 +182,14 @@ def find_overlap(existing_peptides, new_peptides, protein_length, protein_pe, na def main(): args = arguments() + hpp_score_aggregation = lambda xs: sum(xs) if args.hpp_protein_score_aggregation == 'sum' else max(xs) + representative_per_precursor = {} + variant_to_best_precursor = {} + variant_to_all_precursors = defaultdict(set) + variant_number = {} - proteome = mapping.add_decoys(mapping.read_uniprot(args.fasta)) + proteome = mapping.add_decoys(mapping.merge_proteomes([mapping.read_uniprot(args.proteome_fasta),mapping.read_fasta(args.contaminants_fasta)])) all_datasets = set() datasets_per_sequence= defaultdict(set) @@ -183,6 +207,13 @@ def main(): comparison_picked_fdr = {} comparison_hpp_fdr = {} + precursors_per_protein_all = defaultdict(lambda: defaultdict(list)) + precursors_per_protein_hpp = defaultdict(lambda: defaultdict(list)) + precursors_per_protein_non_unique = defaultdict(lambda: defaultdict(list)) + + all_sequences = {} + unique_sequences = {} + frequency = defaultdict(lambda: defaultdict(lambda: defaultdict(int))) with open(args.msv_to_pxd_mapping) as json_file: @@ -193,32 +224,6 @@ def main(): if pxd: pxd_mapping[pxd] = msv - - with open(args.comparison_pep) as f: - r = csv.DictReader(f, delimiter='\t') - for l in r: - il_peptide = l['demodified'].replace('I','L') - has_synthetic = False - has_synthetic_cosine = False - if l['protein'] in proteome.proteins: - - if proteome.proteins[l['protein']].db == 'sp' and not proteome.proteins[l['protein']].iso: - #peptides that are not uniquely matching will have both FDRs as 1 - comparison_picked_fdr[l['protein']] = min(float(l['all_protein_fdr']),comparison_picked_fdr.get(l['protein'],1)) - comparison_hpp_fdr[l['protein']] = min(float(l['hpp_protein_fdr']),comparison_hpp_fdr.get(l['protein'],1)) - - protein_mapping[l['protein']][il_peptide].add((int(l['aa_start']),int(l['aa_end']),None,None,None,None)) - comparison = sequences_found[il_peptide].comparison - if comparison != SeqOccurances(True,True,True): - if float(l.get('synthetic_cosine',-1)) >= 0: - has_synthetic = True - if float(l['synthetic_cosine']) > args.cosine_cutoff: - has_synthetic_cosine = True - comparison = SeqOccurances(True, comparison.synthetic_match or has_synthetic, comparison.synthetic_match_cosine or has_synthetic_cosine) - sequences_found[il_peptide] = sequences_found[il_peptide]._replace(comparison = comparison) - if l['is_hpp'] == 'True': - sequences_found[il_peptide] = sequences_found[il_peptide]._replace(hpp = True) - peptide_to_protein = defaultdict(list) peptide_to_exon_map = defaultdict(list) @@ -233,33 +238,54 @@ def main(): latest_nextprot_release = sorted(list(nextprot_releases_pe.keys()))[-1] nextprot_pe = nextprot_releases_pe[latest_nextprot_release] - def update_precursor_representative(l,from_psm = True): + def update_precursor_representative(l,from_psm = True, variant_level = False, psm_fdr = 0): datasets = set([d for d in l.get('datasets','').split(';') if d != '']) tasks = set([t for t in l.get('tasks','').split(';') if t != '']) sequence, charge = 
l['sequence'],l['charge'] - if not (sequence, charge) in representative_per_precursor: + precursor_theoretical_mz = theoretical_mz(sequence, charge) + integer_mods = integer_mod_mass(sequence) + aa_seq = ''.join([a for a in sequence if a.isalpha()]) + + if variant_level: + variant = (aa_seq, charge, integer_mods) + else: + variant = (sequence, charge) + update_peptidoform = False + + score = float(l['score']) if psm_fdr <= 0.01 else 0 + + variant_to_all_precursors[variant].add((sequence, charge)) + + if not variant in variant_to_best_precursor: + current_variant_number = len(variant_number) + variant_number[variant] = current_variant_number + variant_to_best_precursor[variant] = (sequence, charge) representative_per_precursor[(sequence, charge)] = l.copy() precursor_representative = representative_per_precursor[(sequence, charge)] precursor_representative['datasets'] = datasets precursor_representative['tasks'] = tasks + precursor_representative['parent_mass'] = precursor_theoretical_mz + precursor_representative['filtered_psms'] = 0 + precursor_representative['total_psms'] = 0 + precursor_representative['variant_number'] = current_variant_number if from_psm: precursor_representative['database_filename'] = l['filename'] precursor_representative['database_scan'] = l['scan'] precursor_representative['database_usi'] = l['usi'] precursor_representative['explained_intensity'] = float(l['explained_intensity']) precursor_representative['cosine'] = float(l['cosine']) - precursor_representative['score'] = float(l['score']) + precursor_representative['score'] = score + precursor_representative['best_overall_psm_score'] = score precursor_representative.pop('filename') precursor_representative.pop('scan') precursor_representative.pop('usi') - precursor_representative = representative_per_precursor[(sequence, charge)] + precursor_representative = representative_per_precursor[variant_to_best_precursor[variant]] precursor_representative['datasets'] |= datasets precursor_representative['tasks'] |= tasks best_cosine = float(l['cosine']) >= float(precursor_representative['cosine']) - best_score = float(l['score']) >= float(precursor_representative['score']) this_pass_ei = float(l['explained_intensity']) >= args.explained_intensity_cutoff this_pass_cos = float(l['cosine']) >= args.cosine_cutoff @@ -268,6 +294,8 @@ def update_precursor_representative(l,from_psm = True): best_pass_cos = float(precursor_representative['cosine']) >= args.cosine_cutoff best_pass_by = int(precursor_representative['matched_ions']) >= args.annotated_ions_cutoff + best_score = float(l['explained_intensity']) >= float(precursor_representative['explained_intensity']) if score == float(precursor_representative['score']) else score > float(precursor_representative['score']) + # we want to find the spectrum with the best score, # but sometimes the highest scoring spectrum does not pass the filters @@ -285,11 +313,18 @@ def update_precursor_representative(l,from_psm = True): precursor_representative['database_filename'] = l['filename'] if from_psm else l['database_filename'] precursor_representative['database_scan'] = l['scan'] if from_psm else l['database_scan'] precursor_representative['database_usi'] = l['usi'] if from_psm else l['database_usi'] - precursor_representative['score'] = float(l['score']) - #consider best EI And matched ions over all representatives - if float(l['explained_intensity']) > float(precursor_representative.get('explained_intensity',0.0)): - precursor_representative['explained_intensity'] = 
l['explained_intensity'] - precursor_representative['matched_ions'] = l['matched_ions'] + precursor_representative['score'] = score + update_peptidoform = True + + #best EI does not pass #breaks threshold but current one does + if not best_pass_by and this_pass_by: + precursor_representative['explained_intensity'] = l['explained_intensity'] + precursor_representative['matched_ions'] = l['matched_ions'] + #two situations, either both pass #threshold or both fail #threshold + elif best_pass_by is this_pass_by and float(l['explained_intensity']) > float(precursor_representative.get('explained_intensity',0.0)): + precursor_representative['explained_intensity'] = l['explained_intensity'] + precursor_representative['matched_ions'] = l['matched_ions'] + #otherwise best already passed #threshold, so we skip it if best_cosine and float(l['cosine']) >= 0 and l['synthetic_usi'] != 'N/A': precursor_representative['cosine_filename'] = l['filename'] if from_psm else l['cosine_filename'] @@ -305,6 +340,21 @@ def update_precursor_representative(l,from_psm = True): else: precursor_representative['cosine_score_match'] = 'No' + if best_score: + precursor_representative['best_overall_psm_score'] = float(l['score']) + + if (this_pass_ei or this_pass_cos) and this_pass_by: + precursor_representative['filtered_psms'] += int(l.get('filtered_psms',1)) + precursor_representative['total_psms'] += int(l.get('total_psms',1)) + + precursor_representative['num_peptidoforms'] = len(variant_to_all_precursors[variant]) + + if update_peptidoform: + representative_per_precursor[(sequence, charge)] = representative_per_precursor.pop(variant_to_best_precursor[variant]) + variant_to_best_precursor[variant] = (sequence, charge) + + return variant_number[variant] + def update_mappings(protein_coverage_file,update_precursor_representatives): if(protein_coverage_file.is_file()): print("{}: Loading {} ({} cumulative peptides @ {} peptides/second)".format(datetime.now().strftime("%H:%M:%S"),protein_coverage_file,len(pep_mapping_info),len(pep_mapping_info)/(1+(datetime.now()-start_time).seconds))) @@ -315,7 +365,7 @@ def update_mappings(protein_coverage_file,update_precursor_representatives): protein_mapping[protein].update(peptide_mapping) if update_precursor_representatives: for l in output_peptides: - update_precursor_representative(l,False) + update_precursor_representative(l,False,variant_level=args.variant_output==1) start_time = datetime.now() @@ -412,13 +462,29 @@ def protein_info(peptide, peptide_to_protein, protein_mappings, sequences_found, outdict['hpp_match'] = 'No - failed quality thresholds' return outdict, output_proteins - row_pass_filters = lambda best_psm: (float(best_psm['explained_intensity']) >= args.explained_intensity_cutoff or float(best_psm['cosine']) >= args.cosine_cutoff) and int(best_psm['matched_ions']) >= args.annotated_ions_cutoff + row_pass_filters_input = lambda best_psm, explained_intensity_cutoff, cosine_cutoff, annotated_ions_cutoff: ( + float(best_psm['explained_intensity']) >= explained_intensity_cutoff or + float(best_psm['cosine']) >= cosine_cutoff + ) and int(best_psm['matched_ions']) >= annotated_ions_cutoff + + row_pass_filters = lambda best_psm: row_pass_filters_input( + best_psm, + args.explained_intensity_cutoff, + args.cosine_cutoff, + args.annotated_ions_cutoff + ) + row_pass_filters_compare = lambda best_psm: row_pass_filters_input( + best_psm, + float(best_psm['explained_intensity_cutoff']), + float(best_psm['cosine_cutoff']), + int(float(best_psm['annotated_ions_cutoff'])) + ) if 
args.output_psms: all_psms_with_score = [] all_psm_rows = [] with open(args.output_psms,'w') as w: - header = ['protein','protein_full','psm_fdr','protein_type','gene','all_proteins','decoy','pe','ms_evidence','filename','scan','sequence','sequence_unmodified','sequence_unmodified_il','charge','usi','score','modifications','pass','type','parent_mass','frag_tol','synthetic_filename','synthetic_scan','synthetic_usi','cosine','synthetic_match','explained_intensity','matched_ions','hpp_match','gene_unique','canonical_matches','all_proteins_w_coords','aa_start','aa_end', 'total_unique_exons_covered', 'exons_covered_no_junction', 'exon_junctions_covered', 'all_mapped_exons','datasets','tasks'] + header = ['protein','protein_full','psm_fdr','protein_type','gene','all_proteins','decoy','pe','ms_evidence','filename','scan','variant_number','sequence','sequence_unmodified','sequence_unmodified_il','charge','usi','score','modifications','pass','type','parent_mass','frag_tol','synthetic_filename','synthetic_scan','synthetic_usi','cosine','synthetic_match','explained_intensity','matched_ions','hpp_match','gene_unique','canonical_matches','all_proteins_w_coords','aa_start','aa_end', 'total_unique_exons_covered', 'exons_covered_no_junction', 'exon_junctions_covered', 'all_mapped_exons','datasets','tasks'] o = csv.DictWriter(w, delimiter='\t',fieldnames = header, restval='N/A', extrasaction='ignore') o.writeheader() for input_psm in args.input_psms.glob('*'): @@ -446,8 +512,11 @@ def protein_info(peptide, peptide_to_protein, protein_mappings, sequences_found, l.update(protein_info_dict) proteins = l['protein'].split(' ###')[0].split(';') # PSM-level FDR was inefficient at this scale - need to rethink - #if row_pass_filters(l): - all_psms_with_score.append(fdr.ScoredElement(l['usi'],'XXX_' in proteins[0],l['score'])) + all_targets = [p for p in proteins if 'XXX_' not in p] + is_decoy = len(all_targets)==0 + l['decoy'] = is_decoy + if row_pass_filters(l): + all_psms_with_score.append(fdr.ScoredElement(l['usi'],is_decoy,l['score'])) l.pop('mapped_proteins') l.pop('hpp') l.pop('len') @@ -457,10 +526,9 @@ def protein_info(peptide, peptide_to_protein, protein_mappings, sequences_found, for l in all_psm_rows: l['psm_fdr'] = psm_fdr.get(l['usi'],1) + l['variant_number'] = update_precursor_representative(l, psm_fdr = l['psm_fdr'], variant_level=args.variant_output==1) if args.output_psms_flag == "1" or (args.output_psms_flag == "0.5" and row_pass_filters(l)): o.writerow(l) - if l['psm_fdr'] <= 0.01: - update_precursor_representative(l) print("About to calculate precursor FDR") @@ -469,69 +537,65 @@ def protein_info(peptide, peptide_to_protein, protein_mappings, sequences_found, for (sequence, charge), best_psm in representative_per_precursor.items(): all_targets = [p for ps in best_psm['protein'].split(' ###') for p in ps.split(';') if 'XXX_' not in p] - if best_psm['protein'] != '': + if best_psm['protein'] != '' and best_psm['score'] > 0: all_hpp_precursors.append(fdr.ScoredElement((sequence, charge),len(all_targets)==0,best_psm['score'])) precursor_fdr = {} if len(all_hpp_precursors) > 0: precursor_fdr = fdr.calculate_fdr(all_hpp_precursors) - precursors_per_protein_all = defaultdict(lambda: defaultdict(list)) - precursors_per_protein_hpp = defaultdict(lambda: defaultdict(list)) - precursors_per_protein_non_unique = defaultdict(lambda: defaultdict(list)) - - - def output_protein_level_results(best_psm): + def output_protein_level_results(best_psm,row_pass_filters,update_mappings = True): sequence = 
best_psm['sequence_unmodified'] sequence_il = best_psm['sequence_unmodified_il'] proteins = [p for p in best_psm['protein'].split(' ###')[0].split(';') if p != ''] if row_pass_filters(best_psm): - if float(best_psm['precursor_fdr']) <= args.precursor_fdr and len(proteins) == 1 and 'Canonical' in best_psm.get('protein_type',''): + if float(best_psm['precursor_fdr']) <= args.precursor_fdr and len(proteins) == 1 and ('Canonical' in best_psm.get('protein_type','') or 'Contaminant' in best_psm.get('protein_type','')): pos = (int(best_psm['aa_start']),int(best_psm['aa_end'])) if best_psm.get('hpp_match','') == 'Yes': - precursors_per_protein_hpp[proteins[0]][pos].append((float(best_psm['score']),theoretical_mz(best_psm['sequence'],int(best_psm['charge'])))) - precursors_per_protein_all[proteins[0]][pos].append((float(best_psm['score']),theoretical_mz(best_psm['sequence'],int(best_psm['charge'])))) + precursors_per_protein_hpp[proteins[0]][pos].append((float(best_psm['score']),seq_theoretical_mass(best_psm['sequence']),best_psm['charge'])) + precursors_per_protein_all[proteins[0]][pos].append((float(best_psm['score']),seq_theoretical_mass(best_psm['sequence']),best_psm['charge'])) for protein in proteins: - precursors_per_protein_non_unique[protein][sequence_il].append((float(best_psm['score']),theoretical_mz(best_psm['sequence'],int(best_psm['charge'])))) - - proteins = peptide_to_protein.get(sequence_il,[]) - cannonical_proteins = [protein for protein in proteins if proteome.proteins[protein].db == 'sp' and not proteome.proteins[protein].iso] - output_genes = set([proteome.proteins[protein].gene if proteome.proteins[protein].gene else 'N/A' for protein in proteins]) + precursors_per_protein_non_unique[protein][sequence_il].append((float(best_psm['score']),seq_theoretical_mass(best_psm['sequence']),best_psm['charge'])) + + if update_mappings: + proteins = peptide_to_protein.get(sequence_il,[]) + cannonical_proteins = [protein for protein in proteins if (proteome.proteins[protein].db == 'sp' and not proteome.proteins[protein].iso) or proteome.proteins[protein].db == 'con'] + output_genes = set([proteome.proteins[protein].gene if proteome.proteins[protein].gene else 'N/A' for protein in proteins]) - if len(cannonical_proteins) <= 1 and best_psm.get('hpp_match','') == 'Yes': - is_hpp = pep_mapping_info[sequence]['hpp'] - match = False - has_synthetic = False - has_synthetic_cosine = False - is_isoform_unique = False - if len(cannonical_proteins) == 1: - if row_pass_filters(best_psm): - if float(best_psm['cosine']) >= 0: - has_synthetic = True - if float(best_psm['cosine']) >= args.cosine_cutoff: - has_synthetic_cosine = True - match = True + if len(cannonical_proteins) <= 1 : + is_hpp = pep_mapping_info[sequence]['hpp'] + match = False + has_synthetic = False + has_synthetic_cosine = False + is_isoform_unique = False + if len(cannonical_proteins) == 1: + if row_pass_filters(best_psm): + if float(best_psm['cosine']) >= 0: + has_synthetic = True + if float(best_psm['cosine']) >= args.cosine_cutoff: + has_synthetic_cosine = True + match = True for dataset in best_psm['datasets']: all_datasets.add(dataset) datasets_per_sequence[sequence_il].add(dataset) for task in best_psm['tasks']: all_tasks.add(task) tasks_per_sequence[sequence_il].add(task) - if len(proteins) == 1: - if row_pass_filters(best_psm): - is_isoform_unique = True - added = sequences_found[sequence_il].added - sequences_found[sequence_il] = sequences_found[sequence_il]._replace( - hpp=is_hpp, - isoform_unique=is_isoform_unique, - added = 
SeqOccurances( - added.match or match, - added.synthetic_match or has_synthetic, - added.synthetic_match_cosine or has_synthetic_cosine + if len(proteins) == 1: + if row_pass_filters(best_psm): + is_isoform_unique = True + added = sequences_found[sequence_il].added + sequences_found[sequence_il] = sequences_found[sequence_il]._replace( + hpp=is_hpp, + isoform_unique=is_isoform_unique, + added = SeqOccurances( + added.match or match, + added.synthetic_match or has_synthetic, + added.synthetic_match_cosine or has_synthetic_cosine + ) ) - ) print("About to output sequences") all_precursors = [] @@ -547,7 +611,7 @@ def output_protein_level_results(best_psm): best_psm['precursor_fdr'] = min(precursor_fdr.get((sequence, charge),1),1) best_psm['psm_fdr'] = -1 best_psm['synthetic_sequence'] = sequence.replace('+229.163','').replace('+229.162932','') - output_protein_level_results(best_psm) + output_protein_level_results(best_psm,row_pass_filters=row_pass_filters,update_mappings=True) best_psm['datasets'] = ';'.join(best_psm['datasets']) best_psm['tasks'] = ';'.join(best_psm['tasks']) all_precursors.append(best_psm) @@ -556,7 +620,45 @@ def output_protein_level_results(best_psm): with open(args.input_peptides) as f: r = csv.DictReader(f, delimiter='\t') for best_psm in r: - output_protein_level_results(best_psm) + output_protein_level_results(best_psm,row_pass_filters=row_pass_filters,update_mappings=True) + + for protein, seqs in precursors_per_protein_all.items(): + unique_sequences[protein] = len(seqs) + + for protein, seqs in precursors_per_protein_non_unique.items(): + all_sequences[protein] = len(seqs) + + for comparison_peptide_file in args.protein_coverage_external_compare.glob('*'): + with open(comparison_peptide_file) as f: + r = csv.DictReader(f, delimiter='\t') + for l in r: + output_protein_level_results(l, row_pass_filters=row_pass_filters_compare, update_mappings=False) + + with open(args.comparison_pep) as f: + r = csv.DictReader(f, delimiter='\t') + for l in r: + il_peptide = l['demodified'].replace('I','L') + has_synthetic = False + has_synthetic_cosine = False + if l['protein'] in proteome.proteins: + + if proteome.proteins[l['protein']].db == 'sp' and not proteome.proteins[l['protein']].iso: + #peptides that are not uniquely matching will have both FDRs as 1 + comparison_picked_fdr[l['protein']] = min(float(l['all_protein_fdr']),comparison_picked_fdr.get(l['protein'],1)) + comparison_hpp_fdr[l['protein']] = min(float(l['hpp_protein_fdr']),comparison_hpp_fdr.get(l['protein'],1)) + + protein_mapping[l['protein']][il_peptide].add((int(l['aa_start']),int(l['aa_end']),None,None,None,None)) + comparison = sequences_found[il_peptide].comparison + if comparison != SeqOccurances(True,True,True): + if float(l.get('synthetic_cosine',-1)) >= 0: + has_synthetic = True + if float(l['synthetic_cosine']) > args.cosine_cutoff: + has_synthetic_cosine = True + comparison = SeqOccurances(True, comparison.synthetic_match or has_synthetic, comparison.synthetic_match_cosine or has_synthetic_cosine) + sequences_found[il_peptide] = sequences_found[il_peptide]._replace(comparison = comparison) + if l['is_hpp'] == 'True': + sequences_found[il_peptide] = sequences_found[il_peptide]._replace(hpp = True) + hpp_protein_w_scores = [] hpp_score_dict = {} @@ -572,19 +674,19 @@ def output_protein_level_results(best_psm): seen_picked = set() - def greedy_sequence_precursor_score(precursor_list, mz_distance = 2.5): + def greedy_sequence_precursor_score(precursor_list, distance = 3, score_aggregation_func = lambda 
xs: sum(xs)): used_precursors = [] for precursor in sorted(precursor_list, key = lambda x: x[0], reverse = True): found = False for seen_precursor in used_precursors: - if abs(precursor[1]-seen_precursor[1]) <= mz_distance: + if precursor[2]==seen_precursor[2] and abs(precursor[1]-seen_precursor[1]) <= distance: found = True if not found: used_precursors.append(precursor) - return sum([p[0] for p in used_precursors]) + return score_aggregation_func([p[0] for p in used_precursors]) for protein, precursors in precursors_per_protein_hpp.items(): - score, count = mapping.non_nested_score([(*k,greedy_sequence_precursor_score(v)) for k,v in precursors.items()]) + score, count = mapping.non_nested_score([(*k,greedy_sequence_precursor_score(v,score_aggregation_func=hpp_score_aggregation)) for k,v in precursors.items()]) if score != 0: hpp_protein_w_scores.append(fdr.ScoredElement(protein,'XXX_' in protein, score)) hpp_score_dict[protein] = score @@ -629,14 +731,15 @@ def greedy_sequence_precursor_score(precursor_list, mz_distance = 2.5): if args.output_peptides: with open(args.output_peptides,'w') as w: - header = ['picked_protein_fdr','hpp_protein_fdr','precursor_fdr','psm_fdr','protein_full','protein','protein_type','gene','decoy','all_proteins','pe','ms_evidence','aa_total','database_filename','database_scan','database_usi','sequence','sequence_unmodified','sequence_unmodified_il','charge','score','modifications','pass','type','parent_mass','cosine_filename','cosine_scan','cosine_usi','synthetic_filename','synthetic_scan','synthetic_usi','synthetic_sequence','cosine','synthetic_match','cosine_score_match','explained_intensity','matched_ions','hpp_match','gene_unique','canonical_matches','all_proteins_w_coords','aa_start','aa_end','frag_tol', 'total_unique_exons_covered', 'exons_covered_no_junction', 'exon_junctions_covered', 'all_mapped_exons','datasets','tasks'] + header = ['picked_protein_fdr','hpp_protein_fdr','precursor_fdr','psm_fdr','protein_full','protein','protein_type','gene','decoy','all_proteins','pe','ms_evidence','aa_total','database_filename','database_scan','database_usi','sequence','sequence_unmodified','sequence_unmodified_il','charge','score','modifications','pass','type','parent_mass','cosine_filename','cosine_scan','cosine_usi','synthetic_filename','synthetic_scan','synthetic_usi','synthetic_sequence','cosine','synthetic_match','cosine_score_match','explained_intensity','matched_ions','hpp_match','gene_unique','canonical_matches','all_proteins_w_coords','aa_start','aa_end','frag_tol', 'total_unique_exons_covered', 'exons_covered_no_junction','exon_junctions_covered', 'all_mapped_exons','datasets','tasks'] header += ['precursor_fdr_cutoff', 'picked_protein_fdr_cutoff', 'hpp_protein_fdr_cutoff', 'cosine_cutoff', 'explained_intensity_cutoff', 'annotated_ions_cutoff'] + header += ['num_peptidoforms','filtered_psms','total_psms','variant_number','best_overall_psm_score'] r = csv.DictWriter(w, delimiter = '\t', fieldnames = header, restval='N/A', extrasaction='ignore') r.writeheader() for precursor in all_precursors: proteins = [p for p in precursor['protein'].split(' ###')[0].split(';') if p != ''] - if float(precursor['precursor_fdr']) < 1 and len(proteins) == 1 and 'Canonical' in precursor.get('protein_type',''): - precursor['picked_protein_fdr'] = min(picked_fdr_dict.get(proteins[0],1),1) + if float(precursor['precursor_fdr']) < 1 and len(proteins) == 1 and ('Canonical' in precursor.get('protein_type','') or 'Contaminant' in precursor.get('protein_type','')): + 
precursor['picked_protein_fdr'] = min(leftover_fdr_dict.get(proteins[0],1),1) precursor['hpp_protein_fdr'] = min(hpp_fdr_dict.get(proteins[0],1),1) else: precursor['picked_protein_fdr'] = 1 @@ -765,12 +868,12 @@ def greedy_sequence_precursor_score(precursor_list, mz_distance = 2.5): 'common_fdr':min(1,common_fdr_dict.get(protein,1)), 'leftover_score':leftover_score_dict.get(protein,0), 'leftover_fdr':min(1,leftover_fdr_dict.get(protein,1)), - 'num_sequences':len(precursors_per_protein_all.get(protein,[])), - 'num_sequences_incl_shared':len(precursors_per_protein_non_unique.get(protein,[])), + 'num_sequences':unique_sequences.get(protein,0), + 'num_sequences_incl_shared':all_sequences.get(protein,0), } pass_comparison_picked_fdr,pass_comparison_hpp_fdr = comparison_picked_fdr.get(protein,1) <= args.picked_protein_fdr_comparison, comparison_hpp_fdr.get(protein,1) <= args.hpp_protein_fdr_comparison - pass_picked_fdr, pass_hpp_fdr = protein_dict['picked_fdr'] <= args.picked_protein_fdr, protein_dict['hpp_fdr'] <= args.hpp_protein_fdr + pass_picked_fdr, pass_hpp_fdr = protein_dict['leftover_fdr'] <= args.picked_protein_fdr, protein_dict['hpp_fdr'] <= args.hpp_protein_fdr if args.main_fdr == 'traditional': pass_picked_fdr, pass_hpp_fdr = protein_dict['common_fdr'] <= args.picked_protein_fdr, protein_dict['common_fdr'] <= args.hpp_protein_fdr diff --git a/tools/peptide_statistics_hpp/read_mappings.py b/tools/peptide_statistics_hpp/read_mappings.py index 8fb0a9f..a64d7fb 100644 --- a/tools/peptide_statistics_hpp/read_mappings.py +++ b/tools/peptide_statistics_hpp/read_mappings.py @@ -36,7 +36,7 @@ def read_protein_coverage(protein_coverage_file,seen_sequences,proteome, filter mapped_protein_str = l['mapped_proteins'] if 'mapped_proteins' in l else l['all_proteins_w_coords'] mapped_exon_str = l['mapped_exons'] if 'mapped_exons' in l else l['all_mapped_exons'] - all_protein_fdr = float(l.get('picked_protein_fdr',-1)) + all_protein_fdr = float(l.get('leftover_protein_fdr',-1)) hpp_protein_fdr = float(l.get('hpp_protein_fdr',-1)) cosine = float(l.get('cosine',-1))
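
Note on the synthetic lookups in cosine_to_synthetics.py: spectra are now keyed by the fully unmodified sequence (every non-alphabetic character stripped) instead of deleting the two hard-coded TMT tag masses, and the matched synthetic's own peptidoform is carried in the result tuple so the synthetic USI and synthetic_sequence column can be built from it. A minimal sketch of that bookkeeping, assuming hypothetical file names, scan numbers, peak lists, and a stand-in cosine in place of processing.match_peaks:

from collections import defaultdict

def bare_sequence(sequence):
    # strip modification deltas such as '+229.163' so only residue letters remain
    return ''.join(c for c in sequence if c.isalpha())

# best synthetic match per (filename, scan):
# (cosine, (synthetic filename, synthetic scan), synthetic peptidoform)
cosine_to_synthetic = defaultdict(lambda: (-1, ('N/A', 'N/A'), 'N/A'))

# synthetic spectra indexed by (unmodified sequence, charge); each entry keeps the
# synthetic's own peptidoform so the reported synthetic USI can be built from it
synthetic_scans = defaultdict(list)
synthetic_scans[(bare_sequence('DLPK+229.163PLSTK'), '2')].append(
    (('synthetic_run.mzML', '1234'), [(100.0, 1.0)], 'DLPK+229.163PLSTK'))  # hypothetical entry

query_filescan = ('query_run.mzML', '77')                    # hypothetical identification
query_sequence, query_charge = 'DLPK+229.162932PLSTK', '2'   # different tag notation, same key
for synthetic_filescan, synthetic_ion_vector, synthetic_peptide in \
        synthetic_scans.get((bare_sequence(query_sequence), query_charge), []):
    cosine = 0.95  # stand-in for processing.match_peaks(spectrum_ion_vector, synthetic_ion_vector, tol)
    if cosine > cosine_to_synthetic[query_filescan][0]:
        cosine_to_synthetic[query_filescan] = (cosine, synthetic_filescan, synthetic_peptide)

print(cosine_to_synthetic[query_filescan])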
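
Note on the contaminants database: download_latest_kb.py, map_peptides.py, and peptide_protein_cosine.py now accept a contaminants FASTA and build the proteome with mapping.merge_proteomes([mapping.read_uniprot(...), mapping.read_fasta(...)]) before adding decoys, and protein_type gained a 'Contaminant' branch for db == 'con'. The mapping module itself is not part of this diff, so the record shape and merge behaviour below are assumptions inferred from how proteome.proteins[...] is used; protein_type is simply the patched one-line lambda written out, and the accessions are illustrative only:

from collections import namedtuple

# Assumed record shape, based on how proteome.proteins[accession].db / .iso / .gene
# are used in peptide_protein_cosine.py; the real mapping module may differ.
Protein = namedtuple('Protein', ['db', 'iso', 'gene'])
Proteome = namedtuple('Proteome', ['proteins'])

def merge_proteomes_sketch(proteomes):
    # assumed behaviour of mapping.merge_proteomes: fold every input into one
    # accession -> record lookup, without letting later files overwrite earlier ones
    merged = {}
    for proteome in proteomes:
        for accession, record in proteome.proteins.items():
            merged.setdefault(accession, record)
    return Proteome(proteins=merged)

def protein_type(protein, proteome):
    # the patched one-line lambda written out
    entry = proteome.proteins[protein]
    if entry.db == 'con':
        return 'Contaminant'
    if entry.db == 'tr':
        return 'TrEMBL'
    return 'Canonical' if entry.iso is None else 'Isoform'

uniprot = Proteome(proteins={'P69905': Protein('sp', None, 'HBA1'),
                             'P69905-2': Protein('sp', '2', 'HBA1')})          # hypothetical isoform entry
contaminants = Proteome(proteins={'CON_KERATIN1': Protein('con', None, None)})  # hypothetical accession
proteome = merge_proteomes_sketch([uniprot, contaminants])
print([protein_type(p, proteome) for p in proteome.proteins])
# ['Canonical', 'Isoform', 'Contaminant']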
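
Note on the new mass helpers in peptide_protein_cosine.py: seq_theoretical_mass and integer_mod_mass both split a peptidoform string into residue letters and signed mass deltas. seq_theoretical_mass returns theoretical_mass(aa) + mods + 1.007276035 (a singly protonated mass), theoretical_mz returns (theoretical_mass(aa) + mods + charge * 1.007276035) / charge, and integer_mod_mass rounds each delta to the nearest integer so near-identical peptidoforms share one nominal modification mass. The patch parses the deltas with isalpha()/split and eval; the regex below is only an equivalent sketch of that parsing, and split_peptidoform is a helper name introduced here:

import re

def split_peptidoform(sequence):
    # split 'DLPK+229.163PLS+79.966TK' into residue letters and signed mass deltas
    residues = ''.join(c for c in sequence if c.isalpha())
    deltas = [float(m) for m in re.findall(r'[+-]\d+(?:\.\d+)?', sequence)]
    return residues, deltas

def integer_mod_mass(sequence):
    # nominal (integer-rounded) sum of the modification deltas, as in the patch
    return sum(int(round(d)) for d in split_peptidoform(sequence)[1])

print(split_peptidoform('DLPK+229.163PLS+79.966TK'))  # ('DLPKPLSTK', [229.163, 79.966])
print(integer_mod_mass('DLPK+229.163PLS+79.966TK'))   # 309
print(integer_mod_mass('PEPT-17.027IDE'))             # -17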
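
Note on variant-level grouping: with --variant_output 1, update_precursor_representative groups precursors into variants keyed by (unmodified sequence, charge, nominal modification mass), assigns each variant a stable variant_number in order of first appearance, records every peptidoform seen for the variant, and zeroes the score of PSMs above 1% PSM FDR. The real update also weighs explained intensity, cosine, and matched-ion thresholds when choosing the representative; the sketch below keeps only the best-score bookkeeping, with register_psm and variant_key as names introduced here and hypothetical peptidoforms:

import re
from collections import defaultdict

def variant_key(sequence, charge):
    # (unmodified sequence, charge, nominal modification mass) - the variant-level key
    residues = ''.join(c for c in sequence if c.isalpha())
    nominal_mods = sum(int(round(float(d))) for d in re.findall(r'[+-]\d+(?:\.\d+)?', sequence))
    return (residues, charge, nominal_mods)

variant_number = {}                      # stable id, assigned in order of first appearance
variant_to_best_precursor = {}           # variant -> best-scoring representative precursor
variant_to_all_precursors = defaultdict(set)

def register_psm(sequence, charge, score, psm_fdr, variant_level=True):
    # PSMs above 1% FDR still count as observations but contribute a score of 0,
    # mirroring `score = float(l['score']) if psm_fdr <= 0.01 else 0` in the patch
    score = score if psm_fdr <= 0.01 else 0
    variant = variant_key(sequence, charge) if variant_level else (sequence, charge)
    variant_to_all_precursors[variant].add((sequence, charge))
    if variant not in variant_to_best_precursor:
        variant_number[variant] = len(variant_number)
        variant_to_best_precursor[variant] = (sequence, charge, score)
    elif score > variant_to_best_precursor[variant][2]:
        variant_to_best_precursor[variant] = (sequence, charge, score)
    return variant_number[variant]

print(register_psm('PEPT+15.995IDEK', '2', 12.0, 0.001))       # 0
print(register_psm('PEPT+16.0IDEK', '2', 15.0, 0.001))         # 0 (same variant)
print(len(variant_to_all_precursors[('PEPTIDEK', '2', 16)]))   # 2 peptidoforms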
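
Note on the explained-intensity bookkeeping: the representative's explained_intensity and matched_ions are no longer overwritten by any higher-EI PSM. A candidate replaces the stored values only if it newly clears the matched-ion cutoff, or if both PSMs sit on the same side of the cutoff and the candidate explains more intensity. A small predicate capturing that rule (the function name is introduced here):

def keep_candidate_ion_evidence(current, candidate, annotated_ions_cutoff):
    # mirrors the patched branch structure in update_precursor_representative
    best_pass = int(current['matched_ions']) >= annotated_ions_cutoff
    this_pass = int(candidate['matched_ions']) >= annotated_ions_cutoff
    if not best_pass and this_pass:
        return True
    return best_pass is this_pass and \
        float(candidate['explained_intensity']) > float(current['explained_intensity'])

print(keep_candidate_ion_evidence({'matched_ions': 3, 'explained_intensity': 0.9},
                                  {'matched_ions': 6, 'explained_intensity': 0.4}, 5))  # True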
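
Note on the quality filters: row_pass_filters is now built on a parameterized row_pass_filters_input, and the new row_pass_filters_compare variant reads the cutoffs from the comparison row itself, so coverage files supplied via --protein_coverage_external_compare are judged against the thresholds they were generated with. The lambdas written out as functions, with a hypothetical row:

def row_pass_filters_input(best_psm, explained_intensity_cutoff, cosine_cutoff, annotated_ions_cutoff):
    # a row passes if it clears the explained-intensity OR cosine cutoff,
    # and in every case the annotated-ion count cutoff
    return ((float(best_psm['explained_intensity']) >= explained_intensity_cutoff or
             float(best_psm['cosine']) >= cosine_cutoff) and
            int(best_psm['matched_ions']) >= annotated_ions_cutoff)

def row_pass_filters_compare(best_psm):
    # external comparison rows carry the thresholds they were originally filtered with
    return row_pass_filters_input(best_psm,
                                  float(best_psm['explained_intensity_cutoff']),
                                  float(best_psm['cosine_cutoff']),
                                  int(float(best_psm['annotated_ions_cutoff'])))

row = {'explained_intensity': '0.62', 'cosine': '-1', 'matched_ions': '7',
       'explained_intensity_cutoff': '0.6', 'cosine_cutoff': '0.7', 'annotated_ions_cutoff': '6.0'}
print(row_pass_filters_compare(row))  # True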
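
Note on decoy flagging and FDR: a PSM row is now marked as a decoy only when none of its proteins is a target (every accession carries the XXX_ prefix), only rows that pass the quality filters feed fdr.ScoredElement for PSM-level FDR, and precursors whose score was zeroed are excluded from precursor FDR. The repo's fdr module is not shown in this diff, so the q-value style calculation below is only an assumed sketch of what calculate_fdr might do, with hypothetical PSM ids and accessions:

from collections import namedtuple

ScoredElement = namedtuple('ScoredElement', ['id', 'is_decoy', 'score'])

def calculate_fdr_sketch(elements):
    # assumed behaviour: walk elements from best to worst score, estimate FDR as
    # decoys/targets at each score, then enforce monotonicity from the bottom up
    running, decoys, targets = [], 0, 0
    for e in sorted(elements, key=lambda e: float(e.score), reverse=True):
        decoys, targets = decoys + e.is_decoy, targets + (not e.is_decoy)
        running.append((e.id, decoys / max(targets, 1)))
    fdrs, q = {}, 1.0
    for element_id, ratio in reversed(running):
        q = min(q, ratio)
        fdrs[element_id] = q
    return fdrs

# decoy only when *none* of the mapped proteins is a target, as in the patch:
# all_targets = [p for p in proteins if 'XXX_' not in p]; is_decoy = len(all_targets) == 0
proteins = ['XXX_Q00001']                                    # hypothetical decoy-only match
print(len([p for p in proteins if 'XXX_' not in p]) == 0)    # True
print(calculate_fdr_sketch([ScoredElement('psm1', False, 10),
                            ScoredElement('psm2', True, 8),
                            ScoredElement('psm3', False, 6)]))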
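
Note on HPP protein scoring: greedy_sequence_precursor_score now deduplicates on singly protonated theoretical mass and charge (same-charge precursors within a 3 Da window collapse to their best-scoring member, replacing the old 2.5 m/z window), and the surviving scores are combined by a pluggable function chosen via --hpp_protein_score_aggregation (sum or max). A standalone version of the patched function, with hypothetical precursor tuples:

def greedy_sequence_precursor_score(precursor_list, distance=3, score_aggregation_func=sum):
    # precursor_list holds (score, singly protonated theoretical mass, charge) tuples
    # for one peptide position; keep only the best-scoring member of any cluster of
    # same-charge precursors within `distance` Da, then aggregate the survivors
    used_precursors = []
    for precursor in sorted(precursor_list, key=lambda x: x[0], reverse=True):
        clustered = any(precursor[2] == seen[2] and abs(precursor[1] - seen[1]) <= distance
                        for seen in used_precursors)
        if not clustered:
            used_precursors.append(precursor)
    return score_aggregation_func([p[0] for p in used_precursors])

hpp_score_aggregation = max   # or sum, per --hpp_protein_score_aggregation
print(greedy_sequence_precursor_score(
    [(10.0, 800.41, '2'), (8.0, 801.15, '2'), (7.0, 800.41, '3')],  # hypothetical precursors
    score_aggregation_func=hpp_score_aggregation))                  # 10.0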