Skip to content
Pierre Lindenbaum edited this page Jan 13, 2016 · 13 revisions

##Motivation

Compare two VCFs and print common/exclusive information for each sample/genotype

##Compilation

See also Compilation.

$  make vcfcomparecallers

##Synopsis

$ java -jar dist/vcfcomparecallers.jar file1.vcf(.gz) stdin 
$ java -jar dist/vcfcomparecallers.jar file1.vcf(.gz) file2.vcf(.gz) 

both vcf must be sorted on CHROM/POS/REF

##Options

Option Description
-o (filename) output file. default:stdout
-e (filename.xml) Save some examples of variant/genotype in each category. Optional
-n (int) Number of example to save with option '-e '
-c consider HomRef as NoCall : when merging multiple vcf with GATK. there is no homref because everything is no call. Problem when comparing with multiple called VCF
-h get help (this screen) and exit.
-v print version and exit.
-L (level) log level. One of java.util.logging.Level . Optional.

##Source Code

Main code is: https://github.com/lindenb/jvarkit/blob/master/src/main/java/com/github/lindenb/jvarkit/tools/vcfcmp/VcfCompareCallers.java

##Example

$ java -jar dist-1.128/vcfcomparecallers.jar  Proj1.samtools.vcf.gz  Proj1.varscan.vcf.gz
#Sample	unique_to_file_1	unique_to_file_1_snp	unique_to_file_1_indel	unique_to_file_2	unique_to_file_2_snp	unique_to_file_2_indel	both_missing	common_context	common_context_snp	common_context_indel	common_context_discordant_id	called_and_same	called_and_same_hom_ref	called_and_same_hom_var	called_and_same_het	called_but_discordant	called_but_discordant_hom1_het2	called_but_discordant_het1_hom2	called_but_discordant_hom1_hom2	called_but_discordant_het1_het2	called_but_discordant_others
B00G5XG	43739	15531	27518	0	10773	11730	2182	558753	535010	22508	55052	1043356	0	26920	41136	3047	698	1993	152	204	0
B00G74M	43629	15445	27503	0	10739	11747	2346	558716	534939	22526	55092	1043355	0	27962	40295	2910	742	1823	164	181	0
B00G5XF	43542	15344	27515	0	10742	11691	2185	559017	535236	22533	55089	1044311	0	26842	40961	2960	809	1821	157	173	0
B00G74L	43705	15461	27543	0	10765	11745	2356	558606	534872	22509	55053	1041955	0	26849	42430	2989	725	1904	175	185	0
B00G5XE	43589	15393	27515	0	10764	11708	2425	558691	534970	22481	55052	1042648	0	27088	41698	2974	746	1906	152	170	0
$ java -jar dist-1.128/vcfcomparecallers.jar  Proj1.samtools.vcf.gz  Proj1.varscan.vcf.gz | verticalize

>>> 2
$1	#Sample	B00G5XG
$2	unique_to_file_1	43739
$3	unique_to_file_1_snp	15531
$4	unique_to_file_1_indel	27518
$5	unique_to_file_2	0
$6	unique_to_file_2_snp	10773
$7	unique_to_file_2_indel	11730
$8	both_missing	2182
$9	common_context	558753
$10	common_context_snp	535010
$11	common_context_indel	22508
$12	common_context_discordant_id	55052
$13	called_and_same	1043356
$14	called_and_same_hom_ref	0
$15	called_and_same_hom_var	26920
$16	called_and_same_het	41136
$17	called_but_discordant	3047
$18	called_but_discordant_hom1_het2	698
$19	called_but_discordant_het1_hom2	1993
$20	called_but_discordant_hom1_hom2	152
$21	called_but_discordant_het1_het2	204
$22	called_but_discordant_others	0
<<< 2

>>> 3
$1	#Sample	B00G74M
$2	unique_to_file_1	43629
$3	unique_to_file_1_snp	15445
$4	unique_to_file_1_indel	27503
$5	unique_to_file_2	0
$6	unique_to_file_2_snp	10739
$7	unique_to_file_2_indel	11747
$8	both_missing	2346
$9	common_context	558716
$10	common_context_snp	534939
$11	common_context_indel	22526
$12	common_context_discordant_id	55092
$13	called_and_same	1043355
$14	called_and_same_hom_ref	0
$15	called_and_same_hom_var	27962
$16	called_and_same_het	40295
$17	called_but_discordant	2910
$18	called_but_discordant_hom1_het2	742
$19	called_but_discordant_het1_hom2	1823
$20	called_but_discordant_hom1_hom2	164
$21	called_but_discordant_het1_het2	181
$22	called_but_discordant_others	0
<<< 3

>>> 4
$1	#Sample	B00G5XF
$2	unique_to_file_1	43542
$3	unique_to_file_1_snp	15344
$4	unique_to_file_1_indel	27515
$5	unique_to_file_2	0
$6	unique_to_file_2_snp	10742
$7	unique_to_file_2_indel	11691
$8	both_missing	2185
$9	common_context	559017
$10	common_context_snp	535236
$11	common_context_indel	22533
$12	common_context_discordant_id	55089
$13	called_and_same	1044311
$14	called_and_same_hom_ref	0
$15	called_and_same_hom_var	26842
$16	called_and_same_het	40961
$17	called_but_discordant	2960
$18	called_but_discordant_hom1_het2	809
$19	called_but_discordant_het1_hom2	1821
$20	called_but_discordant_hom1_hom2	157
$21	called_but_discordant_het1_het2	173
$22	called_but_discordant_others	0
<<< 4


<<< 4

Examples

the following XSLT stylesheet can be used to produce a HTML table for a few differences:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml" version="1.0">
  <xsl:output method="xml"/>
  <xsl:template match="/">
  	<table>
  	<thead>
  		<tr><th>Category</th><th>Sample</th><th>Variant 1</th><th>Genotype 1</th><th>Variant 2</th><th>Genotype 2</th></tr>
  	</thead>
  	<tbody>
    <xsl:apply-templates select="compare-callers/diff">
    	 <xsl:sort select="@sample" />
    	 <xsl:sort select="@type" /> 
    </xsl:apply-templates>
    </tbody>
    </table>
  </xsl:template>
  
  <xsl:template match="diff">
  	<tr>
  		<td><xsl:value-of select="@type"/></td>
  		<td><xsl:value-of select="@sample"/></td>
  		<td><xsl:apply-templates select="variant[@file='1']"/></td>
  		<td><xsl:apply-templates select="variant[@file='1']/genotype"/></td>
  		<td><xsl:apply-templates select="variant[@file='2']"/></td>
  		<td><xsl:apply-templates select="variant[@file='2']/genotype"/></td>
  	</tr>
  	<xsl:text>
</xsl:text>
  </xsl:template>
 
   <xsl:template match="variant">
    <xsl:value-of select="concat('(',@type,') ',chrom,':',pos,' ',id,' ',ref,'/',alts)"/>
   </xsl:template>
 
  <xsl:template match="genotype">
  	<xsl:value-of select="concat('(',@type,')')"/>
  	<xsl:for-each select="allele">
  	<xsl:if test="position()>1">/</xsl:if>
  	<xsl:value-of select="."/>
  	</xsl:for-each>
  	<xsl:if test="dp"> DP:<xsl:value-of select="dp"/></xsl:if>
   </xsl:template>
 

 
</xsl:stylesheet>

This other stylesheet can be used to print the context of the variant for a given BAM (pipe the output to bash )

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml" version="1.0">

  <xsl:output method="text"/>
  <xsl:template match="/">

    <xsl:apply-templates select="compare-callers/diff">
    	 <xsl:sort select="@sample" />
    	 <xsl:sort select="@type" /> 
    </xsl:apply-templates>
  </xsl:template>
  
  <xsl:template match="diff">

  	
  	
  	<xsl:variable name="type" select="@type"/>  	
  	<xsl:variable name="sample" select="@sample"/>  	

  	
  	echo "Sample **<xsl:value-of select="@sample"/>** and category **<xsl:value-of select="@type"/>** at **<xsl:choose>
  		<xsl:when test="variant[@file='1']">
  			<xsl:apply-templates select="variant[@file='1']"/>
  		</xsl:when>
  		<xsl:when test="variant[@file='2']">
  			<xsl:apply-templates select="variant[@file='2']"/>
  		</xsl:when>
  	</xsl:choose>**"
  	
  	echo '```'
  	
  	COLUMNS=30 samtools tview -d T -p "<xsl:choose>
  		<xsl:when test="variant[@file='1']">
  			<xsl:apply-templates select="variant[@file='1']" mode="upstream"/>
  		</xsl:when>
  		<xsl:when test="variant[@file='2']">
  			<xsl:apply-templates select="variant[@file='2']" mode="upstream"/>
  		</xsl:when>
  	</xsl:choose>" path/to/file.bam ref.fasta
  	echo '```'
  	

  </xsl:template>
 
   <xsl:template match="variant">
    <xsl:value-of select="concat(chrom,':',pos)"/>
   </xsl:template>
 
    <xsl:template match="variant" mode="upstream">
    <xsl:value-of select="concat(chrom,':',number(pos)-10)"/>
   </xsl:template>
 
</xsl:stylesheet>

Contribute

##See also

##History

  • 2015 : Creation

License

The project is licensed under the MIT license.

Clone this wiki locally