Mercurial > repos > urgi-team > gandalfworkflow
diff VCFStorage_wrapper.xml @ 7:a6b557df86db draft
Uploaded
author | urgi-team |
---|---|
date | Tue, 15 Dec 2015 05:35:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFStorage_wrapper.xml Tue Dec 15 05:35:36 2015 -0500 @@ -0,0 +1,236 @@ +<tool id="VCFStorage" name="VCFStorage" version="0.01"> + <description> stores info from variant calling into a table. It will create a tabular file with SNP information</description> + <requirements> + <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement> + </requirements> + <version_command> + VCFStorage.py --version + </version_command> + <command interpreter="python"> + VCFStorage_wrapper.py -f $inputFasta -o $outputVCFStorage + ## genome list + #for $VCF in $VCFFile + -l $VCF.strainName + -L $VCF.inputStrainVCF + #end for + </command> + <inputs> + <param name="inputFasta" type="data" format="fasta" label="Input genome sequence file name (fasta)"/> + <repeat name="VCFFile" title="VCF list" min="1"> + <param name="strainName" size="20" type="text" value="V1" label="strain name (no space allowed)"/> + <param name="inputStrainVCF" type="data" format="vcf" label="Select VCF file "/> + </repeat> + </inputs> + <outputs> + <data format="tabular" name="outputVCFStorage" label="${tool.name} on ${on_string} (tabular)"/> + </outputs> + <tests> + <test> + <param name="inputFasta" ftype="fasta" value="chr17.fa" /> + <param name="strainName" value="V1"/> + <param name="inputStrainVCF" ftype="vcf" value="chr17.VCF"/> + <output name="outputVCFStorage" ftype="tabular" file="Expchr17.tab"/> + </test> + </tests> + <help><![CDATA[ + **stores info from variant calling into a table. It will create a tabular file with SNP information** + +----- + +**what it does :** + +VCFStorage.py is a Python script that stores data from multiple VCF files in a single tabular marker file. Each VCF becomes a new column in the final output. + +----- + +**input format :** + +Multiple files are necessary as input : + + - the fasta file of your genomic sequence + - multiple VCF files (1 per strain). 
It is strongly advised to use the column filter (col 7) for filtered positions instead of removing the lines from the VCF. + +----- + +**output format :** + +The result is a tab-delimited file where all genomic positions are in rows, and all strains are in columns (in the order you gave the VCF) + +For each position and each genome, a code is attributed : + +- for the reference : :: + + A,T,G,C for the corresponding nucleotide + +- for the genomes : :: + + U if the position was not reported in the VCF file + R if the base is identical to the reference + F if the base has been filtered in the column FILTER (column 7) of the VCF + A,T,G,C if the genome has a validated SNP at the position + + +----- + +**example :** + +fasta input file (genomic sequence): :: + + >chr_17 + ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa + TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT + +VCF input file : :: + + ##fileformat=VCFv4.1 + ##fileDate=20140725 + ##source=freeBayes v0.9.13-2-ga830efd + ##reference=exmple.fsa + ##phasing=none + ##DetectedFormat=freebayes + ##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)"> + ##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)"> + ##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)"> + ##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected"> + ##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N"> + #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT V1 + chr_17 17 . 
A G 529.213 G_AF;G_DP AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.58;G_DP=36;G_Base=G GT:DP:RO:QR:AO:QA:GL 0/1:36:15:535:21:751:-10,0,-10 + chr_17 37 . C G 1082.38 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=34;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=3.26577;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=48.0391;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1243;QR=0;RO=0;RPP=15.5282;RPPR=0;RUN=1;SAF=18;SAP=3.26577;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=34;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:34:0:0:34:1243:-10,-9.23017,0 + chr_17 40 . T T 825.518 G_AF AB=0;ABP=0;AC=2;AF=1;AN=2;AO=29;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=6.67934;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=8.92992;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1082;QR=178;RO=5;RPP=9.07545;RPPR=13.8677;RUN=1;SAF=13;SAP=3.68421;SAR=16;SRF=5;SRP=13.8677;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.85;G_DP=34;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:34:5:178:29:1082:-10,0,-6.82575 + chr_17 60 . A . 699.741 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=22;CIGAR=1X;DP=22;DPB=22;DPRA=0;EPP=17.2236;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=32.2544;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=823;QR=0;RO=0;RPP=9.32731;RPPR=0;RUN=1;SAF=12;SAP=3.40511;SAR=10;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=22;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:22:0:0:22:823:-10,-5.98732,0 + chr_17 73 . T . 846.299 . 
AB=0;ABP=0;AC=2;AF=1;AN=2;AO=27;CIGAR=1X;DP=27;DPB=27;DPRA=0;EPP=16.6021;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=38.84;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1002;QR=0;RO=0;RPP=5.02092;RPPR=0;RUN=1;SAF=21;SAP=21.1059;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=27;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:27:0:0:27:1002:-10,-7.34226,0 + chr_17 81 . C T 764.464 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=25;CIGAR=1X;DP=25;DPB=25;DPRA=0;EPP=13.5202;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=36.1324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=902;QR=0;RO=0;RPP=3.79203;RPPR=0;RUN=1;SAF=19;SAP=17.6895;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=25;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:25:0:0:25:902:-10,-6.76842,0 + chr_17 105 . C T 1154 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=37;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=5.88603;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=52.0047;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1336;QR=0;RO=0;RPP=19.9713;RPPR=0;RUN=1;SAF=23;SAP=7.76406;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=37;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:37:0:0:37:1336:-10,-10,0 + chr_17 112 . 
G A 1276.25 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A GT:DP:RO:QR:AO:QA:GL 1/1:40:0:0:40:1471:-10,-10,0 + +expected result : :: + + CHROM POS reference V1 + chr_17 1 C U + chr_17 2 C U + chr_17 3 C U + chr_17 4 T U + chr_17 5 A U + chr_17 6 A U + chr_17 7 C U + chr_17 8 C U + chr_17 9 C U + chr_17 10 T U + chr_17 11 A U + chr_17 12 A U + chr_17 13 C U + chr_17 14 C U + chr_17 15 C U + chr_17 16 T U + chr_17 17 A F + chr_17 18 A U + chr_17 19 C U + chr_17 20 C U + chr_17 21 C U + chr_17 22 T U + chr_17 23 A U + chr_17 24 A U + chr_17 25 C U + chr_17 26 C U + chr_17 27 C U + chr_17 28 T U + chr_17 29 A U + chr_17 30 A U + chr_17 31 C U + chr_17 32 C U + chr_17 33 C U + chr_17 34 T U + chr_17 35 A U + chr_17 36 A U + chr_17 37 C G + chr_17 38 C U + chr_17 39 C U + chr_17 40 T F + chr_17 41 A U + chr_17 42 A U + chr_17 43 C U + chr_17 44 C U + chr_17 45 C U + chr_17 46 T U + chr_17 47 A U + chr_17 48 A U + chr_17 49 C U + chr_17 50 C U + chr_17 51 C U + chr_17 52 T U + chr_17 53 A U + chr_17 54 A U + chr_17 55 C U + chr_17 56 C U + chr_17 57 C U + chr_17 58 T U + chr_17 59 A U + chr_17 60 A R + chr_17 61 T U + chr_17 62 A U + chr_17 63 C U + chr_17 64 G U + chr_17 65 C U + chr_17 66 G U + chr_17 67 C U + chr_17 68 G U + chr_17 69 C U + chr_17 70 G U + chr_17 71 C U + chr_17 72 C U + chr_17 73 T R + chr_17 74 A U + chr_17 75 A U + chr_17 76 C U + chr_17 77 C U + chr_17 78 C U + chr_17 79 T U + chr_17 80 A U + chr_17 81 C T + chr_17 82 G U + chr_17 83 A U + chr_17 84 C U + chr_17 85 T U + chr_17 86 T U + chr_17 87 T U + chr_17 88 A U + chr_17 89 A U + chr_17 90 C U + chr_17 91 C U + chr_17 92 T U + chr_17 93 A U + chr_17 94 C U + chr_17 95 T U + chr_17 96 C U + chr_17 97 T U 
+ chr_17 98 A U + chr_17 99 A U + chr_17 100 A U + chr_17 101 C U + chr_17 102 T U + chr_17 103 C U + chr_17 104 T U + chr_17 105 C F + chr_17 106 C U + chr_17 107 T U + chr_17 108 A U + chr_17 109 C U + chr_17 110 T U + chr_17 111 A U + chr_17 112 G F + chr_17 113 T U + chr_17 114 A U + chr_17 115 C U + chr_17 116 G U + chr_17 117 T U + chr_17 118 C U + chr_17 119 T U + chr_17 120 T U + +----- + +**reference :** + +]]> + </help> +</tool>