Mercurial > repos > miller-lab > genome_diversity
comparison gd_snp2vcf.xml @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:4188853b940b | 31:a631c2f6d913 |
---|---|
1 <tool id="gd_gd_snp2vcf" name="gd_snp to VCF" version="1.1.0" force_history_refresh="True"> | |
2 <description>: Convert from gd_snp or gd_genotype to VCF format, for submission to dbSNP</description> | |
3 | |
4 <command interpreter="perl"> | |
5 gd_snp2vcf.pl "$input" -handle=$hand -batch=$batch -ref=$ref -metaOut=$output2 | |
6 #if $individuals.choice == '0': | |
7 #set $geno = '' | |
8 #for $individual_col in $input.dataset.metadata.individual_columns | |
9 ##need to check the number of cols per individual | |
10 #if $input.ext == "gd_snp": | |
11 #set $t = $individual_col + 2 | |
12 #else if $input.ext == "gd_genotype": | |
13 #set $t = $individual_col | |
14 #else: | |
15 #set $t = $individual_col | |
16 #end if | |
17 #set $geno += "%d," % ($t) | |
18 #end for | |
19 #if $individuals.pall_id != '': | |
20 -population=$individuals.pall_id | |
21 #end if | |
22 #else if $individuals.choice == '1': | |
23 #set $geno = '' | |
24 #set $pop = '' | |
25 #if $input.ext == "gd_snp": | |
26 -off=2 | |
27 #else if $input.ext == "gd_genotype": | |
28 -off=0 | |
29 #else: | |
30 -off=2 | |
31 #end if | |
32 #for $population in $individuals.populations | |
33 #set $geno += "%s," % ($population.p1_input) | |
34 #set $pop += "%s," % ($population.p1_id) | |
35 #end for | |
36 -population=$pop | |
37 #else if $individuals.choice == '2': | |
38 #set $geno = $individuals.geno | |
39 #end if | |
40 -geno=$geno | |
41 #if $bioproj.value != '': | |
42 -bioproj=$bioproj | |
43 #end if | |
44 #if $biosamp.value != '': | |
45 -biosamp=$biosamp | |
46 #end if | |
47 > $output | |
48 </command> | |
49 | |
50 <inputs> | |
51 <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP dataset" /> | |
52 <conditional name="individuals"> | |
53 <param name="choice" type="select" label="Generate dataset for"> | |
54 <option value="0" selected="true">All individuals</option> | |
55 <option value="1">Individuals in populations</option> | |
56 <option value="2">A single individual</option> | |
57 </param> | |
58 <when value="0"> | |
59 <param name="pall_id" type="text" size="20" label="ID for this population" help="Leaving this blank will omit allele counts from the output" /> | |
60 </when> | |
61 <when value="1"> | |
62 <repeat name="populations" title="Population" min="1"> | |
63 <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> | |
64 <param name="p1_id" type="text" size="20" label="ID for this population" help="Leaving this blank will omit allele counts from the output" /> | |
65 </repeat> | |
66 </when> | |
67 <when value="2"> | |
68 <param name="geno" type="data_column" data_ref="input" label="Column containing genotype" value="8" /> | |
69 </when> | |
70 </conditional> | |
71 <param name="hand" type="text" size="20" label="dbSNP handle" help="If you do not have a handle, request one at http://www.ncbi.nlm.nih.gov/projects/SNP/handle.html" /> | |
72 <param name="batch" type="text" size="20" label="Batch ID" help="ID used to tie dbSNP metadata to the VCF submission" /> | |
73 <param name="ref" type="text" size="20" label="Reference sequence ID" help="The RefSeq assembly accession.version on which the SNP positions are based (see http://www.ncbi.nlm.nih.gov/assembly/)" /> | |
74 <param name="bioproj" type="text" size="20" label="Optional: Registered BioProject ID" /> | |
75 <param name="biosamp" type="text" size="20" label="Optional: Comma-separated list of registered BioSample IDs" /> | |
76 </inputs> | |
77 | |
78 <outputs> | |
79 <data name="output" format="vcf" /> | |
80 <data name="output2" format="text" /> | |
81 </outputs> | |
82 | |
83 <tests> | |
84 <test> | |
85 <param name="input" value="sample.gd_snp" ftype="gd_snp" /> | |
86 <param name="choice" value="2" /> | |
87 <param name="geno" value="11" /> | |
88 <param name="hand" value="MyHandle" /> | |
89 <param name="batch" value="Test1" /> | |
90 <param name="ref" value="pb_000001.1" /> | |
91 <output name="output" file="snpsForSubmission.vcf" ftype="vcf" compare="diff" /> | |
92 <output name="output2" file="snpsForSubmission.text" ftype="text" compare="diff" /> | |
93 </test> | |
94 </tests> | |
95 | |
96 <help> | |
97 | |
98 **Dataset formats** | |
99 | |
100 The input dataset is in gd_snp_ or gd_genotype_ format. | |
101 The output consists of two datasets needed for submitting SNPs: | |
102 a VCF_ file in the specific format required by dbSNP, and a partially | |
103 completed text_ file for the associated dbSNP metadata. | |
104 (`Dataset missing?`_) | |
105 | |
106 .. _gd_snp: ./static/formatHelp.html#gd_snp | |
107 .. _gd_genotype: ./static/formatHelp.html#gd_genotype | |
108 .. _VCF: ./static/formatHelp.html#vcf | |
109 .. _text: ./static/formatHelp.html#text | |
110 .. _Dataset missing?: ./static/formatHelp.html | |
111 | |
112 ----- | |
113 | |
114 **What it does** | |
115 | |
116 This tool converts a dataset in gd_snp or gd_genotype format to a VCF file formatted | |
117 for submission to the dbSNP database at NCBI. It also creates a partially | |
118 filled-in template to assist you in preparing the required "metadata" file | |
119 describing the SNP submission. | |
120 | |
121 ----- | |
122 | |
123 **Example** | |
124 | |
125 - input:: | |
126 | |
127 #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", | |
128 #"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} | |
129 Contig161 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 | |
130 Contig48 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 | |
131 Contig20 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 | |
132 etc. | |
133 | |
134 - VCF output (for all individuals, and giving a population ID):: | |
135 | |
136 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT PB | |
137 Contig161 115 Contig161;115 C T 73.5 . VRT=6 NA:AC 8:0 | |
138 Contig48 11 Contig48;11 A G 94.3 . VRT=6 NA:AC 8:0 | |
139 Contig20 66 Contig20;66 C T 54.0 . VRT=6 NA:AC 8:0 | |
140 etc. | |
141 | |
142 Note: This excerpt from the output does not show all of the headers. Also, | |
143 if the population ID had not been given, then the last two columns would not | |
144 appear in the output. | |
145 | |
146 ----- | |
147 | |
148 **Reference** | |
149 | |
150 Sherry ST, Ward MH, Kholodov M, Baker J, Phan L, Smigielski EM, Sirotkin K. | |
151 dbSNP: the NCBI database of genetic variation. Nucleic Acids Res. 2001 | |
152 Jan 1;29(1):308-11. | |
153 | |
154 </help> | |
155 </tool> |