Mercurial > repos > miller-lab > genome_diversity
comparison make_phylip.xml @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | |
children | ea52b23f1141 |
comparison
equal
deleted
inserted
replaced
30:4188853b940b | 31:a631c2f6d913 |
---|---|
1 <tool id="gd_make_phylip" name="Phylip" version="1.0.0" force_history_refresh="True"> | |
2 <description>: prepare data for phylogenetic analysis</description> | |
3 | |
4 <command interpreter="python"> | |
5 #set $zero_based = 1 | |
6 #set $gen_chrClmn = int($input.metadata.scaffold) - $zero_based | |
7 #set $gen_posClmn = int($input.metadata.pos) - $zero_based | |
8 #set $gen_refClmn = int($input.metadata.pos) - $zero_based + 1 | |
9 #set $gen_altrClmn = int($input.metadata.pos) - $zero_based + 2 | |
10 make_phylip.py '--altrClmn=$gen_altrClmn' '--chrClmn=$gen_chrClmn' '--gd_indivs=$indivs_input' '--input=$input' '--output=$output1' '--output_id=$output1.id' '--output_dir=$__new_file_path__' '--posClmn=$gen_posClmn' '--refClmn=$gen_refClmn' | |
11 #if $input_type.choice == '0' | |
12 #set $cov_chrClmn = int($input_type.coverage_input.metadata.scaffold) - $zero_based | |
13 #set $cov_posClmn = int($input_type.coverage_input.metadata.pos) - $zero_based | |
14 #set $cov_refClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 1 | |
15 #set $cov_altrClmn = int($input_type.coverage_input.metadata.pos) - $zero_based + 2 | |
16 '--altrClmnCvrg=$cov_altrClmn' '--chrClmnCvrg=$cov_chrClmn' '--cvrgTreshold=$input_type.coverage_threshold' '--gd_indivs_cover=$indivs_input' '--indvlsPrctTrshld=$input_type.indivs_threshold' '--inputCover=$input_type.coverage_input' '--posClmnCvrg=$cov_posClmn' '--refClmnCvrg=$cov_refClmn' | |
17 #else if $input_type.choice == '1' | |
18 #set $fchrClmn = int($input_type.annotation_input.metadata.chromCol) - $zero_based | |
19 #set $strandClmn = int($input_type.annotation_input.metadata.strandCol) - $zero_based | |
20 #set $geneNameClmn = int($input_type.annotation_input.metadata.nameCol) - $zero_based | |
21 #set $txStartClmn = int(str($input_type.tx_start_col)) - $zero_based | |
22 #set $txEndClmn = int(str($input_type.tx_end_col)) - $zero_based | |
23 #set $cdsStartClmn = int(str($input_type.cds_start_col)) - $zero_based | |
24 #set $cdsEndClmn = int(str($input_type.cds_end_col)) - $zero_based | |
25 #set $startExsClmn = int(str($input_type.exs_start_col)) - $zero_based | |
26 #set $endExsClmn = int(str($input_type.exs_end_col)) - $zero_based | |
27 '--cdsEndClmn=$cdsEndClmn' '--cdsStartClmn=$cdsStartClmn' '--endExsClmn=$endExsClmn' '--fchrClmn=$fchrClmn' '--geneNameClmn=$geneNameClmn' '--gene_info=$input_type.annotation_input' '--sequence=$input_type.fasta_input' '--startExsClmn=$startExsClmn' '--strandClmn=$strandClmn' '--txEndClmn=$txEndClmn' '--txStartClmn=$txStartClmn' | |
28 #end if | |
29 </command> | |
30 | |
31 <inputs> | |
32 <param name="input" type="data" format="gd_genotype,gd_snp" label="Genotype/SNP dataset"> | |
33 <validator type="metadata" check="scaffold" message="scaffold missing" /> | |
34 <validator type="metadata" check="pos" message="pos missing" /> | |
35 </param> | |
36 <param name="indivs_input" type="data" format="gd_indivs" label="Individuals dataset" /> | |
37 <conditional name="input_type"> | |
38 <param name="choice" type="select" format="integer" label="Input type"> | |
39 <option value="0" selected="true">Coverage</option> | |
40 <option value="1">Genes</option> | |
41 </param> | |
42 <when value="0"> | |
43 <param name="coverage_input" type="data" format="gd_genotype,gd_snp" label="Coverage dataset"> | |
44 <validator type="metadata" check="scaffold" message="scaffold missing" /> | |
45 <validator type="metadata" check="pos" message="pos missing" /> | |
46 </param> | |
47 <param name="coverage_threshold" type="integer" min="1" value="1" label="Coverage threshold" /> | |
48 <param name="indivs_threshold" type="float" value="0.5" min="0.0" max="1.0" label="Individuals genotype percentage threshold" /> | |
49 </when> | |
50 <when value="1"> | |
51 <param name="annotation_input" type="data" format="interval" label="Genes dataset"> | |
52 <validator type="metadata" check="chromCol" message="chromCol missing" /> | |
53 <validator type="metadata" check="strandCol" message="strandCol missing" /> | |
54 <validator type="metadata" check="nameCol" message="nameCol missing" /> | |
55 </param> | |
56 <param name="tx_start_col" type="data_column" data_ref="input" label="Genes transcript start column" /> | |
57 <param name="tx_end_col" type="data_column" data_ref="input" label="Genes transcript end column" /> | |
58 <param name="cds_start_col" type="data_column" data_ref="input" label="Genes coding sequence start column" /> | |
59 <param name="cds_end_col" type="data_column" data_ref="input" label="Genes coding sequence end column" /> | |
60 <param name="exs_start_col" type="data_column" data_ref="input" label="Genes exon starts column" /> | |
61 <param name="exs_end_col" type="data_column" data_ref="input" label="Genes exon ends column" /> | |
62 <param name="fasta_input" type="data" format="fasta" label="FASTA dataset" /> | |
63 </when> | |
64 </conditional> | |
65 </inputs> | |
66 | |
67 <outputs> | |
68 <data name="output1" format="txt" /> | |
69 </outputs> | |
70 | |
71 <help> | |
72 **What it does** | |
73 | |
74 This tool creates phylip formatted files from two different input types: | |
75 coverage and genes. | |
76 | |
77 If the coverage option is selected the inputs for the program are: | |
78 | |
79 1. a gd_indivs table | |
80 2. a gd_genotype file with the coverage information for individuals in the gd_indivs table | |
81 3. a gd_genotype file with the genotype information for individuals in the gd_indivs table | |
82 4. a coverage threshold (optional) | |
83 5. a percentage of individuals (threshold). | |
84 | |
85 The program produces a phylip formatted file using the sequence in the | |
86 genotype file as a template. In this sequence nucleotides for each | |
87 sequence that are below the coverage threshold, or the positions with | |
88 a percentage of individuals below the selected value are replaced by "N". | |
89 | |
90 If the gene option is selected the inputs for the program are: | |
91 | |
92 1. a gd_indivs table | |
93 2. a gene dataset table with a gene name in the first column | |
94 3. the column with transcript start in the gene dataset table | |
95 4. the column with transcript end in the gene dataset table | |
96 5. the column with coding start in the gene dataset table | |
97 6. the column with coding end in the gene dataset table | |
98 7. the column with exon starts (comma-separated) in the gene dataset table | |
99 8. the column with exon ends (comma-separated) in the gene dataset table | |
100 9. a FASTA formatted file for all the genes of interest with their names as headers (NOTE: these names must match the gene names in the input gene dataset table). | |
101 | |
102 The program produces as output one phylip formatted file for each gene | |
103 in the gene dataset table. | |
104 | |
105 ----- | |
106 | |
107 **Example** | |
108 | |
109 In a case where the coverage option is selected, for the inputs: | |
110 | |
111 - gd_indivs:: | |
112 | |
113 7 W_Java | |
114 10 E_Java | |
115 16 Pen_Ma | |
116 ... | |
117 | |
118 - Genotype table:: | |
119 | |
120 chrM 15 T C -1 -1 2 -1 -1 2 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 0 -1 -1 | |
121 chrM 18 G A -1 -1 0 -1 -1 0 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1 | |
122 chrM 20 C T -1 -1 0 -1 -1 2 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1 | |
123 ... | |
124 | |
125 - Coverage table:: | |
126 | |
127 chrM 0 G G 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
128 chrM 1 T T 0 0 3 0 0 50 0 0 0 0 0 2 0 | |
129 chrM 2 T T 0 0 5 0 0 50 0 0 0 0 0 2 0 | |
130 ... | |
131 | |
132 - Coverage threshold = 0 | |
133 | |
134 - Percentage of individuals = 0.0 | |
135 | |
136 - The output is:: | |
137 | |
138 4 19 15428 | |
139 W_Java GTTCATCATGTTCATCGAAT | |
140 E_Java GTTCATCATGTTCATCGAAC | |
141 Pen_Ma GTTCATCATGTTCATCGAAT | |
142 | |
143 In a case where the gene option is selected, with the inputs: | |
144 | |
145 - Gene dataset table input:: | |
146 | |
147 1 ENSLAFT00000017123 chrM + 1002 1061 1002 1061 1 1002, 1061, 0 ENSLAFG00000017122 cmpl incmpl 0, BTRC ENSLAFT00000017123 ENSLAFP00000014355 | |
148 1 ENSLAFT00000037164 chrM - 1058 1092 1062 1073 1 1062,1068 1065,1073 0 ENSLAFG00000007680 cmpl cmpl 0, MYOF ENSLAFT00000037164 ENSLAFP00000025175 26509 | |
149 1 ENSLAFT00000008925 chrM + 990 1000 990 1000 1 990, 1000, 0 ENSLAFG00000008924 incmpl incmpl 0, PRKG1 ENSLAFT00000008925 ENSLAFP00000007492 | |
150 ... | |
151 | |
152 In this table: | |
153 | |
154 column with transcript start = 5 | |
155 column with transcript end = 6 | |
156 column with coding start = 7 | |
157 column with coding end = 8 | |
158 column with exon starts = 10 | |
159 column with exon ends = 11 | |
160 | |
161 - gd_indivs:: | |
162 | |
163 7 W_Java | |
164 10 E_Java | |
165 16 Pen_Ma | |
166 ... | |
167 | |
168 - Genotype table:: | |
169 | |
170 chrM 1005 T C -1 -1 2 -1 -1 2 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 0 -1 -1 | |
171 chrM 1060 G A -1 -1 0 -1 -1 0 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1 | |
172 chrM 991 C T -1 -1 0 -1 -1 2 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 0 -1 -1 | |
173 ... | |
174 | |
175 The outputs are going to be one file for each sequence in the input gene | |
176 dataset table (as long as they are included in the input FASTA file). | |
177 </help> | |
178 </tool> |