Mercurial > repos > miller-lab > genome_diversity
comparison find_intervals.xml @ 17:a3af29edcce2
Uploaded Miller Lab Devshed version a51c894f5bed
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:57:18 -0400 |
parents | 8ae67e9fb6ff |
children | d6b961721037 |
comparison
equal
deleted
inserted
replaced
16:be0e2223c531 | 17:a3af29edcce2 |
---|---|
1 <tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0"> | |
2 <description>: Find high-scoring runs of SNPs</description> | |
3 | |
<!--
  Command line for find_intervals.py. Positional arguments, in order:
    1 to 4  input dataset path, its dbkey, output path, output files_path
    5, 6    reference chromosome and reference position columns, read from
            the input's metadata unless the user chose to override them
    7, 8    score column and number of randomizations
    9       cutoff: the bare percentage, or the raw value prefixed with "="
            NOTE(review): presumably the "=" prefix is how the script tells
            a raw cutoff from a percentage; confirm in find_intervals.py
    10      "Report individual positions" selector ("0" or "1")
-->
<command interpreter="python"><![CDATA[
    find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path"

    #if $override_metadata.choice == "0"
        "$input.metadata.ref" "$input.metadata.rPos"
    #else
        "$override_metadata.ref_col" "$override_metadata.rpos_col"
    #end if

    "$score_col" "$shuffles"

    #if $cutoff.type == 'percentage'
        "$cutoff.cutoff_pct"
    #else
        "=$cutoff.cutoff_val"
    #end if

    "$out_format"
]]></command>
23 | |
<!--
  Tool form. Parameter names here are referenced by the command template
  and by the change_format rule in the outputs section, so they must not
  be renamed.
-->
<inputs>
  <!-- Tabular input; the validator rejects datasets with no build set. -->
  <param name="input" type="data" format="tabular" label="Input">
    <validator type="unspecified_build"
               message="This dataset does not have a reference species and cannot be used with this tool" />
  </param>

  <param name="score_col" type="data_column" data_ref="input" numerical="true"
         label="Column with score"/>

  <!-- Cutoff is either a percentage (0 to 100) or a raw score value. -->
  <conditional name="cutoff">
    <param name="type" type="select" label="Cutoff type">
      <option value="percentage">percentage</option>
      <option value="value">value</option>
    </param>
    <when value="percentage">
      <param name="cutoff_pct" type="float" value="95" min="0" max="100"
             label="Percentage cutoff"/>
    </when>
    <when value="value">
      <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/>
    </when>
  </conditional>

  <param name="shuffles" type="integer" min="0" value="0"
         label="Number of randomizations"/>

  <!--
    Select parameters carry no "format" attribute: it only applies to
    data parameters. The option values stay the strings "0" and "1".
  -->
  <param name="out_format" type="select" label="Report individual positions">
    <option value="0" selected="true">No</option>
    <option value="1">Yes</option>
  </param>

  <!-- Optional manual choice of the chromosome and position columns. -->
  <conditional name="override_metadata">
    <param name="choice" type="select" label="Choose columns"
           help="Note: you need to choose the columns if the input dataset is not gd_snp">
      <option value="0" selected="true">No, get columns from metadata</option>
      <option value="1">Yes, choose columns</option>
    </param>
    <when value="0" />
    <when value="1">
      <param name="ref_col" type="data_column" data_ref="input" numerical="false"
             label="Column with reference chromosome"
             help="Note: be sure the build in the metadata is the same as using here."/>
      <param name="rpos_col" type="data_column" data_ref="input" numerical="true"
             label="Column with reference position"
             help="Note: either zero or one based positions will work"/>
    </when>
  </conditional>
</inputs>
63 | |
<!--
  Single output dataset. Its datatype is "interval" unless the out_format
  parameter is "1", in which case it is switched to "bigwigpos".
-->
<outputs>
  <data format="interval" name="output">
    <change_format>
      <when format="bigwigpos" input="out_format" value="1"/>
    </change_format>
  </data>
</outputs>
71 | |
<!--
  Functional test: gd_snp sample input, score in column 5, raw-value cutoff
  of 700.0, 10 randomizations, interval output, columns taken from metadata.
  NOTE(review): the result is compared against a fixed file while
  shuffles is 10; this assumes the randomization is reproducible. Confirm.
-->
<tests>
  <test>
    <param name="input" ftype="gd_snp" value="test_in/sample.gd_snp"/>
    <param name="score_col" value="5"/>
    <param name="type" value="value"/>
    <param name="cutoff_val" value="700.0"/>
    <param name="shuffles" value="10"/>
    <param name="out_format" value="0"/>
    <param name="choice" value="0"/>

    <output name="output" file="test_out/find_intervals/find_intervals.interval"/>
  </test>
</tests>
85 | |
86 <help> | |
87 | |
88 **Dataset formats** | |
89 | |
90 The input dataset is tabular_, with required columns of chromosome, position, | |
91 and score (in any column). | |
92 The output dataset is interval_. (`Dataset missing?`_) | |
93 | |
94 .. _interval: ./static/formatHelp.html#interval | |
95 .. _tabular: ./static/formatHelp.html#tab | |
96 .. _Dataset missing?: ./static/formatHelp.html | |
97 | |
98 ----- | |
99 | |
100 **What it does** | |
101 | |
102 The user selects a tabular dataset (such as a gd_snp dataset) and, | |
103 if the dataset is not in gd_snp format, specifies | |
104 the columns containing chromosome, position, and scores (such as an Fst-value for the SNP). | |
105 For gd_snp format the metadata can be used to specify the chromosome and | |
106 position. | |
107 Other inputs include | |
108 a percentage or raw score for the "cutoff" which should be greater than the | |
109 average value for the scores column. A higher value will give smaller intervals | |
110 in the output. | |
111 If a percentage (e.g. 95%) is specified | |
112 then that percentile of the scores is used as the cutoff; | |
113 percentile may not work well if many rows or SNPs have the same score | |
114 (in that case use a raw score). The program subtracts the | |
115 cutoff from every score, then finds genomic intervals (i.e., consecutive runs | |
116 of SNPs) whose total score cannot be increased by adding or subtracting one | |
117 or more adjusted scores at the ends of the interval. | |
118 Another input is the number of times the | |
119 data should be randomized (only intervals with score exceeding the maximum for | |
120 the randomized data are reported). | |
121 If 100 shuffles are requested, then any interval reported by the tool has a | |
122 score with probability less than 0.01 of being equaled or exceeded by chance. | |
123 | |
124 ----- | |
125 | |
126 **Example** | |
127 | |
128 - input (gd_snp):: | |
129 | |
130 Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 | |
131 Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 | |
132 ... | |
133 Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 | |
134 Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 | |
135 etc. | |
136 | |
137 - output not reporting individual positions:: | |
138 | |
139 chr2 9817960 67331624 1272.2000 | |
140 | |
141 </help> | |
142 </tool> |