Mercurial > repos > miller-lab > genome_diversity
diff find_intervals.xml @ 17:a3af29edcce2
Uploaded Miller Lab Devshed version a51c894f5bed
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:57:18 -0400 |
parents | 8ae67e9fb6ff |
children | d6b961721037 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_intervals.xml Fri Sep 28 11:57:18 2012 -0400 @@ -0,0 +1,142 @@ +<tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0"> + <description>: Find high-scoring runs of SNPs</description> + + <command interpreter="python"> + find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path" + + #if $override_metadata.choice == "0" + "$input.metadata.ref" "$input.metadata.rPos" + #else + "$override_metadata.ref_col" "$override_metadata.rpos_col" + #end if + + "$score_col" "$shuffles" + + #if $cutoff.type == 'percentage' + "$cutoff.cutoff_pct" + #else + "=$cutoff.cutoff_val" + #end if + + "$out_format" + </command> + + <inputs> + <param name="input" type="data" format="tabular" label="Input"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + + <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/> + + <conditional name="cutoff"> + <param name="type" type="select" label="Cutoff type"> + <option value="percentage">percentage</option> + <option value="value">value</option> + </param> + <when value="percentage"> + <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/> + </when> + <when value="value"> + <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/> + </when> + </conditional> + + <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/> + + <param name="out_format" type="select" format="integer" label="Report individual positions"> + <option value="0" selected="true">No</option> + <option value="1">Yes</option> + </param> + + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as using here."/> + <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="interval"> + <change_format> + <when input="out_format" value="1" format="bigwigpos" /> + </change_format> + </data> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="score_col" value="5" /> + <param name="type" value="value" /> + <param name="cutoff_val" value="700.0" /> + <param name="shuffles" value="10" /> + <param name="out_format" value="0" /> + <param name="choice" value="0" /> + + <output name="output" file="test_out/find_intervals/find_intervals.interval" /> + </test> + </tests> + + <help> + +**Dataset formats** + +The input dataset is tabular_, with required columns of chromosome, position, +and score (in any column). +The output dataset is interval_. (`Dataset missing?`_) + +.. _interval: ./static/formatHelp.html#interval +.. _tabular: ./static/formatHelp.html#tab +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user selects a tabular dataset (such as a gd_snp dataset) and +if the dataset is not also gd_snp format, specifies +the columns containing chromosome, position, and scores (such as an Fst-value for the SNP). +For gd_snp format the metadata can be used to specify the chromosome and +position. +Other inputs include +a percentage or raw score for the "cutoff" which should be greater than the +average value for the scores column. A higher value will give smaller intervals +in the output. +If a percentage (e.g. 95%) is specified +then that percentile of the scores is used as the cutoff; +percentile may not work well if many rows or SNPs have the same score +(in that case use a raw score). The program subtracts the +cutoff from every score, then finds genomic intervals (i.e., consecutive runs +of SNPs) whose total score cannot be increased by adding or subtracting one +or more adjusted scores at the ends of the interval. +Another input is the number of times the +data should be randomized (only intervals with score exceeding the maximum for +the randomized data are reported). +If 100 shuffles are requested, then any interval reported by the tool has a +score with probability less than 0.01 of being equaled or exceeded by chance. + +----- + +**Example** + +- input (gd_snp):: + + Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 + Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 + ... + Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 + Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 + etc. + +- output not reporting individual positions:: + + chr2 9817960 67331624 1272.2000 + + </help> +</tool>