annotate microsatellite.xml @ 3:3d58c22ea6c9 draft

Uploaded
author arkarachai-fungtammasan
date Sat, 22 Aug 2015 12:12:35 -0400
parents d5ed5c2e25c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
1 <tool id="microsatellite" name="STR detection" version="1.0.0">
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
2 <description>for short read, reference, and mapped data</description>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
3 <command interpreter="python2.7"> microsatellite.py
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
4 "${filePath}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
5 #if $inputFileSource.inputFileType == "fasta"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
6 --fasta
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
7 #elif $inputFileSource.inputFileType == "fastq"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
8 --fastq
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
9 #elif $inputFileSource.inputFileType == "fastq_noquals"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
10 --fastq:noquals
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
11 #elif $inputFileSource.inputFileType == "sam"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
12 --sam
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
13 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
14
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
15 #if $inputFileSource.inputFileType == "sam"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
16 #if $inputFileSource.referenceFileSource.requireReference
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
17 --r --ref="${inputFileSource.referenceFileSource.referencePath}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
18 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
19 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
20
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
21 --period="${period}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
22
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
23 #if $partialmotifs == "true"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
24 --partialmotifs
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
25 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
26
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
27 --minlength="${minlength}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
28
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
29
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
30 --prefix="${prefix}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
31 --suffix="${surfix}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
32
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
33 --hamming="${hammingThreshold}"
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
34
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
35 #if $multipleruns
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
36 --multipleruns
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
37 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
38
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
39 #if $flankSetting.noflankdisplay
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
40 --noflankdisplay
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
41 #else
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
42 --flankdisplay=${flankSetting.flankdisplay}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
43 #end if
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
44 &gt; $stdout
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
45 </command>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
46
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
47 <inputs>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
48 <param name="filePath" label="Select input file" type="data"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
49 <conditional name="inputFileSource">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
50 <param name="inputFileType" type="select" label="Select input file type">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
51 <option value="fasta">Fasta File</option>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
52 <option value="fastq">Fastq File</option>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
53 <option value="fastq_noquals">Fastq File without Quality Information</option>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
54 <option value="sam">SAM File</option>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
55 </param>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
56 <when value="sam">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
57 <conditional name="referenceFileSource">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
58 <param name="requireReference" label="Do you want to extract the corresponding microsatellites in the reference for comparison?" type="boolean">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
59 </param>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
60 <when value="true">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
61 <param name="referencePath" label="Select reference file" type="data"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
62 </when>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
63 </conditional>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
64 </when>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
65 </conditional>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
66
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
67 <param name="period" label="Motif size of microsatellites of interest (e.g. Mononucleotide microsatellite =1) (must be less than 10)" type="integer" size="2" value="1"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
68 <param name="partialmotifs" label="Consider microsatellites with a partial motif?" type="boolean" checked="True"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
69 <param name="minlength" label="Minimal length (bp) of microsatellite sequence reported" type="integer" size="2" value="5"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
70
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
71
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
72 <param name="prefix" label="Do not report candidate repeat intervals that have left flanking region less than (bp):" type="integer" size="4" value="20"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
73 <param name="surfix" label="Do not report candidate repeat intervals that have right flanking region less than (bp):" type="integer" size="4" value="20"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
74
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
75
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
76 <param name="hammingThreshold" label="Hamming threshold of microsatellite, If greater than 0, interrupted microsatellites will also be reported" type="integer" size="2" value="0"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
77 <param name="multipleruns" label="Consider all candidate intervals in a sequence. If not checked, only the longest one will be considered" type="boolean" checked="True"> </param>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
78 <conditional name="flankSetting">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
79 <param name="noflankdisplay" label="Show the entire flanking regions" type="boolean" checked="True"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
80 <when value="false">
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
81 <param name="flankdisplay" label="Limit length (bp) of flanking regions shown" type="integer" size="4" value="5"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
82 </when>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
83 </conditional>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
84
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
85 </inputs>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
86 <outputs>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
87 <data name="stdout" format="tabular"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
88 </outputs>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
89 <tests>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
90 <!-- Test data with valid values -->
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
91 <test>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
92 <param name="filePath" value="C_sample_fastq"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
93 <param name="period" value="1"/>
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
94 <param name="inputFileType" value="fastq"/>
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
95 <param name="partialmotifs" value="true" />
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
96 <param name="minlength" value="3" />
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
97 <param name="prefix" value="5"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
98 <param name="surfix" value="5"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
99 <param name="hammingThreshold" value="0"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
100 <param name="multipleruns" value="true"> </param>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
101 <output name="microsatellite" file="C_sample_snoope"/>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
102 </test>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
103
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
104 </tests>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
105 <help>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
106
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
107
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
108 .. class:: infomark
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
109
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
110 **What it does**
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
111
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
112 This tool identifies simple as well as interrupted STRs. Choosing a hamming distance of zero will return simple STRs.
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
113 Choosing a hamming distance of greater than zero will return both simple and interrupted STRs.
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
114 The algorithms used to identify simple and interrupted STRs are described in the manuscript cited below (see TABLE XXXX).
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
115
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
116 **Citation**
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
117
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
118 When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
119 This tool is developed by Chen Sun (cxs1031@cse.psu.edu) and Bob Harris (rsharris@bx.psu.edu)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
120
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
121 **Input**
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
122
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
123 - The input files can be fastq, fasta, fastq without quality score, and SAM format.
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
124
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
125 **Output**
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
126
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
127 For fastq, the output will contain the following columns:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
128
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
129 - Column 1 = length of STR (bp)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
130 - Column 2 = length of left flanking region (bp)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
131 - Column 3 = length of right flanking region (bp)
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
132 - Column 4 = repeat motif (bp)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
133 - Column 5 = hamming distance
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
134 - Column 6 = read name
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
135 - Column 7 = read sequence with soft masking of STR
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
136 - Column 8 = read quality (the same Phred score scale as input)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
137
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
138 For fasta, fastq without quality score, and SAM format, column 8 will be replaced with a dot (.).
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
139
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
140 If users have a mapped file (SAM) and would like to profile STRs from pre-mapped data instead of using the flank-based mapping approach, they can select SAM format input and specify that they want the corresponding STRs in the reference for comparison. The output will be as follows:
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
141
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
142 - Column 1 = length of STR (bp)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
143 - Column 2 = length of left flanking region (bp)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
144 - Column 3 = length of right flanking region (bp)
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
145 - Column 4 = repeat motif (bp)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
146 - Column 5 = hamming distance
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
147 - Column 6 = read name
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
148 - Column 7 = read sequence with soft masking of STR
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
149 - Column 8 = read quality (the same Phred score scale as input)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
150 - Column 9 = read name (The same as column 6)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
151 - Column 10 = chromosome
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
152 - Column 11 = left flanking region start
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
153 - Column 12 = left flanking region stop
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
154 - Column 13 = STR start as inferred from paired-end data
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
155 - Column 14 = STR stop as inferred from paired-end data
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
156 - Column 15 = right flanking region start
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
157 - Column 16 = right flanking region stop
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
158 - Column 17 = STR length in reference
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
159 - Column 18 = STR sequence in reference
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
160
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
161 </help>
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
162 </tool>