annotate tandem_repeats_finder_wrapper.xml @ 0:a2e1d1f25e35 draft default tip

Uploaded
author urgi-team
date Thu, 10 Jul 2014 09:32:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
1 <tool id="tandem_repeats_finder" name="Tandem Repeats Finder" version="1.0.0">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
2 <description>locates and displays tandem repeats in DNA sequences</description>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
3 <requirements>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
4 <requirement type="package" version="4.0">tandem_repeats_finder</requirement>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
5 </requirements>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
6 <version_command>trf | grep Version</version_command>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
7 <command interpreter="python">tandem_repeats_finder_wrapper.py --file "$file" --match $match --mismatch $mismatch --delta $delta --pm $pm --pi $pi --minscore $minscore --maxperiod $maxperiod
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
8 #if $nohtml
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
9 --txt "$output_txt"
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
10 #else
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
11 --html "$output_html" --dirhtml "$output_html.files_path" --txt "$output_txt"
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
12 #end if
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
13 #if $flanking
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
14 --flanking
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
15 #end if
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
16 #if $mask
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
17 --mask "$output_mask"
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
18 #end if
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
19
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
20 </command>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
21 <inputs>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
22 <param name="file" type="data" format="fasta" label="DNA sequences in Fasta format"/>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
23 <param name="match" type="integer" value="2" label="Matching weight" help="default value 2">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
24 <validator type="in_range" min="1" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
25 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
26 <param name="mismatch" type="integer" value="7" label="Mismatching penalty" help="default value 7">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
27 <validator type="in_range" min="0" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
28 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
29 <param name="delta" type="integer" value="7" label="Indel penalty" help="default value 7">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
30 <validator type="in_range" min="0" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
31 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
32 <param name="pm" type="integer" value="80" label="Matching probability" help="default value 80">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
33 <validator type="in_range" min="1" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
34 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
35 <param name="pi" type="integer" value="10" label="Indel probability" help="default value 10">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
36 <validator type="in_range" min="1" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
37 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
38 <param name="minscore" type="integer" value="50" label="Minimum alignment score to report" help="default value 50">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
39 <validator type="in_range" min="30" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
40 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
41 <param name="maxperiod" type="integer" value="500" label="Maximum period size to report" help="default value 500">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
42 <validator type="in_range" min="1" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
43 </param>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
44 <param name="nohtml" type="boolean" checked="false" label="No html output" help="Export dat file only" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
45 <param name="flanking" type="boolean" checked="false" label="Flanking sequence" help="Flanking sequence consists of the 500 nucleotides on each side of a repeat. Flanking sequence is recorded in the alignment file. This may be useful for PCR primer determination." />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
46 <param name="mask" type="boolean" checked="false" label="Masked sequence file" help="The masked sequence file is a FASTA format file containing a copy of the sequence with every character that occurred in a tandem repeat changed to the letter 'N'. The word 'masked' is added to the sequence description line just after the '>' character." />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
47 </inputs>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
48 <outputs>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
49 <data format="html" name="output_html" label="TRF_summary_${match}_${mismatch}_${delta}_${pm}_${pi}_${minscore}_${maxperiod}.html">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
50 <filter>(nohtml == False)</filter>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
51 </data>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
52 <data format="txt" name="output_mask" label="TRF_summary_${match}_${mismatch}_${delta}_${pm}_${pi}_${minscore}_${maxperiod}.mask">
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
53 <filter>(mask == True)</filter>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
54 </data>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
55 <data format="txt" name="output_txt" label="TRF_summary_${match}_${mismatch}_${delta}_${pm}_${pi}_${minscore}_${maxperiod}.txt"/>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
56 </outputs>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
57 <tests>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
58 <test>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
59 <param name="file" value="sequence_trf_test.fasta" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
60 <param name="nohtml" value="True" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
61 <output name="output_txt" file="TRF_summary_2_7_80_10_50_500.txt" ftype="txt" />
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
62 </test>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
63 </tests>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
64 <help>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
65
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
66
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
67 **What it does**
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
68
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
69 A tandem repeat in DNA is two or more adjacent, approximate copies of a pattern of nucleotides. Tandem Repeats Finder is a program to locate and display tandem repeats in DNA sequences. In order to use the program, the user submits a sequence in FASTA format. There is no need to specify the pattern, the size of the pattern or any other parameter. The output consists of two files: a repeat table file and an alignment file. The repeat table contains information about each repeat, including its location, size, number of copies and nucleotide content. Clicking on the location indices for one of the table entries opens a second browser window that shows an alignment of the copies against a consensus pattern. The program is very fast, analyzing sequences on the order of 0.5 Mb in just a few seconds. Submitted sequences may be of arbitrary length. Repeats with pattern size in the range from 1 to 2000 bases are detected.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
70
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
71 -------
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
72
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
73 **Input format**
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
74
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
75 The FASTA format is a plain text format which looks something like this:
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
76
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
77 >myseq
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
78 AGTCGTCGCT AGCTAGCTAG CATCGAGTCT TTTCGATCGA GGACTAGACT TCTAGCTAGC TAGCATAGCA TACGAGCATA TCGGTCATGA GACTGATTGG GCTTTAGCTA GCTAGCATAG CATACGAGCA TATCGGTAGA CTGATTGGGT TTAGGTTACC
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
79
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
80 The first line starts with a greater-than sign ">" and contains a name or other identifier for the sequence. This is the sequence header and must be on a single line. The remaining lines contain the sequence data. The sequence can be in upper or lower case letters. Anything other than letters (numbers for example) is ignored. Multiple sequences can be present in the same file as long as each sequence has its own header.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
81
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
82 -------
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
83
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
84 **Output format**
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
85
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
86 Table Explanation:
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
87
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
88 The summary table includes the following information::
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
89
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
90 1 Indices of the repeat relative to the start of the sequence.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
91 2 Period size of the repeat.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
92 3 Number of copies aligned with the consensus pattern.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
93 4 Size of consensus pattern (may differ slightly from the period size).
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
94 5 Percent of matches between adjacent copies overall.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
95 6 Percent of indels between adjacent copies overall.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
96 7 Alignment score.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
97 8 Percent composition for each of the four nucleotides.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
98 9 Entropy measure based on percent composition.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
99
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
100 If the output contains more than 120 repeats, multiple linked tables are produced. The links to the other tables appear at the top and bottom of each table.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
101
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
102 Note: If you save multiple linked summary table files, use the default names supplied by your browser to preserve the automatic linking.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
103
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
104 Alignment Explanation:
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
105
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
106 The alignment is presented as follows::
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
107
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
108 1 In each pair of lines, the actual sequence is on the top and a consensus sequence for all the copies is on the bottom.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
109 2 Each pair of lines is one period except for very small patterns.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
110 3 The 10 sequence characters before and after a repeat are shown.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
111 4 Symbol * indicates a mismatch.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
112 5 Symbol - indicates an insertion or deletion.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
113 6 Statistics refers to the matches, mismatches and indels overall between adjacent copies in the sequence, not between the sequence and the consensus pattern.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
114 7 Distances between matching characters at corresponding positions are listed as distance, number at that distance, percentage of all matches.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
115 8 ACGTcount is percentage of each nucleotide in the repeat sequence.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
116 9 Consensus sequence is shown by itself.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
117 10 If chosen as an option, 500 characters of flanking sequence on each side of the repeat are shown.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
118
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
119 Note: If you save the alignment file, use the default name supplied by your browser to preserve the automatic cross-referencing with the summary table.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
120
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
121 The data file is a text file which contains the same information, in the same order, as the repeat table file, plus consensus and repeat sequences. This file contains no labeling and is suitable for additional processing, for example with a perl script, outside of the program.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
122
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
123
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
124 -------
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
125
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
126 **References**
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
127
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
128 If you use this Galaxy tool in work leading to a scientific publication please
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
129 cite the following paper:
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
130
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
131 G. Benson,
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
132 "Tandem repeats finder: a program to analyze DNA sequences"
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
133 Nucleic Acids Research (1999)
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
134 Vol. 27, No. 2, pp. 573-580.
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
135 </help>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
136 </tool>
a2e1d1f25e35 Uploaded
urgi-team
parents:
diff changeset
137