annotate regex.xml @ 0:9ea374bb0350 draft default tip

Uploaded
author jjohnson
date Sat, 29 Mar 2014 13:41:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="regex1" name="Regex Find And Replace" version="0.1.0">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
2 <description></description>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
3 <command interpreter="python">regex.py --input $input --output $out_file1
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
4 #for $check in $checks:
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
5 --pattern='$check.pattern' --replacement='$check.replacement'
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
6 #end for
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
7 </command>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
8 <inputs>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
9 <param format="txt" name="input" type="data" label="Select lines from"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
10 <repeat name="checks" title="Check">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
11 <param name="pattern" size="40" type="text" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
12 <sanitizer>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
13 <valid>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
14 <add preset="string.printable"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
15 <remove value="&#92;" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
16 <remove value="&apos;" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
17 </valid>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
18 <mapping initial="none">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
19 <add source="&#92;" target="__backslash__" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
20 <add source="&apos;" target="__sq__"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
21 </mapping>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
22 </sanitizer>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
23 </param>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
24 <param name="replacement" size="40" type="text" value="newchr\1" label="Replacement">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
25 <sanitizer>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
26 <valid>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
27 <add preset="string.printable"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
28 <remove value="&#92;" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
29 <remove value="&apos;" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
30 </valid>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
31 <mapping initial="none">
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
32 <add source="&#92;" target="__backslash__" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
33 <add source="&apos;" target="__sq__"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
34 </mapping>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
35 </sanitizer>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
36 </param>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
37 </repeat>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
38 </inputs>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
39 <outputs>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
40 <data format="input" name="out_file1" metadata_source="input"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
41 </outputs>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
42 <tests>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
43 <test>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
44 <param name="input" value="find1.txt"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
45 <param name="pattern" value="(T\w+)"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
46 <param name="replacement" value="\1 \1" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
47 <output name="out_file1" file="replace1.txt"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
48 </test>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
49 <test>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
50 <param name="input" value="find1.txt"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
51 <param name="pattern" value="f"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
52 <param name="replacement" value="'&quot;" />
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
53 <output name="out_file1" file="replace2.txt"/>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
54 </test>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
55 </tests>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
56 <help>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
57 This tool goes line by line through the specified input file and
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
58 replaces text which matches the specified regular expression patterns
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
59 with its corresponding specified replacement.
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
60
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
61 This tool uses Python regular expressions. More information about
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
62 Python regular expressions can be found here:
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
63 http://docs.python.org/library/re.html.
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
64
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
65 To convert an Illumina FASTQ sequence id from the CASAVA 8 format::
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
66
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
67 @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
68 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
69 +EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
70 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
71
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
72 To the CASAVA 7 format::
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
73
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
74 @EAS139_FC706VJ:2:2104:15343:197393#0/1
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
75 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
76 +EAS139_FC706VJ:2:2104:15343:197393#0/1
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
77 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
78
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
79 Use Settings::
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
80
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
81 Find Regex: ^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
82 Replacement: \1_\2#0/\3
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
83
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
84 Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference: **\\1**
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
85
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
86 The regex **^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$** means::
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
87
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
88 ^ - start the match at the beginning of the line of text
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
89 ( - start a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
90 [@+] - matches either a @ or + character
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
91 [A-Z0-9]+ - matches an uppercase letter or a digit, the plus sign means to match 1 or more such characters
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
92 ) - end a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
93 :\d+: - matches a colon followed by one or more digits followed by a colon character
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
94 (\S+) - matches one or more non-whitespace characters, the enclosing parentheses make this a group (2) that can be back-referenced in the replacement text as \2
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
95 \s - matches a whitespace character
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
96 (\d) - matches a single digit character, the enclosing parentheses make this a group (3) that can be back-referenced in the replacement text as \3
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
97 .* - dot means match any character, asterisk means zero or more matches
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
98 $ - the regex must match to the end of the line of text
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
99
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
100
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
101
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
102 Galaxy aggressively escapes input supplied to tools, so if something
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
103 is not working please let us know and we can look into whether this is
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
104 the cause. Also if you would like help constructing regular
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
105 expressions for your inputs, please let us know at help@msi.umn.edu.
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
106 </help>
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
107 </tool>