comparison regex.xml @ 0:9ea374bb0350 draft default tip

Uploaded
author jjohnson
date Sat, 29 Mar 2014 13:41:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9ea374bb0350
1 <tool id="regex1" name="Regex Find And Replace" version="0.1.0">
2 <description></description>
<!-- Command line: runs regex.py through the Python interpreter, reading the
     dataset bound to $input and writing to $out_file1. The template #for loop
     appends one single-quoted pattern/replacement option pair for every entry
     of the "checks" repeat, in the order the entries were defined. -->
3 <command interpreter="python">regex.py --input $input --output $out_file1
4 #for $check in $checks:
5 --pattern='$check.pattern' --replacement='$check.replacement'
6 #end for
7 </command>
<!-- Inputs: one dataset parameter ("input", format txt) and a repeatable
     "checks" group. Each repeat entry holds a "pattern" (find regex) and a
     "replacement" text parameter.
     Both text parameters use the same sanitizer: every character of the
     string.printable preset is accepted except the backslash (&#92;) and the
     single quote, and the mapping (started empty via initial="none") rewrites
     those two characters to the tokens __backslash__ and __sq__.
     NOTE(review): presumably this keeps the values safe inside the single
     quotes used on the command line, with regex.py expected to translate the
     tokens back; confirm against regex.py. -->
8 <inputs>
9 <param format="txt" name="input" type="data" label="Select lines from"/>
10 <repeat name="checks" title="Check">
11 <param name="pattern" size="40" type="text" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
12 <sanitizer>
13 <valid>
14 <add preset="string.printable"/>
15 <remove value="&#92;" />
16 <remove value="&apos;" />
17 </valid>
18 <mapping initial="none">
19 <add source="&#92;" target="__backslash__" />
20 <add source="&apos;" target="__sq__"/>
21 </mapping>
22 </sanitizer>
23 </param>
24 <param name="replacement" size="40" type="text" value="newchr\1" label="Replacement">
25 <sanitizer>
26 <valid>
27 <add preset="string.printable"/>
28 <remove value="&#92;" />
29 <remove value="&apos;" />
30 </valid>
31 <mapping initial="none">
32 <add source="&#92;" target="__backslash__" />
33 <add source="&apos;" target="__sq__"/>
34 </mapping>
35 </sanitizer>
36 </param>
37 </repeat>
38 </inputs>
<!-- Outputs: a single dataset, out_file1, which is the file the command
     writes. Its format attribute is "input" and its metadata is taken from
     the "input" parameter via metadata_source.
     NOTE(review): format="input" looks like the legacy way of inheriting the
     input datatype; newer Galaxy releases appear to prefer
     format_source="input". Confirm against the targeted Galaxy version. -->
39 <outputs>
40 <data format="input" name="out_file1" metadata_source="input"/>
41 </outputs>
<!-- Functional tests: both run the tool on find1.txt.
     Test 1 uses the pattern (T\w+) with the replacement "\1 \1" and compares
     the result with replace1.txt.
     Test 2 replaces "f" with a single quote followed by a double quote and
     compares the result with replace2.txt; presumably this exercises the
     quote handling of the sanitizer.
     NOTE(review): "pattern" and "replacement" are declared inside the
     "checks" repeat; verify that the test framework resolves these
     unqualified names (it may require checks_0|pattern and
     checks_0|replacement). -->
42 <tests>
43 <test>
44 <param name="input" value="find1.txt"/>
45 <param name="pattern" value="(T\w+)"/>
46 <param name="replacement" value="\1 \1" />
47 <output name="out_file1" file="replace1.txt"/>
48 </test>
49 <test>
50 <param name="input" value="find1.txt"/>
51 <param name="pattern" value="f"/>
52 <param name="replacement" value="'&quot;" />
53 <output name="out_file1" file="replace2.txt"/>
54 </test>
55 </tests>
56 <help>
57 This tool goes line by line through the specified input file and
58 replaces text which matches the specified regular expression patterns
59 with its corresponding specified replacement.
60
61 This tool uses Python regular expressions. More information about
62 Python regular expressions can be found here:
63 http://docs.python.org/library/re.html.
64
65 To convert an Illumina FASTQ sequence id from the CASAVA 8 format::
66
67 @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
68 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
69 +EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
70 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
71
72 To the CASAVA 7 format::
73
74 @EAS139_FC706VJ:2:2104:15343:197393#0/1
75 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
76 +EAS139_FC706VJ:2:2104:15343:197393#0/1
77 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
78
79 Use Settings::
80
81 Find Regex: ^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$
82 Replacement: \1_\2#0/\3
83
84 Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference: **\\1**
85
86 The regex **^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$** means::
87
88 ^ - start the match at the beginning of the line of text
89 ( - start a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
90 [@+] - matches either a @ or + character
91 [A-Z0-9]+ - matches an uppercase letter or a digit, the plus sign means to match 1 or more such characters
92 ) - end a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
93 :\d+: - matches a colon followed by one or more digits followed by a colon character
94 (\S+) - matches one or more non-whitespace characters, the enclosing parentheses make this a group (2) that can be back-referenced in the replacement text as \2
95 \s - matches a whitespace character
96 (\d) - matches a single digit character, the enclosing parentheses make this a group (3) that can be back-referenced in the replacement text as \3
97 .* - dot means match any character, asterisk means zero or more matches
98 $ - the regex must match to the end of the line of text
99
100
101
102 Galaxy aggressively escapes input supplied to tools, so if something
103 is not working please let us know and we can look into whether this is
104 the cause. Also if you would like help constructing regular
105 expressions for your inputs, please let us know at help@msi.umn.edu.
106 </help>
107 </tool>