diff regex.xml @ 0:9ea374bb0350 draft default tip

Uploaded
author jjohnson
date Sat, 29 Mar 2014 13:41:51 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex.xml	Sat Mar 29 13:41:51 2014 -0400
@@ -0,0 +1,107 @@
+<tool id="regex1" name="Regex Find And Replace" version="0.1.0">
+  <description></description>
+  <command interpreter="python">regex.py --input $input --output $out_file1
+    #for $check in $checks:
+    --pattern='$check.pattern' --replacement='$check.replacement'
+    #end for
+  </command>
+  <inputs>
+    <param format="txt" name="input" type="data" label="Select lines from"/>
+    <repeat name="checks" title="Check">
+      <param name="pattern" size="40" type="text" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
+        <sanitizer>
+          <valid>
+            <add preset="string.printable"/>
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__"/>
+          </mapping>
+        </sanitizer>
+      </param>
+      <param name="replacement" size="40" type="text" value="newchr\1" label="Replacement">
+        <sanitizer>
+          <valid>
+            <add preset="string.printable"/>
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__"/>
+          </mapping>
+        </sanitizer>      
+      </param>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data format="input" name="out_file1" metadata_source="input"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="find1.txt"/>
+      <param name="pattern" value="(T\w+)"/>
+      <param name="replacement" value="\1 \1" />
+      <output name="out_file1" file="replace1.txt"/>
+    </test>
+    <test>
+      <param name="input" value="find1.txt"/>
+      <param name="pattern" value="f"/>
+      <param name="replacement" value="'&quot;" />
+      <output name="out_file1" file="replace2.txt"/>
+    </test>
+  </tests>
+  <help>
+This tool goes line by line through the specified input file and
+replaces text which matches the specified regular expression patterns
+with its corresponding specified replacement.
+
+This tool uses Python regular expressions. More information about
+Python regular expressions can be found here:
+http://docs.python.org/library/re.html.
+
+To convert an Ilumina FATSQ sequence id from the CAVASA 8 format::
+
+ @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
+ GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
+ +EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
+ IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
+
+To the CASAVA 7 format::
+
+ @EAS139_FC706VJ:2:2104:15343:197393#0/1
+ GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
+ +EAS139_FC706VJ:2:2104:15343:197393#0/1
+ IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
+
+Use Settings::
+
+ Find Regex: ^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$
+ Replacement: \1_\2#0/\3
+
+Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference:  **\\1**
+
+The regex **^([@+][A-Z0-9]+):\d+:(\S+) (\d).*$** means::
+
+  ^  - start the match at the beginning of the line of text
+  (  - start a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
+  [@+]  - matches either a @ or + character
+  [A-Z0-9]+  - matches an uppercase letter or a digit, the plus sign means to match 1 or more such characters
+  )  - end a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
+  :\d+:   - matches a colon followed by one or more digits followed by a colon character
+  (\S+)  - matches one or more non-whitespace charcters,  the enclosing parentheses make this a group (2) that can back-referenced in the replacement text as \2
+  \s  - matches a whitespace character
+  (\d)  - matches a single digit character,  the enclosing parentheses make this a group (3) that can back-referenced in the replacement text as \3
+  .*  - dot means match any character, asterisk means zero more more matches
+  $  - the regex must match to the end of the line of text
+
+
+
+Galaxy aggressively escapes input supplied to tools, so if something
+is not working please let us know and we can look into whether this is
+the cause. Also if you would like help constructing regular
+expressions for your inputs, please let us know at help@msi.umn.edu.
+</help>
+</tool>