Mercurial > repos > galaxyp > regex_find_replace

diff regex.xml @ 0:60d04307b027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author: galaxyp
date: Wed, 18 Jan 2017 17:45:20 -0500
children: 209b7c5ee9d7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex.xml	Wed Jan 18 17:45:20 2017 -0500
@@ -0,0 +1,136 @@
+<tool id="regex1" name="Regex Find And Replace" version="1.0.0">
+  <description></description>
+  <command interpreter="python">regex.py --input '$input' --output '$out_file1' --input_display_name '$input.display_name'
+    #for $check in $checks:
+    --pattern='$check.pattern' --replacement='$check.replacement'
+    #end for
+  </command>
+  <inputs>
+    <param format="txt" name="input" type="data" label="Select lines from"/>
+    <repeat name="checks" title="Check">
+      <param name="pattern" size="40" type="text" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
+        <sanitizer>
+          <valid>
+            <add preset="string.printable"/>
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__"/>
+          </mapping>
+        </sanitizer>
+      </param>
+      <param name="replacement" size="40" type="text" value="newchr\1" label="Replacement">
+        <sanitizer>
+          <valid>
+            <add preset="string.printable"/>
+            <remove value="&#92;" />
+            <remove value="&apos;" />
+          </valid>
+          <mapping initial="none">
+            <add source="&#92;" target="__backslash__" />
+            <add source="&apos;" target="__sq__"/>
+          </mapping>
+        </sanitizer>      
+      </param>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data format="input" name="out_file1" metadata_source="input"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="find1.txt"/>
+      <param name="pattern" value="(T\w+)"/>
+      <param name="replacement" value="\1 \1" />
+      <output name="out_file1" file="replace1.txt"/>
+    </test>
+    <test>
+      <param name="input" value="find1.txt"/>
+      <param name="pattern" value="f"/>
+      <param name="replacement" value="'&quot;" />
+      <output name="out_file1" file="replace2.txt"/>
+    </test>
+    <test>
+      <param name="input" value="find1.txt"/>
+      <param name="checks_0|pattern" value="a test file"/>
+      <param name="checks_0|replacement" value="a file named #{input_name}" />
+      <param name="checks_1|pattern" value="see here"/>
+      <param name="checks_1|replacement" value="see #{input_name}" />
+      <param name="checks_2|pattern" value="see (find1).txt"/>
+      <param name="checks_2|replacement" value="see \1" />
+      <output name="out_file1" file="replace3.txt"/>
+    </test>
+  </tests>
+  <help>
+This tool goes line by line through the specified input file and
+replaces text which matches the specified regular expression patterns
+with its corresponding specified replacement.
+
+This tool uses Python regular expressions. More information about
+Python regular expressions can be found here:
+http://docs.python.org/library/re.html.
+
+To convert an Ilumina FATSQ sequence id from the CAVASA 8 format::
+
+ @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
+ GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
+ +EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
+ IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
+
+To the CASAVA 7 format::
+
+ @EAS139_FC706VJ:2:2104:15343:197393#0/1
+ GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
+ +EAS139_FC706VJ:2:2104:15343:197393#0/1
+ IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
+
+Use Settings::
+
+ Find Regex: ^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$
+ Replacement: \1_\2#0/\3
+
+Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference:  **\\1**
+
+The regex **^([@+][A-Z0-9]+):\d+:(\S+) (\d).*$** means::
+
+  ^  - start the match at the beginning of the line of text
+  (  - start a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
+  [@+]  - matches either a @ or + character
+  [A-Z0-9]+  - matches an uppercase letter or a digit, the plus sign means to match 1 or more such characters
+  )  - end a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
+  :\d+:   - matches a colon followed by one or more digits followed by a colon character
+  (\S+)  - matches one or more non-whitespace charcters,  the enclosing parentheses make this a group (2) that can back-referenced in the replacement text as \2
+  \s  - matches a whitespace character
+  (\d)  - matches a single digit character,  the enclosing parentheses make this a group (3) that can back-referenced in the replacement text as \3
+  .*  - dot means match any character, asterisk means zero more more matches
+  $  - the regex must match to the end of the line of text
+
+In the replacement pattern, use the special token #{input_name} to insert the input dataset's display name.
+The name can be modified by a second find/replace check. Suppose you want to insert the sample id of your dataset,
+named **Sample ABC123**, into the dataset itself, which currently contains the lines::
+Data 1
+Data 2
+Data 3
+
+You can use the following checks::
+Find Regex: Data
+Replacement: #{input_name} Data
+
+Find Regex: Sample (\S+)
+Replacement: \1
+
+The result will be::
+ABC123 Data 1
+ABC123 Data 2
+ABC123 Data 3
+
+
+
+Galaxy aggressively escapes input supplied to tools, so if something
+is not working please let us know and we can look into whether this is
+the cause. Also if you would like help constructing regular
+expressions for your inputs, please let us know at help@msi.umn.edu.
+</help>
+</tool>
author	galaxyp
date	Wed, 18 Jan 2017 17:45:20 -0500
parents
children	209b7c5ee9d7