0
|
1 <tool id="regexColumn1" name="Column Regex Find And Replace" version="0.1.0">
|
|
2 <description></description>
|
|
3 <command interpreter="python">regex.py --input $input --output $out_file1 --column $field
|
|
4 #for $check in $checks:
|
|
5 --pattern='$check.pattern' --replacement='$check.replacement'
|
|
6 #end for
|
|
7 </command>
|
|
8 <inputs>
|
|
9 <param format="tabular" name="input" type="data" label="Select cells from"/>
|
|
10 <param name="field" label="using column" type="data_column" data_ref="input" />
|
|
11 <repeat name="checks" title="Check">
|
|
12 <param name="pattern" size="40" type="text" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
|
|
13 <sanitizer>
|
|
14 <valid>
|
|
15 <add preset="string.printable"/>
|
|
16 <remove value="\" />
|
|
17 <remove value="'" />
|
|
18 </valid>
|
|
19 <mapping initial="none">
|
|
20 <add source="\" target="__backslash__" />
|
|
21 <add source="'" target="__sq__"/>
|
|
22 </mapping>
|
|
23 </sanitizer>
|
|
24 </param>
|
|
25 <param name="replacement" size="40" type="text" value="newchr\1" label="Replacement">
|
|
26 <sanitizer>
|
|
27 <valid>
|
|
28 <add preset="string.printable"/>
|
|
29 <remove value="\" />
|
|
30 <remove value="'" />
|
|
31 </valid>
|
|
32 <mapping initial="none">
|
|
33 <add source="\" target="__backslash__" />
|
|
34 <add source="'" target="__sq__"/>
|
|
35 </mapping>
|
|
36 </sanitizer>
|
|
37 </param>
|
|
38 </repeat>
|
|
39 </inputs>
|
|
40 <outputs>
|
|
41 <data format="input" name="out_file1" metadata_source="input" />
|
|
42 </outputs>
|
|
43 <tests>
|
|
44 <test>
|
|
45 <param name="input" value="find_tabular_1.txt" ftype="tabular" />
|
|
46 <param name="field" value="1" />
|
|
47 <param name="pattern" value="moo"/>
|
|
48 <param name="replacement" value="cow" />
|
|
49 <output name="out_file1" file="replace_tabular_1.txt"/>
|
|
50 </test>
|
|
51 </tests>
|
|
52 <help>
|
|
53
|
|
54 .. class:: warningmark
|
|
55
|
|
56 **This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
|
|
57
|
|
58 .. class:: infomark
|
|
59
|
|
60 **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
|
|
61
|
|
62 -----
|
|
63
|
|
64 This tool goes line by line through the specified input file and
|
|
65 if the text in the selected column matches a specified regular expression pattern
|
|
66 replaces the text with the corresponding specified replacement.
|
|
67
|
|
68 This tool can be used to change between the chromosome naming conventions of UCSC and Ensembl.
|
|
69
|
|
70 For example, to remove the **chr** part of the reference sequence name in the first column of this GFF file::
|
|
71
|
|
72 ##gff-version 2
|
|
73 ##Date: Thu Mar 23 11:21:17 2006
|
|
74 ##bed2gff.pl $Rev: 601 $
|
|
75 ##Input file: ./database/files/61c6c604e0ef50b280e2fd9f1aa7da61.dat
|
|
76 chr1 bed2gff CCDS1000.1_cds_0_0_chr1_148325916_f 148325916 148325975 . + . score "0";
|
|
77 chr21 bed2gff CCDS13614.1_cds_0_0_chr21_32707033_f 32707033 32707192 . + . score "0";
|
|
78 chrX bed2gff CCDS14606.1_cds_0_0_chrX_122745048_f 122745048 122745924 . + . score "0";
|
|
79
|
|
80 Setting::
|
|
81
|
|
82 using column: c1
|
|
83 Find Regex: chr([0-9]+|X|Y|M[Tt]?)
|
|
84 Replacement: \1
|
|
85
|
|
86 produces::
|
|
87
|
|
88 ##gff-version 2
|
|
89 ##Date: Thu Mar 23 11:21:17 2006
|
|
90 ##bed2gff.pl $Rev: 601 $
|
|
91 ##Input file: ./database/files/61c6c604e0ef50b280e2fd9f1aa7da61.dat
|
|
92 1 bed2gff CCDS1000.1_cds_0_0_chr1_148325916_f 148325916 148325975 . + . score "0";
|
|
93 21 bed2gff CCDS13614.1_cds_0_0_chr21_32707033_f 32707033 32707192 . + . score "0";
|
|
94 X bed2gff CCDS14606.1_cds_0_0_chrX_122745048_f 122745048 122745924 . + . score "0";
|
|
95
|
|
96
|
|
97 This tool uses Python regular expressions with the **re.sub()** function.
|
|
98 More information about Python regular expressions can be found here:
|
|
99 http://docs.python.org/library/re.html.
|
|
100
|
|
101 The regex **chr([0-9]+|X|Y|M[Tt]?)** means start with text **chr** followed by either: one or more digits, or the letter X, or the letter Y, or the letter M (optionally followed by a single letter T or t).
|
|
102 Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference: **\\1**
|
|
103
|
|
104
|
|
105
|
|
106 Galaxy aggressively escapes input supplied to tools, so if something
|
|
107 is not working please let us know and we can look into whether this is
|
|
108 the cause. Also if you would like help constructing regular
|
|
109 expressions for your inputs, please let us know at help@msi.umn.edu.
|
|
110
|
|
111 </help>
|
|
112 </tool>
|