annotate tools/unix_tools/find_and_replace.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="cshl_find_and_replace" name="Find and Replace">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>text</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="perl">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 find_and_replace.pl
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 #if $searchwhere.choice == "column":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 -c $searchwhere.column
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 -o $output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 $caseinsensitive
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 $wholewords
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 $skip_first_line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 $is_regex
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 '$url_paste'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 '$file_data'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 '$input'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <param format="txt" name="input" type="data" label="File to process" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <!-- Note: the parameter name MUST BE 'url_paste' -
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 This is a hack in the galaxy library (see ./lib/galaxy/util/__init__.py line 142)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 If the name is 'url_paste' the string won't be sanitized, and all the non-alphanumeric characters
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 will be passed to the shell script -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes / / ) " >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 <validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <validator type="expression" message="Invalid Program!">value.find('\'')==-1</validator>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 help="see help section for details." />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 help="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <conditional name="searchwhere">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param name="choice" type="select" label="Replace text in">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 <option value="line" selected="true">entire line</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <option value="column">specific column</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <when value="line">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 <when value="column">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <data format="input" name="output" metadata_source="input" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 This tool finds &amp; replaces text in an input dataset.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 The **pattern to find** can be a simple text string, or a Perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 This tool uses Perl regular expression syntax.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 **Examples of *regular-expression* Find Patterns**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 - **HELLO** The word 'HELLO' (case sensitive).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 - **A{4,}** Four or more consecutive A's.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 **Examples of Replace Patterns**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 - **FOO-&amp;-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&amp;** (ampersand) represents the matched find pattern.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 - **\\1** The text which matched the first parenthesis in the Find Pattern.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 **Example 1**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 **Find Pattern:** HELLO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 **Replace Pattern:** WORLD
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 **Regular Expression:** no
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 **Replace what:** entire line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 Every time the word HELLO is found, it will be replaced with the word WORLD.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 **Example 2**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 **Find Pattern:** ^chr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 **Replace Pattern:** (empty)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 **Regular Expression:** yes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 **Replace what:** column 11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 **Perl's Regular Expression Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 The Find &amp; Replace tool searches the data for text matching the given pattern and replaces each match. A Regular Expression is a pattern describing a certain amount of text.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 - **^** matches the beginning of a string (but not an internal line).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 - **(** .. **)** groups a particular pattern.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 - **{n}** The preceding item is matched exactly n times.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 - **{n,}** The preceding item is matched n or more times.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 - **{n,m}** The preceding item is matched at least n times but not more than m times.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 - **.** Matches any single character except a newline.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 - ***** The preceding item will be matched zero or more times.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 - **?** The preceding item is optional and matched at most once.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 - **+** The preceding item will be matched one or more times.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 - **^** has two meanings:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 - matches the beginning of a line or string.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 - **$** matches the end of a line or string.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 - **\\|** Separates alternate possibilities.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 - **\\d** matches a single digit
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 - **\\w** matches a single letter or digit or an underscore.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 - **\\s** matches a single white-space character (space or tab).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 </tool>