view tools/unix_tools/word_list_grep.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

<tool id="cshl_word_list_grep" name="Select lines">
<description>by word list</description>
<!--
	Command line for word_list_grep.pl (run through the perl interpreter).

	Argument order: optional "-c COLUMN" (only when the user chose to search a
	specific column), "-o OUTPUT", the optional boolean flags (each expands to
	its flag or to an empty string), then the word list file and finally the
	input file.

	Dataset paths are single-quoted so that a path containing whitespace or
	shell metacharacters reaches the script as a single argument. The boolean
	flags are deliberately left unquoted: an unchecked flag expands to an empty
	string and must vanish from the command line rather than become an empty
	argument.
-->
<command interpreter="perl">
	word_list_grep.pl
	#if $searchwhere.choice == "column":
		-c $searchwhere.column
	#end if
	-o '$output'
	$inverse
	$caseinsensitive
	$wholewords
	$skip_first_line
	'$wordlist'
	'$input'
</command>

<inputs>
	<!-- Dataset whose lines are filtered; passed as the last command-line argument. -->
	<param name="input" type="data" format="txt" label="input file" />

	<!-- Dataset holding the word list; passed just before the input file. -->
	<param name="wordlist" type="data" format="txt" label="word list file" />

	<!-- Boolean switches: each one expands to its flag when checked and to an empty string otherwise. -->
	<param name="inverse" type="boolean"
		truevalue="-v" falsevalue="" checked="false"
		label="Inverse filter"
		help="Report lines NOT matching the word list" />

	<param name="caseinsensitive" type="boolean"
		truevalue="-i" falsevalue="" checked="false"
		label="Case-Insensitive search"
		help="" />

	<param name="wholewords" type="boolean"
		truevalue="-w" falsevalue="" checked="false"
		label="find whole-words"
		help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />

	<param name="skip_first_line" type="boolean"
		truevalue="-s" falsevalue="" checked="false"
		label="Ignore first line"
		help="Select this option if the first line contains column headers. First line will not be filtered. " />

	<!--
		Search scope. "line" (the default) adds nothing to the command line;
		"column" exposes a column picker tied to the input dataset, and the
		command template passes the chosen column with -c.
	-->
	<conditional name="searchwhere">
		<param name="choice" type="select" label="Search words in">
			<option value="line" selected="true">entire line</option>
			<option value="column">specific column</option>
		</param>

		<when value="line">
		</when>

		<when value="column">
			<param name="column" type="data_column" data_ref="input" accept_default="true" label="in column" />
		</when>
	</conditional>

</inputs>

<outputs>
	<!--
		The filtered lines. The datatype and the metadata are both taken
		explicitly from the "input" dataset. The legacy format="input" value did
		not say which of the two data inputs (input / wordlist) supplies the
		datatype; format_source names it.
	-->
	<data name="output" format_source="input" metadata_source="input" />
</outputs>

<help>
**What it does**

This tool selects lines that match words from a word list.

--------

**Example**

Input file (UCSC's rmsk track from dm3)::

    585	787	66	241	11	chrXHet	2860	3009	-201103	-	DNAREP1_DM	LINE	Penelope	0	594	435	1
    585	1383	78	220	0	chrXHet	3012	3320	-200792	-	DNAREP1_DM	LINE	Penelope	-217	377	2	1
    585	244	103	0	0	chrXHet	3737	3776	-200336	-	DNAREP1_DM	LINE	Penelope	-555	39	1	1
    585	2270	83	144	0	chrXHet	7907	8426	-195686	+	DNAREP1_DM	LINE	Penelope	1	594	0	1
    585	617	189	73	68	chrXHet	10466	10671	-193441	+	DNAREP1_DM	LINE	Penelope	368	573	-21	1
    586	1122	71	185	0	chrXHet	173138	173322	-30790	-	PROTOP	DNA	P	-4033	447	230	1
    ...
    ...


Word list file::

  STALKER
  PROTOP

 

Output (searching in column 11)::

    586	1122	71	185	0	chrXHet	173138	173322	-30790	-	PROTOP	DNA	P	-4033	447	230	1
    586	228	162	0	0	chrXHet	181026	181063	-23049	+	STALKER4_I	LTR	Gypsy	9	45	-6485	1
    585	245	105	26	0	chr3R	41609	41647	-27863406	+	PROTOP_B	DNA	P	507	545	-608	4
    586	238	91	0	0	chr3R	140224	140257	-27764796	-	PROTOP_B	DNA	P	-617	536	504	4
    ...
    ...

(With **find whole-words** not selected, *PROTOP* matched *PROTOP_B* and *STALKER* matched *STALKER4_I*.)




Output (searching in column 11, and whole-words only)::

    586	670	90	38	57	chrXHet	168356	168462	-35650	-	PROTOP	DNA	P	-459	4021	3918	1
    586	413	139	70	0	chrXHet	168462	168548	-35564	-	PROTOP	DNA	P	-3406	1074	983	1
    586	1122	71	185	0	chrXHet	173138	173322	-30790	-	PROTOP	DNA	P	-4033	447	230	1
    ...
    ...

</help>

</tool>