comparison fastaregexfinder.xml @ 0:269c627ae9f4 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559
author mbernt
date Wed, 20 Jun 2018 11:06:57 -0400
parents
children 9a811adb714f
comparison
equal deleted inserted replaced
-1:000000000000 0:269c627ae9f4
1 <tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0">
2 <description>
3 Search in fasta for regexp match
4 </description>
5 <requirements>
6 </requirements>
7 <version_command>python '$__tool_directory__/fastaregexfinder.py' --version</version_command>
8 <command detect_errors="exit_code"><![CDATA[
9 python '$__tool_directory__/fastaregexfinder.py'
10 --fasta '$input'
11 --regex '$regex'
12 #if $settings.advanced == "advanced"
13 $settings.matchcase
14 $settings.noreverse
15 --maxstr $settings.maxstr
16 #if $settings.seqnames != ""
17 --seqnames $settings.seqnames
18 #end if
19 #end if
20 --quiet
21 > '$output'
22 ]]></command> <!-- Script path, input, regex and output are single-quoted: the regex sanitizer rewrites ' to __sq__, so the value cannot close the quotes, and the shell leaves $, backticks and backslashes in the pattern untouched. seqnames is left unquoted so that each space separated name reaches the script as its own argument. -->
23 <inputs>
24 <param type="data" name="input" format="fasta" label="Fasta file to search" help="(--fasta)" />
25 <param name="regex" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
26 <sanitizer> <!-- every printable character is accepted except the single quote, which is rewritten to __sq__ -->
27 <valid initial="string.printable">
28 <remove value="&apos;"/>
29 </valid>
30 <mapping initial="none">
31 <add source="&apos;" target="__sq__"/>
32 </mapping>
33 </sanitizer>
34 </param>
35 <conditional name="settings"> <!-- the options below are only passed to the script when "advanced" is selected -->
36 <param name="advanced" type="select" label="Specify advanced parameters">
37 <option value="simple" selected="true">No, use program defaults.</option>
38 <option value="advanced">Yes, see full parameter list.</option>
39 </param>
40 <when value="simple">
41 </when>
42 <when value="advanced">
43 <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
44 <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
45 <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
46 <param name="seqnames" type="text" value="" label="Space separated list of fasta sequences to search" help="(--seqnames)"/>
47 </when>
48 </conditional>
49 </inputs>
50 <outputs>
51 <data name="output" format="bed" /> <!-- filled by the stdout redirect in the command; the script writes no file into the working directory, so there is nothing for from_work_dir to collect -->
52 </outputs>
53 <tests>
54 <test> <!-- defaults only: uses the built-in default regex -->
55 <param name="input" value="TestSeqGroup-G4.fasta"/>
56 <output name="output" file="TestSeqGroup-G4.bed"/>
57 </test>
58 <test> <!-- custom regex with simple settings -->
59 <param name="input" value="test.fas"/>
60 <param name="regex" value="ACTG"/>
61 <output name="output" file="test-1.bed"/>
62 </test>
63 <test> <!-- advanced: matchcase switched on -->
64 <param name="input" value="test.fas"/>
65 <param name="regex" value="ACTG"/>
66 <param name="advanced" value="advanced"/>
67 <param name="matchcase" value="true"/>
68 <output name="output" file="test-2.bed"/>
69 </test>
70 <test> <!-- advanced: noreverse switched on -->
71 <param name="input" value="test.fas"/>
72 <param name="regex" value="ACTG"/>
73 <param name="advanced" value="advanced"/>
74 <param name="noreverse" value="true"/>
75 <output name="output" file="test-3.bed"/>
76 </test>
77 <test> <!-- advanced: maxstr set to 3 -->
78 <param name="input" value="test.fas"/>
79 <param name="regex" value="ACTG"/>
80 <param name="advanced" value="advanced"/>
81 <param name="maxstr" value="3"/>
82 <output name="output" file="test-4.bed"/>
83 </test>
84 <test> <!-- advanced: search restricted to two named sequences -->
85 <param name="input" value="TestSeqGroup-G4.fasta"/>
86 <param name="advanced" value="advanced"/>
87 <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
88 <output name="output" file="TestSeqGroup-G4-sub.bed"/>
89 </test>
90 </tests>
91 <help><![CDATA[
92 DESCRIPTION
93
94 Search a fasta file for matches to a regular expression and return a bed file with the
95 coordinates of the match and the matched sequence itself.
96
97 Output bed file has columns:
98
99 1. Name of fasta sequence (e.g. chromosome)
100 2. Start of the match
101 3. End of the match
102 4. ID of the match
103 5. Length of the match
104 6. Strand
105 7. Matched sequence as it appears on the forward strand
106
107 For matches on the reverse strand, the reported start and end positions and the
108 matched string all refer to the forward strand (so the G4 'GGGAGGGT'
109 present on the reverse strand is reported as ACCCTCCC).
110
111
112 Note: Fasta sequences (chroms) are read in memory one at a time along with the
113 matches for that chromosome.
114 The order of the output is: chroms as they are found in the input fasta, matches
115 sorted within chroms by positions.
116
117 ARGUMENTS:
118
119 - regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes.
120 - matchcase Match case while searching for matches. Default is to ignore case (i.e. 'ACTG' will match 'actg').
121 - noreverse Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.
122 - maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
123 - seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these chromosomes. Default is to search all the sequences in input.
124
125 EXAMPLE:
126
127 Test data::
128 >mychr
129 ACTGnACTGnACTGnTGAC
130
131 Example1 regex=ACTG::
132
133 mychr 0 4 mychr_0_4_for 4 + ACTG
134 mychr 5 9 mychr_5_9_for 4 + ACTG
135 mychr 10 14 mychr_10_14_for 4 + ACTG
136
137 Example2 regex=ACTG maxstr=3::
138
139 mychr 0 4 mychr_0_4_for 4 + ACT[3,4]
140 mychr 5 9 mychr_5_9_for 4 + ACT[3,4]
141 mychr 10 14 mychr_10_14_for 4 + ACT[3,4]
142
143 Example3 regex=A\w\wG\w::
144
145 mychr 0 5 mychr_0_5_for 5 + ACTGn
146 mychr 5 10 mychr_5_10_for 5 + ACTGn
147 mychr 10 15 mychr_10_15_for 5 + ACTGn
148
149 ]]></help>
150 <citations>
151 <citation type="bibtex">
152 @misc{githubfastaRegexFinder,
153 author = {Dario Beraldi},
154 year = {2017},
155 title = {fastaRegexFinder},
156 publisher = {GitHub},
157 journal = {GitHub repository},
158 url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
159 }</citation>
160 </citations>
161 </tool>