Mercurial > repos > mbernt > fasta_regex_finder
comparison fastaregexfinder.xml @ 0:269c627ae9f4 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559
author | mbernt |
---|---|
date | Wed, 20 Jun 2018 11:06:57 -0400 |
parents | |
children | 9a811adb714f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:269c627ae9f4 |
---|---|
1 <tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0"> | |
2 <description> | |
3 Search in fasta for regexp match | |
4 </description> | |
5 <requirements> <!-- No packages declared: the wrapper runs whatever python is on PATH. TODO confirm the supported python version and pin it here. --> | |
6 </requirements> | |
7 <version_command>python '$__tool_directory__/fastaregexfinder.py' --version</version_command> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 python '$__tool_directory__/fastaregexfinder.py' | |
10 --fasta '$input' | |
11 --regex '$regex' ## single quoted: the param sanitizer maps single quotes to __sq__, so the shell cannot expand anything inside the regex | |
12 #if $settings.advanced == "advanced" | |
13 $settings.matchcase | |
14 $settings.noreverse | |
15 --maxstr $settings.maxstr | |
16 #if $settings.seqnames != "" | |
17 --seqnames $settings.seqnames ## left unquoted on purpose: every space separated name has to arrive as its own argument | |
18 #end if | |
19 #end if | |
20 --quiet | |
21 > '$output' | |
22 ]]></command> | |
23 <inputs> | |
24 <param type="data" name="input" format="fasta" label="Fasta file to search" help="(--fasta)" /> | |
25 <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)"> | |
26 <sanitizer> <!-- Accept every printable character except the single quote, which is rewritten to __sq__ so it can never close a quoted shell argument. --> | |
27 <valid initial="string.printable"> | |
28 <remove value="'"/> | |
29 </valid> | |
30 <mapping initial="none"> | |
31 <add source="'" target="__sq__"/> | |
32 </mapping> | |
33 </sanitizer> | |
34 </param> | |
35 <conditional name="settings"> | |
36 <param name="advanced" type="select" label="Specify advanced parameters"> | |
37 <option value="simple" selected="true">No, use program defaults.</option> | |
38 <option value="advanced">Yes, see full parameter list.</option> | |
39 </param> | |
40 <when value="simple"> <!-- No extra parameters: the command section adds no optional flags in this branch. --> | |
41 </when> | |
42 <when value="advanced"> | |
43 <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" /> | |
44 <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" /> | |
45 <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" /> | |
46 <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="(--seqnames)"/> | |
47 </when> | |
48 </conditional> | |
49 </inputs> | |
50 <outputs> <!-- The bed output is filled by the stdout redirect in the command section, so no from_work_dir is needed. --> | |
51 <data name="output" format="bed" /> | |
52 </outputs> | |
53 <tests> | |
54 <test> <!-- Only the input is set, so the regex keeps its default value; compared against TestSeqGroup-G4.bed. --> | |
55 <param name="input" value="TestSeqGroup-G4.fasta"/> | |
56 <output name="output" file="TestSeqGroup-G4.bed"/> | |
57 </test> | |
58 <test> <!-- Literal regex ACTG with the simple (default) settings branch. --> | |
59 <param name="input" value="test.fas"/> | |
60 <param name="regex" value="ACTG"/> | |
61 <output name="output" file="test-1.bed"/> | |
62 </test> | |
63 <test> <!-- Advanced branch with the matchcase flag switched on. --> | |
64 <param name="input" value="test.fas"/> | |
65 <param name="regex" value="ACTG"/> | |
66 <param name="advanced" value="advanced"/> | |
67 <param name="matchcase" value="--matchcase"/> | |
68 <output name="output" file="test-2.bed"/> | |
69 </test> | |
70 <test> <!-- Advanced branch with the noreverse flag switched on. --> | |
71 <param name="input" value="test.fas"/> | |
72 <param name="regex" value="ACTG"/> | |
73 <param name="advanced" value="advanced"/> | |
74 <param name="noreverse" value="--noreverse"/> | |
75 <output name="output" file="test-3.bed"/> | |
76 </test> | |
77 <test> <!-- Advanced branch with maxstr set to 3, shorter than the 4 character regex ACTG. --> | |
78 <param name="input" value="test.fas"/> | |
79 <param name="regex" value="ACTG"/> | |
80 <param name="advanced" value="advanced"/> | |
81 <param name="maxstr" value="3"/> | |
82 <output name="output" file="test-4.bed"/> | |
83 </test> | |
84 <test> <!-- Advanced branch passing two space separated sequence names through seqnames; the regex keeps its default value. --> | |
85 <param name="input" value="TestSeqGroup-G4.fasta"/> | |
86 <param name="advanced" value="advanced"/> | |
87 <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/> | |
88 <output name="output" file="TestSeqGroup-G4-sub.bed"/> | |
89 </test> | |
90 </tests> | |
91 <help><![CDATA[ | |
92 DESCRIPTION | |
93 | |
94 Search a fasta file for matches to a regular expression and return a bed file with the | |
95 coordinates of the match and the matched sequence itself. | |
96 | |
97 Output bed file has columns: | |
98 | |
99 1. Name of fasta sequence (e.g. chromosome) | |
100 2. Start of the match | |
101 3. End of the match | |
102 4. ID of the match | |
103 5. Length of the match | |
104 6. Strand | |
105 7. Matched sequence as it appears on the forward strand | |
106 | |
107 For matches on the reverse strand, the start and end positions are reported on the | |
108 forward strand, as is the matched string (so the G4 'GGGAGGGT' | |
109 present on the reverse strand is reported as ACCCTCCC). | |
110 | |
111 | |
112 Note: Fasta sequences (chroms) are read in memory one at a time along with the | |
113 matches for that chromosome. | |
114 The order of the output is: chroms as they are found in the input fasta, matches | |
115 sorted within chroms by positions. | |
116 | |
117 ARGUMENTS: | |
118 | |
119 - regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes. | |
120 - matchcase Match case while searching for matches. Default is to ignore case (i.e. 'ACTG' will match 'actg'). | |
121 - noreverse Do not search the reverse complement of the input fasta. Use this flag to search protein sequences. | |
122 - maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>] | |
123 - seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these chromosomes. Default is to search all the sequences in input. | |
124 | |
125 EXAMPLE: | |
126 | |
127 Test data:: | |
128 >mychr | |
129 ACTGnACTGnACTGnTGAC | |
130 | |
131 Example1 regex=ACTG:: | |
132 | |
133 mychr 0 4 mychr_0_4_for 4 + ACTG | |
134 mychr 5 9 mychr_5_9_for 4 + ACTG | |
135 mychr 10 14 mychr_10_14_for 4 + ACTG | |
136 | |
137 Example2 regex=ACTG maxstr=3:: | |
138 | |
139 mychr 0 4 mychr_0_4_for 4 + ACT[3,4] | |
140 mychr 5 9 mychr_5_9_for 4 + ACT[3,4] | |
141 mychr 10 14 mychr_10_14_for 4 + ACT[3,4] | |
142 | |
143 Example3 regex=A\w\wGn:: | |
144 | |
145 mychr 0 5 mychr_0_5_for 5 + ACTGn | |
146 mychr 5 10 mychr_5_10_for 5 + ACTGn | |
147 mychr 10 15 mychr_10_15_for 5 + ACTGn | |
148 | |
149 ]]></help> | |
150 <citations> <!-- Single BibTeX entry pointing at the GitHub repository given in its url field. --> | |
151 <citation type="bibtex"> | |
152 @misc{githubfastaRegexFinder, | |
153 author = {Dario Beraldi}, | |
154 year = {2017}, | |
155 title = {fastaRegexFinder}, | |
156 publisher = {GitHub}, | |
157 journal = {GitHub repository}, | |
158 url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder}, | |
159 }</citation> | |
160 </citations> | |
161 </tool> |