diff fastaregexfinder.xml @ 0:269c627ae9f4 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559
author mbernt
date Wed, 20 Jun 2018 11:06:57 -0400
parents
children 9a811adb714f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.xml	Wed Jun 20 11:06:57 2018 -0400
@@ -0,0 +1,161 @@
+<tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0">
+    <description>
+        Search in fasta for regexp match
+    </description>
+    <requirements>
+    </requirements>
+    <version_command>python $__tool_directory__/fastaregexfinder.py --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+python $__tool_directory__/fastaregexfinder.py 
+--fasta "$input"
+--regex "$regex"
+#if $settings.advanced == "advanced"
+    $settings.matchcase
+    $settings.noreverse
+    --maxstr $settings.maxstr
+    #if $settings.seqnames != ""
+        --seqnames $settings.seqnames
+    #end if
+#end if
+--quiet
+> $output
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="fasta" />
+        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+        <conditional name="settings">
+            <param name="advanced" type="select" label="Specify advanced parameters">
+                <option value="simple" selected="true">No, use program defaults.</option>
+                <option value="advanced">Yes, see full parameter list.</option>
+            </param>
+            <when value="simple">
+            </when>
+            <when value="advanced">
+                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
+                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
+                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
+                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="--seqnames"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="bed" from_work_dir="TestSeqGroup-G4.bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <output name="output" file="TestSeqGroup-G4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <output name="output" file="test-1.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="matchcase" value="--matchcase"/>
+            <output name="output" file="test-2.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="noreverse" value="--noreverse"/>
+            <output name="output" file="test-3.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="maxstr" value="3"/>
+            <output name="output" file="test-4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <param name="advanced" value="advanced"/>
+            <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
+            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
+        </test>
+</tests>
+    <help><![CDATA[
+DESCRIPTION
+    
+Search a fasta file for matches to a regular expression and return a bed file with the
+coordinates of the match and the matched sequence itself. 
+    
+Output bed file has columns:
+
+1. Name of fasta sequence (e.g. chromosome)
+2. Start of the match
+3. End of the match
+4. ID of the match
+5. Length of the match
+6. Strand 
+7. Matched sequence as it appears on the forward strand
+    
+For matches on the reverse strand it is reported the start and end position on the
+forward strand and the matched string on the forward strand (so the G4 'GGGAGGGT'
+present on the reverse strand is reported as ACCCTCCC).
+    
+
+Note: Fasta sequences (chroms) are read in memory one at a time along with the
+matches for that chromosome.
+The order of the output is: chroms as they are found in the inut fasta, matches
+sorted within chroms by positions.
+
+ARGUMENTS:
+
+- regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes.
+- matchcase Match case while searching for matches. Default is to ignore case (I.e. 'ACTG' will match 'actg').
+- noreverse           Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.                                   
+- maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
+- seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these crhomosomes. Default is to search all the sequences in input.
+
+EXAMPLE:
+
+Test data::
+>mychr
+ACTGnACTGnACTGnTGAC
+
+Example1 regex=ACTG::
+
+        mychr	0	4	mychr_0_4_for	4	+	ACTG
+        mychr	5	9	mychr_5_9_for	4	+	ACTG
+        mychr	10	14	mychr_10_14_for	4	+	ACTG
+
+Example2 regex=ACTG maxstr=3::
+
+        mychr	0	4	mychr_0_4_for	4	+	ACT[3,4]
+        mychr	5	9	mychr_5_9_for	4	+	ACT[3,4]
+        mychr	10	14	mychr_10_14_for	4	+	ACT[3,4]
+    
+Example3 regex=A\w\wG::
+
+        mychr	0	5	mychr_0_5_for	5	+	ACTGn
+        mychr	5	10	mychr_5_10_for	5	+	ACTGn
+        mychr	10	15	mychr_10_15_for	5	+	ACTGn
+
+   ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubfastaRegexFinder,
+  author = {Dario Beraldi},
+  year = {2017},
+  title = {fastaRegexFinder},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
+}</citation>
+    </citations>
+</tool>