Mercurial > repos > mbernt > fasta_regex_finder
view fastaregexfinder.xml @ 1:9a811adb714f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bioinformatics-cafe commit c318cd3f894f89c13d7eb257678673da70f72212
author | iuc |
---|---|
date | Wed, 25 Jan 2023 14:03:52 +0000 |
parents | 269c627ae9f4 |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for fastaRegexFinder: searches every sequence of a fasta
    file for matches to a regular expression and writes the hits as BED.
-->
<tool id="fasta_regex_finder" name="Fasta regular expression finder" version="0.1.0">
    <description>Search in fasta for regexp match</description>
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
    </requirements>
    <version_command>python '$__tool_directory__/fastaregexfinder.py' --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/fastaregexfinder.py'
    --fasta '$input'
    --regex '$regex'
#if $settings.advanced == "advanced"
    $settings.matchcase
    $settings.noreverse
    --maxstr $settings.maxstr
    ## Split on whitespace so an empty or blank value emits no --seqnames at all,
    ## and quote every name so the shell never globs or parses it.
    #set $seqname_list = str($settings.seqnames).split()
    #if $seqname_list
    --seqnames
        #for $seqname in $seqname_list
        '$seqname'
        #end for
    #end if
#end if
    --quiet
    > '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Fasta file" help="(--fasta)" />
        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
            <!--
                Regular expressions need most punctuation, so every printable
                character is allowed except the single quote, which would end
                the quoting used on the command line; it is mapped to __sq__.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
        <conditional name="settings">
            <param name="advanced" type="select" label="Specify advanced parameters">
                <option value="simple" selected="true">No, use program defaults.</option>
                <option value="advanced">Yes, see full parameter list.</option>
            </param>
            <when value="simple">
            </when>
            <when value="advanced">
                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="--seqnames"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The command redirects stdout straight into this dataset. -->
        <data name="output" format="bed" />
    </outputs>
    <tests>
        <!-- Defaults: G-quadruplex regex, both strands. -->
        <test>
            <param name="input" value="TestSeqGroup-G4.fasta"/>
            <output name="output" file="TestSeqGroup-G4.bed"/>
        </test>
        <!-- Custom regex with simple settings. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <output name="output" file="test-1.bed"/>
        </test>
        <!-- Case-sensitive matching. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="matchcase" value="true"/>
            </conditional>
            <output name="output" file="test-2.bed"/>
        </test>
        <!-- Forward strand only. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="noreverse" value="true"/>
            </conditional>
            <output name="output" file="test-3.bed"/>
        </test>
        <!-- Truncated match strings. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="maxstr" value="3"/>
            </conditional>
            <output name="output" file="test-4.bed"/>
        </test>
        <!-- Restrict the search to a subset of the sequences. -->
        <test>
            <param name="input" value="TestSeqGroup-G4.fasta"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
            </conditional>
            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
        </test>
    </tests>
    <help><![CDATA[
DESCRIPTION

Search a fasta file for matches to a regular expression and return a bed file
with the coordinates of the match and the matched sequence itself.

Output bed file has columns:

1. Name of fasta sequence (e.g. chromosome)
2. Start of the match
3. End of the match
4. ID of the match
5. Length of the match
6. Strand
7. Matched sequence as it appears on the forward strand

For matches on the reverse strand, the reported start and end positions and the
matched string all refer to the forward strand (so the G4 'GGGAGGGT' present on
the reverse strand is reported as ACCCTCCC).

Note: Fasta sequences (chroms) are read in memory one at a time along with the
matches for that chromosome. The order of the output is: chroms as they are
found in the input fasta, matches sorted within chroms by positions.

ARGUMENTS:

- regex
  Regex to be searched in the fasta input. Matches to the reverse complement
  will have - strand. The default regex is ``([gG]{3,}\w{1,7}){3,}[gG]{3,}``
  which searches for G-quadruplexes.
- matchcase
  Match case while searching for matches. Default is to ignore case
  (i.e. 'ACTG' will match 'actg').
- noreverse
  Do not search the reverse complement of the input fasta. Use this flag to
  search protein sequences.
- maxstr
  Maximum length of the match to report in the 7th column of the output.
  Default is to report up to 10000nt. Truncated matches are reported as
  ``<ACTG...ACTG>[<maxstr>,<tot length>]``
- seqnames
  List of fasta sequences in the input to search. E.g. use
  ``--seqnames chr1 chr2 chrM`` to search only these chromosomes. Default is
  to search all the sequences in input.

EXAMPLE:

Test data::

    >mychr
    ACTGnACTGnACTGnTGAC

Example1 ``regex=ACTG``::

    mychr   0   4   mychr_0_4_for   4   +   ACTG
    mychr   5   9   mychr_5_9_for   4   +   ACTG
    mychr   10  14  mychr_10_14_for 4   +   ACTG

Example2 ``regex=ACTG maxstr=3``::

    mychr   0   4   mychr_0_4_for   4   +   ACT[3,4]
    mychr   5   9   mychr_5_9_for   4   +   ACT[3,4]
    mychr   10  14  mychr_10_14_for 4   +   ACT[3,4]

Example3 ``regex=A\w\wGn``::

    mychr   0   5   mychr_0_5_for   5   +   ACTGn
    mychr   5   10  mychr_5_10_for  5   +   ACTGn
    mychr   10  15  mychr_10_15_for 5   +   ACTGn
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubfastaRegexFinder,
    author = {Dario Beraldi},
    year = {2017},
    title = {fastaRegexFinder},
    publisher = {GitHub},
    journal = {GitHub repository},
    url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
}</citation>
    </citations>
</tool>