Mercurial > repos > mbernt > fasta_regex_finder
diff fastaregexfinder.xml @ 0:269c627ae9f4 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/fasta_regex_finder commit 8e118a4d24047e2c62912b962e854f789d6ff559
author | mbernt |
---|---|
date | Wed, 20 Jun 2018 11:06:57 -0400 |
parents | |
children | 9a811adb714f |
line wrap: on
line diff
<tool id="fasta_regex_finder" name="fasta_regex_finder" version="0.1.0">
    <!--
        Galaxy wrapper for fastaregexfinder.py (shipped in the tool directory).
        The script scans one fasta dataset for matches to a regular expression
        and prints bed-like records to standard output, which the command below
        redirects into the output dataset.
    -->
    <description>
        Search in fasta for regexp match
    </description>
    <!-- No packages are declared: the job runs whichever "python" is on its PATH. -->
    <requirements>
    </requirements>
    <version_command>python '$__tool_directory__/fastaregexfinder.py' --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Every value that ends up on the command line is wrapped in single quotes so
## that the shell never interprets characters such as dollar signs, backticks,
## double quotes or backslashes. The sanitizer of the regex parameter maps any
## single quote to __sq__, so the value cannot terminate the quoting.
python '$__tool_directory__/fastaregexfinder.py'
--fasta '$input'
--regex '$regex'
#if $settings.advanced == "advanced"
    ## The boolean parameters expand to their flag or to an empty string.
    $settings.matchcase
    $settings.noreverse
    --maxstr $settings.maxstr
    ## Split the free-text list on whitespace and pass each name as a separate,
    ## quoted argument. An empty or whitespace-only value emits nothing.
    #set $seqname_list = str($settings.seqnames).split()
    #if $seqname_list
        --seqnames
        #for $seqname in $seqname_list
            '$seqname'
        #end for
    #end if
#end if
--quiet
> '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Fasta file to search" help="(--fasta)" />
        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
            <!--
                Regular expressions need most punctuation, so all printable
                characters are accepted except the single quote, which is
                replaced by __sq__ because the command single-quotes the value.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
        <conditional name="settings">
            <param name="advanced" type="select" label="Specify advanced parameters">
                <option value="simple" selected="true">No, use program defaults.</option>
                <option value="advanced">Yes, see full parameter list.</option>
            </param>
            <when value="simple">
            </when>
            <when value="advanced">
                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="(--seqnames)" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Filled from the standard output of the script (see the redirect in the command). -->
        <data name="output" format="bed" />
    </outputs>
    <tests>
        <!-- Default regex, default settings. -->
        <test>
            <param name="input" value="TestSeqGroup-G4.fasta"/>
            <output name="output" file="TestSeqGroup-G4.bed"/>
        </test>
        <!-- Custom regex, default settings. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <output name="output" file="test-1.bed"/>
        </test>
        <!-- Case-sensitive matching. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="matchcase" value="true"/>
            </conditional>
            <output name="output" file="test-2.bed"/>
        </test>
        <!-- Forward strand only. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="noreverse" value="true"/>
            </conditional>
            <output name="output" file="test-3.bed"/>
        </test>
        <!-- Truncated match strings. -->
        <test>
            <param name="input" value="test.fas"/>
            <param name="regex" value="ACTG"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="maxstr" value="3"/>
            </conditional>
            <output name="output" file="test-4.bed"/>
        </test>
        <!-- Search restricted to two named sequences. -->
        <test>
            <param name="input" value="TestSeqGroup-G4.fasta"/>
            <conditional name="settings">
                <param name="advanced" value="advanced"/>
                <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
            </conditional>
            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
        </test>
    </tests>
    <help><![CDATA[
DESCRIPTION

Search a fasta file for matches to a regular expression and return a bed file with the
coordinates of the match and the matched sequence itself.

Output bed file has columns:

1. Name of fasta sequence (e.g. chromosome)
2. Start of the match
3. End of the match
4. ID of the match
5. Length of the match
6. Strand
7. Matched sequence as it appears on the forward strand

For matches on the reverse strand, the reported start and end positions and the
matched string all refer to the forward strand (so the G4 'GGGAGGGT' present on
the reverse strand is reported as ACCCTCCC).


Note: Fasta sequences (chroms) are read in memory one at a time along with the
matches for that chromosome.
The order of the output is: chroms as they are found in the input fasta, matches
sorted within chroms by positions.

ARGUMENTS:

- regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is ``([gG]{3,}\w{1,7}){3,}[gG]{3,}`` which searches for G-quadruplexes.
- matchcase Match case while searching for matches. Default is to ignore case (i.e. 'ACTG' will match 'actg').
- noreverse Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.
- maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as ``<ACTG...ACTG>[<maxstr>,<tot length>]``
- seqnames List of fasta sequences in the input to search. E.g. use ``--seqnames chr1 chr2 chrM`` to search only these chromosomes. Default is to search all the sequences in input.

EXAMPLE:

Test data::

    >mychr
    ACTGnACTGnACTGnTGAC

Example1 regex= ``ACTG``::

    mychr 0  4  mychr_0_4_for   4 + ACTG
    mychr 5  9  mychr_5_9_for   4 + ACTG
    mychr 10 14 mychr_10_14_for 4 + ACTG

Example2 regex= ``ACTG`` maxstr=3::

    mychr 0  4  mychr_0_4_for   4 + ACT[3,4]
    mychr 5  9  mychr_5_9_for   4 + ACT[3,4]
    mychr 10 14 mychr_10_14_for 4 + ACT[3,4]

Example3 regex= ``A\w\wGn``::

    mychr 0  5  mychr_0_5_for   5 + ACTGn
    mychr 5  10 mychr_5_10_for  5 + ACTGn
    mychr 10 15 mychr_10_15_for 5 + ACTGn

    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubfastaRegexFinder,
  author = {Dario Beraldi},
  year = {2017},
  title = {fastaRegexFinder},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
}</citation>
    </citations>
</tool>