Mercurial > repos > iuc > bbtools_bbmap
diff bbmap.xml @ 0:07a6e49c7d74 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit 3682ff4e2e47438e975fc04f92469eca7814fcfa"
author | iuc |
---|---|
date | Mon, 04 Oct 2021 12:14:47 +0000 |
parents | |
children | e0ca2ec4f5d9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bbmap.xml Mon Oct 04 12:14:47 2021 +0000 @@ -0,0 +1,157 @@ +<tool id="bbtools_bbmap" name="BBTools: BBMap" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>short-read aligner</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ +#import os +#import re + +#if str($ref_source_cond.ref_source) == 'cached' + #set ref = str($ref_source_cond.reference.fields.path) +#else: + #set ref = $ref_source_cond.reference +#end if + +#if str($input_type_cond.input_type) in ['single', 'pair']: + #set read1 = $input_type_cond.read1 + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) + ## bbmap uses the file extension to determine the input format. + #set ext = $read1_identifier + '.fastq' + #if $read1.ext.endswith('.gz'): + #set ext = $ext + '.gz' + #end if + #set read1_file = $read1_identifier + $ext + ln -s '${read1}' '${read1_file}' && + #if str($input_type_cond.input_type) == 'pair': + #set read2 = $input_type_cond.read2 + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) + #set read2_file = $read2_identifier + $ext + ln -s '${read2}' '${read2_file}' && + #end if +#else: + #set read1 = $input_type_cond.reads_collection['forward'] + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name)) + ## bbmap uses the file extension to determine the input format. + #set ext = $read1_identifier + '.fastq' + #if $read1.ext.endswith('.gz'): + #set ext = $ext + '.gz' + #end if + #set read1_file = $read1_identifier + $ext + ln -s '${read1}' '${read1_file}' && + #set read2 = $input_type_cond.reads_collection['reverse'] + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name)) + #set read2_file = $read2_identifier + $ext + ln -s '${read2}' '${read2_file}' && +#end if + +bbmap.sh t=\${GALAXY_SLOTS:-4} ref='${ref}' +#if str($input_type_cond.input_type) == 'single': + in='${read1_file}' +#else: + in1='${read1_file}' in2='${read2_file}' +#end if +#if str($output_sort) == 'coordinate': + out='mapped.bam'; samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output' 'mapped.bam' +#elif str($output_sort) == 'name': + out='mapped.bam'; samtools sort -n -@\${GALAXY_SLOTS:-4} -T '\${TMPDIR:-.}' -O bam -o '$output' 'mapped.bam' +#else: + out='mapped.bam' && mv 'mapped.bam' '$output' +#end if +]]></command> + <inputs> + <conditional name="input_type_cond"> + <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> + <option value="single" selected="true">Single dataset</option> + <option value="pair">Dataset pair</option> + <option value="paired">List of dataset pairs</option> + </param> + <when value="single"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + </when> + <when value="pair"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> + </when> + <when value="paired"> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + </when> + </conditional> + <expand macro="reference_source_cond"/> + <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)."> + <option value="coordinate" selected="True">Sort by chromosomal coordinates</option> + <option value="name">Sort by read names</option> + <option value="unsorted">Not sorted (sorted as input)</option> + </param> + </inputs> + <outputs> + <data format="bam" name="output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> + <expand macro="dbKeyActionsBBMap"/> + <change_format> + <when input="output_sort" value="name" format="qname_sorted.bam" /> + <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" /> + </change_format> + </data> + </outputs> + <tests> + <!-- Single file, cached reference, output coordinate sorted --> + <test expect_num_outputs="1"> + <param name="input_type" value="single"/> + <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> + <output name="output" file="output1.bam" ftype="bam" lines_diff="4"> + <metadata name="dbkey" value="89" /> + </output> + </test> + <!-- Paired reads in separate datasets, cached reference, output name sorted --> + <test expect_num_outputs="1"> + <param name="input_type" value="pair"/> + <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> + <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/> + <param name="output_sort" value="name"/> + <output name="output" file="output2.bam" ftype="qname_sorted.bam" lines_diff="4"> + <metadata name="dbkey" value="89" /> + </output> + </test> + <!-- Collection of Paired reads, history reference, output unsorted --> + <test expect_num_outputs="1"> + <param name="input_type" value="paired"/> + <param name="reads_collection"> + <collection type="paired"> + <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> + <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/> + </collection> + </param> + <param name="ref_source" value="history"/> + <param name="reference" value="NC_002945v4.fasta" dbkey="89" ftype="fasta"/> + <param name="output_sort" value="unsorted"/> + <output name="output" file="output3.bam" ftype="qname_input_sorted.bam" lines_diff="4"> + <metadata name="dbkey" value="89" /> + </output> + </test> + </tests> + <help> +**What it does** + +BBMap is a splice-aware global aligner for DNA and RNA sequencing reads. It is fast and extremely accurate, particularly +with highly mutated genomes or reads with long indels, even whole-gene deletions over 100kbp long. It has no upper limit +to genome size or number of contigs and has been successfully used for mapping to an 85 gigabase soil metagenome with over +200 million contigs. the indexing phase is very fast compared to other aligners. + +BBMap can output many different statistics files; an empirical read quality histogram, insert-size distribution, and genome +coverage with or without generating a sam file. It is useful in quality control of libraries and sequencing runs or +evaluating new sequencing platforms. + +**Options** + + *Bam sorting mode* - the generated bam files can be sorted according to three criteria: coordinates, names and input order. + + * Sort by chromosomal coordinates - the file is sorted by coordinates (i.e., the reads from the beginning of the first + chromosome are first in the file. + * Sort by read names - the file is sorted by the reference ID (i.e., the QNAME field). + * Not sorted (sorted as input) - the file is sorted in the order of the reads in the input file. + + </help> + <expand macro="citations"/> +</tool> +