Mercurial > repos > devteam > samtools_slice_bam
changeset 4:344fc91e1bfd draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_slice_bam commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
author | iuc |
---|---|
date | Tue, 28 Sep 2021 16:16:30 +0000 |
parents | a4a10c7924d1 |
children | b5c88b7b153e |
files | macros.xml samtools_slice_bam.xml test-data/bam-slice-test1.bam test-data/bam-slice-test2.bam test-data/bam-slice-test3.bam |
diffstat | 5 files changed, 182 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue May 09 11:17:27 2017 -0400 +++ b/macros.xml Tue Sep 28 16:16:30 2021 +0000 @@ -1,11 +1,182 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="1.3.1">samtools</requirement> + <requirement type="package" version="@TOOL_VERSION@">samtools</requirement> <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">1.3.1</token> + <token name="@TOOL_VERSION@">1.13</token> + <token name="@PROFILE@">20.05</token> + <token name="@FLAGS@"><![CDATA[ + #set $flags = 0 + #if $filter + #set $flags = sum(map(int, str($filter).split(','))) + #end if + ]]></token> + <token name="@PREPARE_IDX@"><![CDATA[ + ##prepare input and indices + ln -s '$input' infile && + #if $input.is_of_type('bam'): + #if str( $input.metadata.bam_index ) != "None": + ln -s '${input.metadata.bam_index}' infile.bai && + #else: + samtools index infile infile.bai && + #end if + #elif $input.is_of_type('cram'): + #if str( $input.metadata.cram_index ) != "None": + ln -s '${input.metadata.cram_index}' infile.crai && + #else: + samtools index infile infile.crai && + #end if + #end if + ]]></token> + <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[ + ##prepare input and indices + #for $i, $bam in enumerate( $input_bams ): + ln -s '$bam' '${i}' && + #if $bam.is_of_type('bam'): + #if str( $bam.metadata.bam_index ) != "None": + ln -s '${bam.metadata.bam_index}' '${i}.bai' && + #else: + samtools index '${i}' '${i}.bai' && + #end if + #elif $bam.is_of_type('cram'): + #if str( $bam.metadata.cram_index ) != "None": + ln -s '${bam.metadata.cram_index}' '${i}.crai' && + #else: + samtools index '${i}' '${i}.crai' && + #end if + #end if + #end for + ]]></token> + <token name="@PREPARE_FASTA_IDX@"><![CDATA[ + ##checks for reference data ($addref_cond.addref_select=="history" or =="cached") + ##and sets the -t/-T parameters accordingly: + ##- in case of history a symbolic link is used because samtools (view) will generate + ## the index which might not be possible in 
the directory containing the fasta file + ##- in case of cached the absolute path is used which allows to read the cram file + ## without specifying the reference + #if $addref_cond.addref_select == "history": + ln -s '${addref_cond.ref}' reference.fa && + samtools faidx reference.fa && + #set reffa="reference.fa" + #set reffai="reference.fa.fai" + #elif $addref_cond.addref_select == "cached": + #set reffa=str($addref_cond.ref.fields.path) + #set reffai=str($addref_cond.ref.fields.path)+".fai" + #else + #set reffa=None + #set reffai=None + #end if + ]]></token> + + <xml name="optional_reference"> + <conditional name="addref_cond"> + <param name="addref_select" type="select" label="Use a reference sequence"> + <help>@HELP@</help> + <option value="no">No</option> + <option value="history">Use a genome/index from the history</option> + <option value="cached">Use a built-in genome</option> + </param> + <when value="no"/> + <when value="history"> + <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/> + </when> + <when value="cached"> + <param name="ref" argument="@ARGUMENT@" type="select" label="Reference"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/> + </options> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/> + </param> + </when> + </conditional> + </xml> + <xml name="mandatory_reference" token_help="" token_argument=""> + <conditional name="addref_cond"> + <param name="addref_select" type="select" label="Use a reference sequence"> + <help>@HELP@</help> + <option value="history">Use a genome/index from the history</option> + <option value="cached">Use a built-in genome</option> + </param> + <when value="history"> + <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/> + </when> + <when value="cached"> + <param name="ref" 
argument="@ARGUMENT@" type="select" label="Reference"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/> + <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> + </options> + </param> + </when> + </conditional> + </xml> + + + <token name="@ADDTHREADS@"><![CDATA[ + ##compute the number of ADDITIONAL threads to be used by samtools (-@) + addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) && + ]]></token> + <token name="@ADDMEMORY@"><![CDATA[ + ##compute the number of memory available to samtools sort (-m) + ##use only 75% of available: https://github.com/samtools/samtools/issues/831 + addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && + ((addmemory=addmemory*75/100)) && + ]]></token> + <xml name="seed_input"> + <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." /> + </xml> + <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false"> + <option value="1" selected="@S1@">Read is paired</option> + <option value="2" selected="@S2@">Read is mapped in a proper pair</option> + <option value="4" selected="@S4@">Read is unmapped</option> + <option value="8" selected="@S8@">Mate is unmapped</option> + <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option> + <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option> + <option value="64" selected="@S64@">Read is the first in a pair</option> + <option value="128" selected="@S128@">Read is the second in a pair</option> + <option value="256" selected="@S256@">Alignment of the read is not primary</option> + <option value="512" selected="@S512@">Read 
fails platform/vendor quality checks</option> + <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option> + <option value="2048" selected="@S2048@">Alignment is supplementary</option> + </xml> + + <!-- region specification macros and tokens for tools that allow the specification + of region by bed file / space separated list of regions --> + <token name="@REGIONS_FILE@"><![CDATA[ + #if $cond_region.select_region == 'tab': + -t '$cond_region.targetregions' + #end if + ]]></token> + <token name="@REGIONS_MANUAL@"><![CDATA[ + #if $cond_region.select_region == 'text': + #for $i, $x in enumerate($cond_region.regions_repeat): + '${x.region}' + #end for + #end if + ]]></token> + <xml name="regions_macro"> + <conditional name="cond_region"> + <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)"> + <option value="no" selected="True">No</option> + <option value="text">Manually specify regions</option> + <option value="tab">Regions from tabular file</option> + </param> + <when value="no"/> + <when value="text"> + <repeat name="regions_repeat" min="1" default="1" title="Regions"> + <param name="region" type="text" label="region" help="format chr:from-to"> + <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator> + </param> + </repeat> + </when> + <when value="tab"> + <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. 
Tab-delimited file chr,from,to (1-based, inclusive)" /> + </when> + </conditional> + </xml> + <xml name="citations"> <citations> <citation type="bibtex"> @@ -49,21 +220,4 @@ <exit_code range="1:" level="fatal" description="Error" /> </stdio> </xml> - <token name="@no-chrom-options@"> ------ - -.. class:: warningmark - -**No options available? How to re-detect metadata** - -If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down, you need to re-detect metadata for the dataset you are trying to process. To do this, follow these steps: - -1. Click on the **pencil** icon adjacent to the dataset in the history -2. A new menu will appear in the center pane of the interface -3. Click **Datatype** tab -4. Set **New Type** to **BAM** -5. Click **Save** - -The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. - </token> </macros>
--- a/samtools_slice_bam.xml Tue May 09 11:17:27 2017 -0400 +++ b/samtools_slice_bam.xml Tue Sep 28 16:16:30 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="samtools_slice_bam" name="Slice" version="2.0.1"> +<tool id="samtools_slice_bam" name="Slice" version="2.0.2" profile="@PROFILE@"> <description>BAM by genomic regions</description> <macros> <import>macros.xml</import> @@ -13,12 +13,12 @@ ln -s '${input_bam.metadata.bam_index}' temp_input.bam.bai && #if str($slice_method.slice_method_selector) == "bed": - samtools view -@ \${GALAXY_SLOTS:-1} -b -L "${input_interval}" -o unsorted_output.bam temp_input.bam && + samtools view -@ \${GALAXY_SLOTS:-1} -b -L "${input_interval}" -o unsorted_output.bam temp_input.bam && #elif str($slice_method.slice_method_selector) == "chr": - samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam + samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam ${ ' '.join( map( lambda x:'"%s"' % ( x ), str( $slice_method.refs ).split(",") ) ) } && #elif str($slice_method.slice_method_selector) == "man": - samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam + samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam #for $region in $slice_method.regions: "${region.chrom}:${region.start}-${region.end}" @@ -28,7 +28,7 @@ samtools sort -O bam - -T sorted + -T "\${TMPDIR:-.}" -@ \${GALAXY_SLOTS:-1} -o '${output_bam}' unsorted_output.bam @@ -83,13 +83,13 @@ <param name="input_bam" ftype="bam" value="bam-slice-input.bam" /> <param name="slice_method_selector" value="bed"/> <param name="input_interval" ftype="bed" value="bam-slice.bed" /> - <output name="output_bam" file="bam-slice-test1.bam" ftype="bam" /> + <output name="output_bam" file="bam-slice-test1.bam" ftype="bam" lines_diff="4" /> </test> <test> <param name="input_bam" ftype="bam" value="bam-slice-input.bam" /> <param name="slice_method_selector" value="chr"/> <param name="refs" value="chrM" /> - <output 
name="output_bam" file="bam-slice-test2.bam" ftype="bam" /> + <output name="output_bam" file="bam-slice-test2.bam" ftype="bam" lines_diff="4" /> </test> <test> <param name="input_bam" ftype="bam" value="bam-slice-input.bam" /> @@ -97,7 +97,7 @@ <param name="chrom" value="chrM" /> <param name="start" value="1" /> <param name="end" value="1000" /> - <output name="output_bam" file="bam-slice-test3.bam" ftype="bam" /> + <output name="output_bam" file="bam-slice-test3.bam" ftype="bam" lines_diff="4"/> </test> </tests> <help><![CDATA[ @@ -106,7 +106,7 @@ Allows restricting (slicing) an input BAM dataset to a list of intervals defined in a BED file, individual chromosomes, or a manually set list of coordinates. BED datasets can be obtained from **Get Data -> UCSC Main**. -This tool is based on ``samtools view`` command. +This tool is based on ``samtools view`` command. @no-chrom-options@ ]]></help>