Mercurial > repos > devteam > samtools_slice_bam

--- a/macros.xml	Tue May 09 11:17:27 2017 -0400
+++ b/macros.xml	Tue Sep 28 16:16:30 2021 +0000
@@ -1,11 +1,182 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.3.1">samtools</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">samtools</requirement>
             <yield/>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">1.3.1</token>
+    <token name="@TOOL_VERSION@">1.13</token> <!-- samtools version; substituted into the package requirement of the requirements macro -->
+    <token name="@PROFILE@">20.05</token> <!-- referenced by the tool XML as profile="@PROFILE@" -->
+    <token name="@FLAGS@"><![CDATA[
+        #set $flags = 0
+        #if $filter
+            #set $flags = sum(map(int, str($filter).split(',')))
+        #end if
+    ]]></token> <!-- @FLAGS@: sets $flags to the integer sum of the comma-separated values in $filter, or leaves it at 0 when $filter evaluates false -->
+    <token name="@PREPARE_IDX@"><![CDATA[
+        ##link $input to ./infile and provide its index as infile.bai (bam) or infile.crai (cram): the index from the dataset metadata is linked when present, otherwise it is built with samtools index
+        ln -s '$input' infile &&
+        #if $input.is_of_type('bam'):
+            #if str( $input.metadata.bam_index ) != "None":
+                ln -s '${input.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $input.is_of_type('cram'):
+            #if str( $input.metadata.cram_index ) != "None":
+                ln -s '${input.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+    ]]></token>
+    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
+        ##link every dataset of $input_bams to a file named after its position (0, 1, ...) and provide the matching N.bai / N.crai index: the metadata index is linked when present, otherwise it is built with samtools index
+        #for $i, $bam in enumerate( $input_bams ):
+            ln -s '$bam' '${i}' &&
+            #if $bam.is_of_type('bam'):
+                #if str( $bam.metadata.bam_index ) != "None":
+                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
+                #else:
+                    samtools index '${i}' '${i}.bai' &&
+                #end if
+            #elif $bam.is_of_type('cram'):
+                #if str( $bam.metadata.cram_index ) != "None":
+                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
+                #else:
+                    samtools index '${i}' '${i}.crai' &&
+                #end if
+            #end if
+        #end for
+    ]]></token>
+    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
+        ##checks the reference selection ($addref_cond.addref_select: "history", "cached" or anything else)
+        ##and sets $reffa / $reffai (fasta / fasta index, intended for the -T / -t parameters) accordingly:
+        ##- history: the fasta is linked as reference.fa and indexed with samtools faidx in the working
+        ##  directory, because creating the index next to the original fasta file might not be possible
+        ##- cached: the path stored in the selected reference entry is used (index: same path + ".fai"),
+        ##  which allows to read the cram file without specifying the reference; otherwise both are None
+        #if $addref_cond.addref_select == "history":
+            ln -s '${addref_cond.ref}' reference.fa &&
+            samtools faidx reference.fa &&
+            #set reffa="reference.fa"
+            #set reffai="reference.fa.fai"
+        #elif $addref_cond.addref_select == "cached":
+            #set reffa=str($addref_cond.ref.fields.path)
+            #set reffai=str($addref_cond.ref.fields.path)+".fai"
+        #else
+            #set reffa=None
+            #set reffai=None
+        #end if
+    ]]></token>
+
+    <xml name="optional_reference" token_help="" token_argument=""> <!-- optional reference selector (no / history / cached); @HELP@ and @ARGUMENT@ are filled from the help / argument attributes of the expand call and default to empty -->
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="no">No</option>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="no"/>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="mandatory_reference" token_help="" token_argument=""> <!-- required reference selector (history / cached); @HELP@ and @ARGUMENT@ are filled from the help / argument attributes of the expand call and default to empty -->
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+
+    <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used by samtools (-@): GALAXY_SLOTS (default 1) minus one, kept in the shell variable addthreads
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+    <token name="@ADDMEMORY@"><![CDATA[
+        ##compute the amount of memory available to samtools sort (-m) from GALAXY_MEMORY_MB_PER_SLOT (default 768), kept in the shell variable addmemory
+        ##use only 75% of the available amount: https://github.com/samtools/samtools/issues/831
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        ((addmemory=addmemory*75/100)) &&
+    ]]></token>
+    <xml name="seed_input"> <!-- optional integer parameter "seed" for tools that use a random number generator -->
+        <param type="integer" name="seed" label="Seed for random number generator" help="If empty a random seed is used." optional="True"/>
+    </xml>
+    <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false"> <!-- select options for the alignment flag values 1 to 2048; each token_sN attribute (default "false") sets the selected state of the option with value N -->
+        <option value="1" selected="@S1@">Read is paired</option>
+        <option value="2" selected="@S2@">Read is mapped in a proper pair</option>
+        <option value="4" selected="@S4@">Read is unmapped</option>
+        <option value="8" selected="@S8@">Mate is unmapped</option>
+        <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option>
+        <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option>
+        <option value="64" selected="@S64@">Read is the first in a pair</option>
+        <option value="128" selected="@S128@">Read is the second in a pair</option>
+        <option value="256" selected="@S256@">Alignment of the read is not primary</option>
+        <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option>
+        <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option>
+        <option value="2048" selected="@S2048@">Alignment is supplementary</option>
+    </xml>
+
+    <!-- region specification macros and tokens for tools that accept regions either as a tabular
+         file (@REGIONS_FILE@ emits -t FILE) or as a manually entered list (@REGIONS_MANUAL@) -->
+    <token name="@REGIONS_FILE@"><![CDATA[
+        #if $cond_region.select_region == 'tab':
+            -t '$cond_region.targetregions'
+        #end if
+    ]]></token>
+    <token name="@REGIONS_MANUAL@"><![CDATA[
+        #if $cond_region.select_region == 'text':
+            #for $entry in $cond_region.regions_repeat:
+                '${entry.region}'
+            #end for
+        #end if
+    ]]></token> <!-- @REGIONS_MANUAL@: emits every manually entered region as a single-quoted argument when select_region is 'text' -->
+    <xml name="regions_macro"> <!-- conditional region filter: none, a repeat of manually typed regions (validated as CHR[:FROM[-TO]]), or a tabular target regions dataset -->
+        <conditional name="cond_region">
+            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
+                <option value="no" selected="True">No</option>
+                <option value="text">Manually specify regions</option>
+                <option value="tab">Regions from tabular file</option>
+            </param>
+            <when value="no"/>
+            <when value="text">
+                <repeat name="regions_repeat" min="1" default="1" title="Regions">
+                    <param name="region" type="text" label="region" help="format chr:from-to">
+                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
+                    </param>
+                </repeat>
+            </when>
+            <when value="tab">
+                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
+            </when>
+        </conditional>
+    </xml>
+
     <xml name="citations">
         <citations>
             <citation type="bibtex">
@@ -49,21 +220,4 @@
             <exit_code range="1:" level="fatal" description="Error" />
         </stdio>
     </xml>
-    <token name="@no-chrom-options@">
------
-
-.. class:: warningmark
-
-**No options available? How to re-detect metadata**
-
-If you see a &quot;No options available&quot; within the &quot;**Select references (chromosomes and contigs) you would like to restrict bam to**&quot; drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps:
-
-1. Click on the **pencil** icon adjacent to the dataset in the history
-2. A new menu will appear in the center pane of the interface
-3. Click **Datatype** tab
-4. Set **New Type** to **BAM**
-5. Click **Save**
-
-The medatada will be re-detected and you will be able to see the list of reference sequences in the &quot;**Select references (chromosomes and contigs) you would like to restrict bam to**&quot; drop-down.
-    </token>
 </macros>
--- a/samtools_slice_bam.xml	Tue May 09 11:17:27 2017 -0400
+++ b/samtools_slice_bam.xml	Tue Sep 28 16:16:30 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="samtools_slice_bam" name="Slice" version="2.0.1">
+<tool id="samtools_slice_bam" name="Slice" version="2.0.2" profile="@PROFILE@">
     <description>BAM by genomic regions</description>
     <macros>
         <import>macros.xml</import>
@@ -13,12 +13,12 @@
     ln -s '${input_bam.metadata.bam_index}' temp_input.bam.bai &&

     #if str($slice_method.slice_method_selector) == "bed":
-        samtools view -@ \${GALAXY_SLOTS:-1} -b -L "${input_interval}" -o unsorted_output.bam temp_input.bam &&
+        samtools view -@ \${GALAXY_SLOTS:-1} -b -L "${input_interval}" -o unsorted_output.bam temp_input.bam &&
     #elif str($slice_method.slice_method_selector) == "chr":
-        samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam
+        samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam
         ${ ' '.join( map( lambda x:'"%s"' % ( x ), str( $slice_method.refs ).split(",") ) ) } &&
     #elif str($slice_method.slice_method_selector) == "man":
-        samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam
+        samtools view -@ \${GALAXY_SLOTS:-1} -b -o unsorted_output.bam temp_input.bam

         #for $region in $slice_method.regions:
             "${region.chrom}:${region.start}-${region.end}"
@@ -28,7 +28,7 @@

     samtools sort
         -O bam
-        -T sorted
+        -T "\${TMPDIR:-.}"
         -@ \${GALAXY_SLOTS:-1}
         -o '${output_bam}'
         unsorted_output.bam
@@ -83,13 +83,13 @@
             <param name="input_bam" ftype="bam" value="bam-slice-input.bam" />
             <param name="slice_method_selector" value="bed"/>
             <param name="input_interval" ftype="bed" value="bam-slice.bed" />
-            <output name="output_bam" file="bam-slice-test1.bam" ftype="bam" />
+            <output name="output_bam" file="bam-slice-test1.bam" ftype="bam" lines_diff="4" />
         </test>
         <test>
             <param name="input_bam" ftype="bam" value="bam-slice-input.bam" />
             <param name="slice_method_selector" value="chr"/>
             <param name="refs" value="chrM" />
-            <output name="output_bam" file="bam-slice-test2.bam" ftype="bam" />
+            <output name="output_bam" file="bam-slice-test2.bam" ftype="bam" lines_diff="4" />
         </test>
         <test>
             <param name="input_bam" ftype="bam" value="bam-slice-input.bam" />
@@ -97,7 +97,7 @@
             <param name="chrom" value="chrM" />
             <param name="start" value="1" />
             <param name="end" value="1000" />
-            <output name="output_bam" file="bam-slice-test3.bam" ftype="bam" />
+            <output name="output_bam" file="bam-slice-test3.bam" ftype="bam" lines_diff="4"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -106,7 +106,6 @@
 Allows to restrict (slice) input BAM dataset to a list of intervals defined in a BED file, individual chromosomes, or manually set list of coordinates.
 BED datasets can be obtained from **Get Data -> UCSC Main**.

-This tool is based on ``samtools view`` command.
+This tool is based on ``samtools view`` command.

-@no-chrom-options@
     ]]></help>
Binary file test-data/bam-slice-test1.bam has changed
Binary file test-data/bam-slice-test2.bam has changed
Binary file test-data/bam-slice-test3.bam has changed