# HG changeset patch # User iuc # Date 1673438633 0 # Node ID 9dbf707bebb0323cd1f0e91d8d65944fdbeabeda # Parent 09470ab960f165e35ed1ac6e0472633ab9d2902d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/bamtools/bamtools_split_ref commit a14db40361bcb2ee608bccd9222e1654aaea3324-dirty diff -r 09470ab960f1 -r 9dbf707bebb0 bamtools_split.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools_split.xml Wed Jan 11 12:03:53 2023 +0000 @@ -0,0 +1,119 @@ + + BAM datasets on a variety of attributes + + macros.xml + + + + $report && + #for $bam_count, $input_bam in enumerate( $input_bams ): + ln -s "${input_bam}" "localbam_${bam_count}.bam" && + ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && + #end for + bamtools + split + #if str ( $analysis_type.analysis_type_selector ) == "-tag" : + ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}" + #else + ${analysis_type.analysis_type_selector} + #end if + -stub split_bam + #for $bam_count, $input_bam in enumerate( $input_bams ): + -in "localbam_${bam_count}.bam" + #end for + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +The following options can be specified via the "**Split BAM dataset(s) by**" dropdown:: + + Mapping status (-mapped) split mapped/unmapped and generate two output files + named (MAPPED) and (UNMAPPED) containing mapped and unmapped + reads, respectively. 
+ + Pairing status (-paired) split single-end/paired-end alignments and generate two output files + named (SINGLE_END) and (PAIRED_END) containing unpaired and paired + reads, respectively. + + Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with + a very large number of reference sequences (scaffolds) it can generate + thousands (if not millions) of output datasets. + + Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this + option from the menu will allow you to enter the tag name. As was the + case with the reference splitting above, this option can produce a very + large number of outputs if a tag has a large number of unique values. + +----- + +.. class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 09470ab960f1 -r 9dbf707bebb0 bamtools_split_mapped.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools_split_mapped.xml Wed Jan 11 12:03:53 2023 +0000 @@ -0,0 +1,56 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + + +**How it works** + +Splits the input BAM file into 2 output files named (MAPPED) and (UNMAPPED) containing mapped and unmapped reads, respectively. + +----- + +.. 
class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 09470ab960f1 -r 9dbf707bebb0 bamtools_split_paired.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools_split_paired.xml Wed Jan 11 12:03:53 2023 +0000 @@ -0,0 +1,57 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + + +**How it works** + + +Splits the input BAM file into 2 output files named (SINGLE_END) and (PAIRED_END) containing single-end and paired-end reads, respectively. + +----- + +.. class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 09470ab960f1 -r 9dbf707bebb0 bamtools_split_ref.xml --- a/bamtools_split_ref.xml Sat Nov 27 10:03:33 2021 +0000 +++ b/bamtools_split_ref.xml Wed Jan 11 12:03:53 2023 +0000 @@ -1,32 +1,50 @@ - + into dataset list collection macros.xml - - - = 0 else n for n in str($input_bam.metadata.reference_names).split(',')]) - #end if - && mkdir -p outputs - && (export I=0; - for i in $ref_list; - do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; - if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi; - done) - ]]> - + + samtools + + + + #import re +## need to extract ref names from Galaxy's safe string representation +#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')] +#if str($refs) != 'None' +#set $refs_selected = set(str($refs).split(",")) +## sort the selected refs by their order in the bam metadata +#echo 
','.join([r for r in $ref_list if r in refs_selected]) +#else +#echo ','.join($ref_list) +#end if + + + + #import re +## need to extract ref names from Galaxy's safe string representation +#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')] +#if str($refs) != 'None' +#set $refs_selected = set(str($refs).split(",")) +#set $ref_list = [r for r in $ref_list if r in refs_selected] +#end if +#set $elems = [{'name': '%s: %s' % ($input_bam.name, r), 'filename': 'split_bam.REF_%s.bam' % r, 'dbkey': str($input_bam.dbkey)} for r in $ref_list] +#import json +#echo json.dumps({'output_bams': {'elements': $elems}}) + @@ -39,40 +57,44 @@ - - + + - - - + + + + + + + + + + + + + + **What is does** -BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). - ------ - -.. class:: warningmark - -**DANGER: Multiple Outputs** - -As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. +BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). ----- **How it works** -Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references. +Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references and will consist of as many elements as there are references selected or listed in the input BAM header. -In cases of unfinished genomes with very large number of reference sequences (scaffolds) -it can generate thousands (if not millions) of output datasets. +.. 
class:: warningmark + In cases of unfinished genomes with a very large number of reference sequences (scaffolds) + this could generate thousands (if not millions) of output datasets. ----- diff -r 09470ab960f1 -r 9dbf707bebb0 bamtools_split_tag.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamtools_split_tag.xml Wed Jan 11 12:03:53 2023 +0000 @@ -0,0 +1,71 @@ + + into dataset list collection + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). + +----- + +.. class:: warningmark + +**DANGER: Multiple Outputs** + +As described below, splitting BAM dataset(s) on a tag value can produce very large numbers of outputs. Read below and know what you are doing. + +----- + +**How it works** + +Split alignments by tag name into a dataset list collection. + +This can generate a huge number of output datasets depending on the number of distinct values of the TAG. + + +----- + +.. class:: infomark + +**More information** + +Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki + + + + 10.1093/bioinformatics/btr174 + + diff -r 09470ab960f1 -r 9dbf707bebb0 macros.xml --- a/macros.xml Sat Nov 27 10:03:33 2021 +0000 +++ b/macros.xml Wed Jan 11 12:03:53 2023 +0000 @@ -1,11 +1,11 @@ - 2.5.1 + 2.5.2 0 bamtools - samtools + @@ -13,4 +13,4 @@ 10.1093/bioinformatics/btr174 - \ No newline at end of file + diff -r 09470ab960f1 -r 9dbf707bebb0 test-data/bamtools_input2.chr21.bam Binary file test-data/bamtools_input2.chr21.bam has changed diff -r 09470ab960f1 -r 9dbf707bebb0 test-data/bamtools_input2.header.bam Binary file test-data/bamtools_input2.header.bam has changed