Mercurial > repos > iuc > bamtools_split_ref
comparison bamtools_split_ref.xml @ 1:9dbf707bebb0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/bamtools/bamtools_split_ref commit a14db40361bcb2ee608bccd9222e1654aaea3324-dirty
author | iuc |
---|---|
date | Wed, 11 Jan 2023 12:03:53 +0000 |
parents | 09470ab960f1 |
children | 9b520009db81 |
comparison
equal
deleted
inserted
replaced
0:09470ab960f1 | 1:9dbf707bebb0 |
---|---|
1 <tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | 1 <tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09"> |
2 <description>into dataset list collection</description> | 2 <description>into dataset list collection</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements"> |
7 <command> | 7 <requirement type="package" version="1.16.1">samtools</requirement> |
8 <![CDATA[ | 8 </expand> |
9 ln -s '${input_bam}' 'localbam.bam' && | 9 <command><![CDATA[ |
10 ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' && | 10 ln -s '${input_bam}' localbam.bam && |
11 bamtools split -reference | 11 ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' && |
12 -in localbam.bam | 12 samtools view -bH localbam.bam --no-PG -o header.bam && |
13 -stub split_bam | 13 bamtools split -reference |
14 ## Preserve order from metadata in the output collection | 14 -in localbam.bam |
15 #import re | 15 -stub split_bam |
16 #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bam.name)) | 16 && (IFS=','; |
17 #if str($refs) != 'None': | 17 for i in \$REFS_FROM_BAM_METADATA; |
18 #set $ref_list = ' '.join(str($refs).split(",")) | 18 do FN=`printf "split_bam.REF_%s.bam" "\$i"`; |
19 #else | 19 if [ ! -f \$FN ]; then cp header.bam "\$FN"; fi; |
20 #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]) | 20 done) |
21 #end if | 21 && cp '$c1' galaxy.json |
22 && mkdir -p outputs | 22 ]]></command> |
23 && (export I=0; | 23 <environment_variables> |
24 for i in $ref_list; | 24 <environment_variable name="REFS_FROM_BAM_METADATA">#import re |
25 do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; | 25 ## need to extract ref names from Galaxy's safe string representation |
26 if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi; | 26 #set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')] |
27 done) | 27 #if str($refs) != 'None' |
28 ]]> | 28 #set $refs_selected = set(str($refs).split(",")) |
29 </command> | 29 ## sort the selected refs by their order in the bam metadata |
30 #echo ','.join([r for r in $ref_list if r in refs_selected]) | |
31 #else | |
32 #echo ','.join($ref_list) | |
33 #end if | |
34 </environment_variable> | |
35 </environment_variables> | |
36 <configfiles> | |
37 <configfile name="c1">#import re | |
38 ## need to extract ref names from Galaxy's safe string representation | |
39 #set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')] | |
40 #if str($refs) != 'None' | |
41 #set $refs_selected = set(str($refs).split(",")) | |
42 #set $ref_list = [r for r in $ref_list if r in refs_selected] | |
43 #end if | |
44 #set $elems = [{'name': '%s: %s' % ($input_bam.name, r), 'filename': 'split_bam.REF_%s.bam' % r, 'dbkey': str($input_bam.dbkey)} for r in $ref_list] | |
45 #import json | |
46 #echo json.dumps({'output_bams': {'elements': $elems}})</configfile> | |
47 </configfiles> | |
30 <inputs> | 48 <inputs> |
31 <param name="input_bam" type="data" format="bam" label="BAM dataset to split by reference"/> | 49 <param name="input_bam" type="data" format="bam" label="BAM dataset to split by reference"/> |
32 <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" > | 50 <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" > |
33 <help><![CDATA[Click and type in the box above to see options. You can select multiple entries. | 51 <help><![CDATA[Click and type in the box above to see options. You can select multiple entries. |
34 If "No options available" is displayed, you need to re-detect metadata on the input dataset. | 52 If "No options available" is displayed, you need to re-detect metadata on the input dataset. |
37 <filter type="data_meta" ref="input_bam" key="reference_names" /> | 55 <filter type="data_meta" ref="input_bam" key="reference_names" /> |
38 </options> | 56 </options> |
39 </param> | 57 </param> |
40 </inputs> | 58 </inputs> |
41 <outputs> | 59 <outputs> |
42 <collection name="output_bams" type="list" label="${input_bam.name} Split List"> | 60 <collection name="output_bams" type="list"> |
43 <discover_datasets pattern="split_bam\d*(?P<designation>.+)\.bam" ext="bam" directory="outputs" visible="false"/> | 61 <discover_datasets from_provided_metadata="true" ext="bam" visible="false" /> |
44 </collection> | 62 </collection> |
45 </outputs> | 63 </outputs> |
46 <tests> | 64 <tests> |
47 <test> | 65 <test> |
48 <param name="input_bam" ftype="bam" value="bamtools-input2.bam"/> | 66 <param name="input_bam" ftype="bam" value="bamtools-input2.bam" /> |
49 <output_collection name="output_bams" type="list"> | 67 <output_collection name="output_bams" type="list" count="25"> |
50 <element name="bamtools_input2.chr1" file="bamtools_input2.chr1" compare="sim_size" delta="500" /> | 68 <element name="bamtools-input2.bam: chrM" file="bamtools_input2.header.bam" ftype="bam" /> |
69 <element name="bamtools-input2.bam: chr1" file="bamtools_input2.chr1" ftype="bam" /> | |
70 <element name="bamtools-input2.bam: chr21" file="bamtools_input2.chr21.bam" ftype="bam" /> | |
71 </output_collection> | |
72 </test> | |
73 <test> | |
74 <param name="input_bam" ftype="bam" value="bamtools-input2.bam" /> | |
75 <param name="refs" value="chrM,chr1,chr21" /> | |
76 <output_collection name="output_bams" type="list" count="3"> | |
77 <element name="bamtools-input2.bam: chrM" file="bamtools_input2.header.bam" ftype="bam" /> | |
78 <element name="bamtools-input2.bam: chr1" file="bamtools_input2.chr1" ftype="bam" /> | |
79 <element name="bamtools-input2.bam: chr21" file="bamtools_input2.chr21.bam" ftype="bam" /> | |
51 </output_collection> | 80 </output_collection> |
52 </test> | 81 </test> |
53 </tests> | 82 </tests> |
54 <help> | 83 <help> |
55 **What it does** | 84 **What it does** |
56 | 85 |
57 BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). | 86 BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). |
58 | |
59 ----- | |
60 | |
61 .. class:: warningmark | |
62 | |
63 **DANGER: Multiple Outputs** | |
64 | |
65 As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. | |
66 | 87 |
67 ----- | 88 ----- |
68 | 89 |
69 **How it works** | 90 **How it works** |
70 | 91 |
71 Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references. | 92 Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references and will consist of as many elements as there are references selected or listed in the input BAM header. |
72 | 93 |
73 In cases of unfinished genomes with very large number of reference sequences (scaffolds) | 94 .. class:: warningmark |
74 it can generate thousands (if not millions) of output datasets. | |
75 | 95 |
96 In cases of unfinished genomes with a very large number of reference sequences (scaffolds) | |
97 this could generate thousands (if not millions) of output datasets. | |
76 | 98 |
77 ----- | 99 ----- |
78 | 100 |
79 .. class:: infomark | 101 .. class:: infomark |
80 | 102 |