Mercurial > repos > artbio > bamparse
changeset 2:8ea06787c08a draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
author | artbio |
---|---|
date | Tue, 09 Oct 2018 17:14:57 -0400 |
parents | ae9ea0488850 |
children | 120eb76aa500 |
files | bamparse.py bamparse.xml test-data/input1.bam test-data/input2.bam test-data/input_new2.bam test-data/more_antisense_table.tabular test-data/more_sense_table.tabular test-data/more_table0.tabular test-data/more_table1.tabular test-data/more_table2.tabular test-data/table_antisense.tabular |
diffstat | 11 files changed, 466 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- a/bamparse.py Sun Oct 15 19:14:29 2017 -0400 +++ b/bamparse.py Tue Oct 09 17:14:57 2018 -0400 @@ -9,10 +9,6 @@ the_parser = argparse.ArgumentParser() the_parser.add_argument('--output', nargs='+', action='store', type=str, help='Count tables') - the_parser.add_argument('--polarity', - choices=["sense", "antisense", "both"], - help="forward, reverse or both forward an\ - reverse reads are counted") the_parser.add_argument('--alignments', nargs='+', help="bam alignments files") the_parser.add_argument('--labels', nargs='+', help="Alignments labels") @@ -23,37 +19,17 @@ return args -def get_counts(bamfile, polarity="both"): +def get_counts(bamfile): """ Takes an AlignmentFile object and returns a dictionary of counts for sense, - antisense, or both sense and antisense reads aligning to the bam references + antisense, or both sense and antisense bam alignments to the references, + depending on the pre-treatment performed by sambamba in the xml wrapper """ - def filter_sense_read(read): - if read.is_reverse: - return 0 - else: - return 1 - - def filter_antisense_read(read): - if read.is_reverse: - return 1 - else: - return 0 - counts = defaultdict(int) for ref_name in bamfile.references: counts[ref_name] = 0 - if polarity == "both": - for ref_name in bamfile.references: - counts[ref_name] = bamfile.count(reference=ref_name) - if polarity == "sense": - for ref_name in bamfile.references: - for read in bamfile.fetch(ref_name): - counts[ref_name] += filter_sense_read(read) - if polarity == "antisense": - for ref_name in bamfile.references: - for read in bamfile.fetch(ref_name): - counts[ref_name] += filter_antisense_read(read) + for ref_name in bamfile.references: + counts[ref_name] = bamfile.count(reference=ref_name) return counts @@ -80,14 +56,14 @@ out.close() -def main(alignments, labels, polarity, output, number): +def main(alignments, labels, output, number): diclist = [] for file in alignments: bam_object = pysam.AlignmentFile(file, 'rb') - diclist.append(get_counts(bam_object, polarity=polarity)) + diclist.append(get_counts(bam_object)) writetable(diclist, labels, output, number) if __name__ == "__main__": args = Parser() - main(args.alignments, args.labels, args.polarity, args.output, args.number) + main(args.alignments, args.labels, args.output, args.number)
--- a/bamparse.xml Sun Oct 15 19:14:29 2017 -0400 +++ b/bamparse.xml Tue Oct 09 17:14:57 2018 -0400 @@ -1,28 +1,34 @@ -<tool id="bamparse" name="Count alignments" version="2.0.1"> +<tool id="bamparse" name="Count alignments" version="3.0.0"> <description>in a BAM file</description> <requirements> - <requirement type="package" version="1.1.2">bowtie</requirement> - <requirement type="package" version="1.11.2">numpy</requirement> <requirement type="package" version="0.11.2.1">pysam</requirement> + <requirement type="package" version="0.6.6">sambamba</requirement> </requirements> <stdio> <exit_code range="1:" level="fatal" description="Tool exception" /> </stdio> <command detect_errors="exit_code"><![CDATA[ mkdir outputdir && + #if $polarity == 'sense': + #set pol=' and not reverse_strand' + #else if $polarity == 'antisense': + #set pol=' and reverse_strand' + #else: + #set pol='' + #end if #for $file in $input_list - samtools index '$file' && + sambamba view -t \$GALAXY_SLOTS -F "not unmapped$pol" -f bam '$file' -o '$file.element_identifier' && + samtools index '$file.element_identifier' && #end for python $__tool_directory__/bamparse.py --alignments #for $file in $input_list - '$file' + '$file.element_identifier' #end for --labels #for $file in $input_list '$file.element_identifier' #end for - --polarity '$polarity' --number '$output_option' ]]></command> <inputs> @@ -44,6 +50,58 @@ <discover_datasets pattern="(?P<designation>.*)\.tabular" ext="tabular" visible="true" assign_primary_output="true" directory="outputdir"/> </data> </outputs> + <tests> + <test> + <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" /> + <param name="polarity" value="both" /> + <param name="output_option" value="unique" /> + <output name="output" ftype="tabular" file="table.tabular" /> + </test> + <test> + <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" /> + <param name="polarity" value="both" /> + <param name="output_option" value="multiple" /> + <output name="output" ftype="tabular" file="table0.tabular"> + <discovered_dataset designation="table1" ftype="tabular" file="table1.tabular" /> + </output> + </test> + <test> + <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" /> + <param name="polarity" value="sense" /> + <param name="output_option" value="unique" /> + <output name="output" ftype="tabular" file="table.tabular" /> + </test> + <test> + <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" /> + <param name="polarity" value="antisense" /> + <param name="output_option" value="unique" /> + <output name="output" ftype="tabular" file="table_antisense.tabular" /> + </test> + + <test> + <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" /> + <param name="polarity" value="both" /> + <param name="output_option" value="multiple" /> + <output name="output" ftype="tabular" file="more_table0.tabular"> + <discovered_dataset designation="table1" ftype="tabular" file="more_table1.tabular" /> + <discovered_dataset designation="table2" ftype="tabular" file="more_table2.tabular" /> + </output> + </test> + <test> + <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" /> + <param name="polarity" value="sense" /> + <param name="output_option" value="unique" /> + <output name="output" ftype="tabular" file="more_sense_table.tabular" /> + </test> + <test> + <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" /> + <param name="polarity" value="antisense" /> + <param name="output_option" value="unique" /> + <output name="output" ftype="tabular" file="more_antisense_table.tabular" /> + </test> + + + </tests> <help> **What it does** @@ -56,20 +114,4 @@ The library labels in the returned count table are taken from the input bam datasets names in the Galaxy history. </help> - <tests> - <test> - <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="tabular" /> - <param name="polarity" value="both" /> - <param name="output_option" value="unique" /> - <output name="output" ftype="tabular" file="table.tabular" /> - </test> - <test> - <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="tabular" /> - <param name="polarity" value="both" /> - <param name="output_option" value="multiple" /> - <output name="output" ftype="tabular" file="table0.tabular"> - <discovered_dataset designation="table1" ftype="tabular" file="table1.tabular" /> - </output> - </test> - </tests> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/more_antisense_table.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,31 @@ +gene input1.bam input2.bam input_new2.bam +FBtr0070001 0 0 0 +FBtr0070533 0 0 0 +FBtr0070603 0 0 0 +FBtr0070604 0 0 0 +FBtr0070911 0 0 0 +FBtr0078490 0 0 0 +FBtr0078580 0 0 0 +FBtr0078790 1 0 0 +FBtr0079064 0 0 0 +FBtr0079090 1 0 0 +FBtr0079338 0 0 0 +FBtr0079528 0 0 0 +FBtr0079596 0 1 1 +FBtr0079677 0 0 0 +FBtr0079690 0 0 0 +FBtr0079692 0 0 0 +FBtr0079693 0 0 0 +FBtr0079694 0 0 0 +FBtr0079702 0 0 0 +FBtr0079728 0 0 0 +FBtr0079729 0 0 0 +FBtr0079752 0 0 0 +FBtr0079820 0 0 0 +FBtr0080609 0 0 0 +FBtr0080644 0 0 0 +FBtr0080646 0 0 0 +FBtr0080647 0 0 0 +FBtr0080660 0 0 0 +FBtr0080663 0 0 0 +FBtr0080664 0 2 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/more_sense_table.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,31 @@ +gene input1.bam input2.bam input_new2.bam +FBtr0070001 57 24 24 +FBtr0070533 24 45 45 +FBtr0070603 70 61 61 +FBtr0070604 40 55 55 +FBtr0070911 2 3 3 +FBtr0078490 8 14 14 +FBtr0078580 1104 370 370 +FBtr0078790 13 10 10 +FBtr0079064 4 5 0 +FBtr0079090 5 7 7 +FBtr0079338 16 23 23 +FBtr0079528 106 365 365 +FBtr0079596 152 315 315 +FBtr0079677 4 7 7 +FBtr0079690 5 5 5 +FBtr0079692 6 4 4 +FBtr0079693 6 9 9 +FBtr0079694 7 6 6 +FBtr0079702 4 4 4 +FBtr0079728 8 6 6 +FBtr0079729 4 4 4 +FBtr0079752 8 2 2 +FBtr0079820 13 109 109 +FBtr0080609 63 8 8 +FBtr0080644 6 8 8 +FBtr0080646 3 12 12 +FBtr0080647 10 12 12 +FBtr0080660 7 11 11 +FBtr0080663 115 106 106 +FBtr0080664 128 387 387
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/more_table0.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,31 @@ +gene input1.bam +FBtr0070001 57 +FBtr0070533 24 +FBtr0070603 70 +FBtr0070604 40 +FBtr0070911 2 +FBtr0078490 8 +FBtr0078580 1104 +FBtr0078790 14 +FBtr0079064 4 +FBtr0079090 6 +FBtr0079338 16 +FBtr0079528 106 +FBtr0079596 152 +FBtr0079677 4 +FBtr0079690 5 +FBtr0079692 6 +FBtr0079693 6 +FBtr0079694 7 +FBtr0079702 4 +FBtr0079728 8 +FBtr0079729 4 +FBtr0079752 8 +FBtr0079820 13 +FBtr0080609 63 +FBtr0080644 6 +FBtr0080646 3 +FBtr0080647 10 +FBtr0080660 7 +FBtr0080663 115 +FBtr0080664 128
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/more_table1.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,31 @@ +gene input2.bam +FBtr0070001 24 +FBtr0070533 45 +FBtr0070603 61 +FBtr0070604 55 +FBtr0070911 3 +FBtr0078490 14 +FBtr0078580 370 +FBtr0078790 10 +FBtr0079064 5 +FBtr0079090 7 +FBtr0079338 23 +FBtr0079528 365 +FBtr0079596 316 +FBtr0079677 7 +FBtr0079690 5 +FBtr0079692 4 +FBtr0079693 9 +FBtr0079694 6 +FBtr0079702 4 +FBtr0079728 6 +FBtr0079729 4 +FBtr0079752 2 +FBtr0079820 109 +FBtr0080609 8 +FBtr0080644 8 +FBtr0080646 12 +FBtr0080647 12 +FBtr0080660 11 +FBtr0080663 106 +FBtr0080664 389
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/more_table2.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,31 @@ +gene input_new2.bam +FBtr0070001 24 +FBtr0070533 45 +FBtr0070603 61 +FBtr0070604 55 +FBtr0070911 3 +FBtr0078490 14 +FBtr0078580 370 +FBtr0078790 10 +FBtr0079064 0 +FBtr0079090 7 +FBtr0079338 23 +FBtr0079528 365 +FBtr0079596 316 +FBtr0079677 7 +FBtr0079690 5 +FBtr0079692 4 +FBtr0079693 9 +FBtr0079694 6 +FBtr0079702 4 +FBtr0079728 6 +FBtr0079729 4 +FBtr0079752 2 +FBtr0079820 109 +FBtr0080609 8 +FBtr0080644 8 +FBtr0080646 12 +FBtr0080647 12 +FBtr0080660 11 +FBtr0080663 106 +FBtr0080664 389
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/table_antisense.tabular Tue Oct 09 17:14:57 2018 -0400 @@ -0,0 +1,239 @@ +gene alignment1.bam alignment2.bam +dme-bantam 0 0 +dme-let-7 0 0 +dme-mir-1 0 0 +dme-mir-10 0 0 +dme-mir-100 0 0 +dme-mir-1000 0 0 +dme-mir-1001 0 0 +dme-mir-1002 0 0 +dme-mir-1003 0 0 +dme-mir-1004 0 0 +dme-mir-1005 0 0 +dme-mir-1006 0 0 +dme-mir-1007 0 0 +dme-mir-1008 0 0 +dme-mir-1009 0 0 +dme-mir-1010 0 0 +dme-mir-1011 0 0 +dme-mir-1012 0 0 +dme-mir-1013 0 0 +dme-mir-1014 0 0 +dme-mir-1015 0 0 +dme-mir-1016 0 0 +dme-mir-1017 0 0 +dme-mir-11 0 0 +dme-mir-12 0 0 +dme-mir-124 0 0 +dme-mir-125 0 0 +dme-mir-133 0 0 +dme-mir-137 0 0 +dme-mir-13a 0 0 +dme-mir-13b-1 0 0 +dme-mir-13b-2 0 0 +dme-mir-14 0 0 +dme-mir-184 0 0 +dme-mir-190 0 0 +dme-mir-193 0 0 +dme-mir-210 0 0 +dme-mir-219 0 0 +dme-mir-2279 0 0 +dme-mir-2280 0 0 +dme-mir-2281 0 0 +dme-mir-2282 0 0 +dme-mir-2283 0 0 +dme-mir-2489 0 0 +dme-mir-2490 0 0 +dme-mir-2491 0 0 +dme-mir-2492 0 0 +dme-mir-2493 0 0 +dme-mir-2494 0 0 +dme-mir-2495 0 0 +dme-mir-2496 0 0 +dme-mir-2497 0 0 +dme-mir-2498 0 0 +dme-mir-2499 0 0 +dme-mir-2500 0 0 +dme-mir-2501 0 0 +dme-mir-252 0 0 +dme-mir-2535b 0 0 +dme-mir-263a 0 0 +dme-mir-263b 0 0 +dme-mir-274 0 0 +dme-mir-275 0 0 +dme-mir-276a 0 0 +dme-mir-276b 0 0 +dme-mir-277 0 0 +dme-mir-278 0 0 +dme-mir-279 0 0 +dme-mir-280 0 0 +dme-mir-281-1 0 0 +dme-mir-281-2 0 0 +dme-mir-282 0 0 +dme-mir-283 0 0 +dme-mir-284 0 0 +dme-mir-285 0 0 +dme-mir-286 0 0 +dme-mir-287 0 0 +dme-mir-288 0 0 +dme-mir-289 0 0 +dme-mir-2a-1 0 0 +dme-mir-2a-2 0 0 +dme-mir-2b-1 0 0 +dme-mir-2b-2 0 0 +dme-mir-2c 0 0 +dme-mir-3 0 0 +dme-mir-303 0 0 +dme-mir-304 0 0 +dme-mir-305 0 0 +dme-mir-306 0 0 +dme-mir-307a 0 0 +dme-mir-307b 0 0 +dme-mir-308 0 0 +dme-mir-309 0 0 +dme-mir-310 0 0 +dme-mir-311 0 0 +dme-mir-312 0 0 +dme-mir-313 0 0 +dme-mir-314 0 0 +dme-mir-315 0 0 +dme-mir-316 0 0 +dme-mir-317 0 0 +dme-mir-318 0 0 +dme-mir-31a 0 0 +dme-mir-31b 0 0 +dme-mir-33 0 0 +dme-mir-34 0 0 +dme-mir-3641 0 0 +dme-mir-3642 0 0 +dme-mir-3643 0 0 +dme-mir-3644 0 0 +dme-mir-3645 0 0 +dme-mir-375 0 0 +dme-mir-4 0 0 +dme-mir-4908 0 0 +dme-mir-4909 0 0 +dme-mir-4910 0 0 +dme-mir-4911 0 0 +dme-mir-4912 0 0 +dme-mir-4913 0 0 +dme-mir-4914 0 0 +dme-mir-4915 0 0 +dme-mir-4916 0 0 +dme-mir-4917 0 0 +dme-mir-4918 0 0 +dme-mir-4919 0 0 +dme-mir-4939 0 0 +dme-mir-4940 0 0 +dme-mir-4941 0 0 +dme-mir-4942 0 0 +dme-mir-4943 0 0 +dme-mir-4944 0 0 +dme-mir-4945 0 0 +dme-mir-4946 0 0 +dme-mir-4947 0 0 +dme-mir-4948 0 0 +dme-mir-4949 0 0 +dme-mir-4950 0 0 +dme-mir-4951 0 0 +dme-mir-4952 0 0 +dme-mir-4953 0 0 +dme-mir-4954 0 0 +dme-mir-4955 0 0 +dme-mir-4956 0 0 +dme-mir-4957 0 0 +dme-mir-4958 0 0 +dme-mir-4959 0 0 +dme-mir-4960 0 0 +dme-mir-4961 0 0 +dme-mir-4962 0 0 +dme-mir-4963 0 0 +dme-mir-4964 0 0 +dme-mir-4965 0 0 +dme-mir-4966 0 0 +dme-mir-4967 0 0 +dme-mir-4968 0 0 +dme-mir-4969 0 0 +dme-mir-4970 0 0 +dme-mir-4971 0 0 +dme-mir-4972 0 0 +dme-mir-4973 0 0 +dme-mir-4974 0 0 +dme-mir-4975 0 0 +dme-mir-4976 0 0 +dme-mir-4977 0 0 +dme-mir-4978 0 0 +dme-mir-4979 0 0 +dme-mir-4980 0 0 +dme-mir-4981 0 0 +dme-mir-4982 0 0 +dme-mir-4983 0 0 +dme-mir-4984 0 0 +dme-mir-4985 0 0 +dme-mir-4986 0 0 +dme-mir-4987 0 0 +dme-mir-5 0 0 +dme-mir-6-1 0 0 +dme-mir-6-2 0 0 +dme-mir-6-3 0 0 +dme-mir-7 0 0 +dme-mir-79 0 0 +dme-mir-8 0 0 +dme-mir-87 0 0 +dme-mir-927 0 0 +dme-mir-929 0 0 +dme-mir-92a 0 0 +dme-mir-92b 0 0 +dme-mir-932 0 0 +dme-mir-954 0 0 +dme-mir-955 0 0 +dme-mir-956 0 0 +dme-mir-957 0 0 +dme-mir-958 0 0 +dme-mir-959 0 0 +dme-mir-960 0 0 +dme-mir-961 0 0 +dme-mir-962 0 0 +dme-mir-963 0 0 +dme-mir-964 0 0 +dme-mir-965 0 0 +dme-mir-966 0 0 +dme-mir-967 0 0 +dme-mir-968 0 0 +dme-mir-969 0 0 +dme-mir-970 0 0 +dme-mir-971 0 0 +dme-mir-972 0 0 +dme-mir-973 0 0 +dme-mir-974 0 0 +dme-mir-975 0 0 +dme-mir-976 0 0 +dme-mir-977 0 0 +dme-mir-978 0 0 +dme-mir-979 0 0 +dme-mir-980 0 0 +dme-mir-981 0 0 +dme-mir-982 0 0 +dme-mir-983-1 0 0 +dme-mir-983-2 0 0 +dme-mir-984 0 0 +dme-mir-985 0 0 +dme-mir-986 0 0 +dme-mir-987 0 0 +dme-mir-988 0 0 +dme-mir-989 0 0 +dme-mir-990 0 0 +dme-mir-991 0 0 +dme-mir-992 0 0 +dme-mir-993 0 0 +dme-mir-994 0 0 +dme-mir-995 0 0 +dme-mir-996 0 0 +dme-mir-997 0 0 +dme-mir-998 0 0 +dme-mir-999 0 0 +dme-mir-9a 0 0 +dme-mir-9b 0 0 +dme-mir-9c 0 0 +dme-mir-iab-4 0 0 +dme-mir-iab-8 0 0