Mercurial > repos > iuc > ivar_removereads
changeset 4:ee2beb764a7b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 693df287d23b0fd9dfd134b41d401a438c3f5ad6"
author | iuc |
---|---|
date | Mon, 22 Jun 2020 07:29:10 -0400 |
parents | 3d18f8c3c0f6 |
children | 75c279fa403a |
files | completemask.py ivar_removereads.xml sanitize_bed.py test-data/covid19/ARTIC-V1-bad.bed |
diffstat | 4 files changed, 294 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""completemask.py: extend ``ivar getmasked`` output to whole amplicons.

Usage: completemask.py MASKED_PRIMERS AMPLICON_INFO

MASKED_PRIMERS is read for a single line of tab-separated primer names.
AMPLICON_INFO holds one amplicon per line as tab-separated primer names.
Every primer that shares an amplicon with a listed primer is added to the
list, and MASKED_PRIMERS is overwritten with the completed, sorted list.
"""

import sys


def complete_mask(masked_primers, amplicon_data):
    """Return the sorted, de-duplicated list of primers to mask.

    masked_primers -- iterable of primer names flagged as affected
    amplicon_data  -- iterable of lists, each holding the primer names of
                      one amplicon

    Each flagged primer is always kept in the result, even when no amplicon
    lists it; otherwise a primer missing from the amplicon info would
    silently vanish from the mask. Empty names (from blank lines or doubled
    tabs in the inputs) are never reported.
    """
    completed = set()
    for primer in masked_primers:
        if not primer:
            continue
        completed.add(primer)
        for amplicon in amplicon_data:
            if primer in amplicon:
                completed.update(amplicon)
    # amplicon records may carry empty fields; they are not primer names
    completed.discard('')
    return sorted(completed)


def main(argv=None):
    """Run the script on ``argv`` (defaults to ``sys.argv[1:]``).

    argv[0] is the masked-primers file (read, then overwritten in place),
    argv[1] the amplicon info file. The amplicon info file is only opened
    when the masked-primers file lists at least one primer; otherwise the
    masked-primers file is left untouched.
    """
    args = sys.argv[1:] if argv is None else argv
    masked_path = args[0]

    with open(masked_path) as i:
        getmasked_output = i.readline().strip()

    if not getmasked_output:
        print()
        print('No affected primer binding sites found!')
        return

    with open(args[1]) as i:
        amplicon_data = [line.strip().split('\t') for line in i]

    result = '\t'.join(
        complete_mask(getmasked_output.split('\t'), amplicon_data)
    )
    print()
    print('Removing reads primed with any of:')
    print(result)
    with open(masked_path, 'w') as o:
        o.write(result + '\n')


if __name__ == '__main__':
    main()
--- a/ivar_removereads.xml Fri Jun 05 04:11:42 2020 -0400 +++ b/ivar_removereads.xml Mon Jun 22 07:29:10 2020 -0400 @@ -1,24 +1,41 @@ -<tool id="ivar_removereads" name="ivar removereads" version="@VERSION@+galaxy0"> +<tool id="ivar_removereads" name="ivar removereads" version="@VERSION@+galaxy1"> <description>Remove reads from trimmed BAM file</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> + <expand macro="requirements"> + <requirement type="package" version="3.8.1">python</requirement> + </expand> <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ + cp '$input_bed' binding_sites.bed && + python '$__tool_directory__/sanitize_bed.py' binding_sites.bed && + + ivar getmasked + -i '$variants_tsv' -b binding_sites.bed -f '$amplicon_info' -p masked_primers && + + python '$__tool_directory__/completemask.py' masked_primers.txt '$amplicon_info' && ln -s '$input_bam' sorted.bam && - ln -s '$primer_index' primers.txt && - ln -s '$input_bed' bed.bed && + ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai && + ivar removereads - -i sorted.bam + -i sorted.bam + -b binding_sites.bed -p removed_reads.bam - -t primers.txt - -b bed.bed - ]]> </command> + -t masked_primers.txt + ]]></command> <inputs> - <param name="input_bam" argument="-i" type="data" format="bam" label="Bam file" help="Aligned reads, to trim primers and quality"/> - <param name="primer_index" argument="-t" type="data" format="txt" label="Text file with primer indices separated by spaces" help="This is the output of getmasked command"/> - <param name="input_bed" argument="-b" type="data" format="bed" label="BED file with primer sequences and positions"/> + <param name="input_bam" argument="-i" type="data" format="bam" + label="Bam input" + help="BAM dataset, preprocessed with ivar trim, to remove reads from" /> + <param name="variants_tsv" type="data" format="tabular" + label="Variants input" + help="This dataset will be scanned for 
variants that affect primer binding sites and needs to be in tabular format with affected chromosome names in the first, and positions in the second column. If there is a header line, the name of the second column should be POS." /> + <param name="input_bed" argument="-b" type="data" format="bed" label="Primer binding sites information" + help="The same six-column BED dataset that served as input to ivar trim"/> + <param name="amplicon_info" type="data" format="tabular" + label="Primer to amplicon assignment info" + help="This input should consist of one line per amplicon with the tab-separated names of all primers used to generate that amplicon."/> </inputs> <outputs> <data name="output_bam" format="bam" label="${tool.name} on ${on_string}" from_work_dir="removed_reads.bam"/> @@ -26,20 +43,32 @@ <tests> <test> <param name="input_bam" value="zika/Z52_a.trimmed.sorted.bam"/> - <param name="primer_index" value="zika/primer_mismatchers_indices.txt"/> - <param name="input_bed" value="zika/db/zika_primers.bed"/> + <param name="variants_tsv" value="zika/primers_Z52_consensus.tsv"/> + <param name="input_bed" value="zika/db/zika_primers_consensus.bed"/> + <param name="amplicon_info" value="zika/db/pair_information.tsv"/> <output name="output_bam" file="zika/Z52_a.masked.bam" compare="sim_size" delta="100000" /> </test> </tests> <help><![CDATA[ - This command accepts an aligned and sorted BAM file trimmed using ivar trim - and removes the reads corresponding to the supplied primer indices, which is - the output of ivar getmasked command. Under the hood, ivar trim adds the - zero based primer index (based on the BED file) to the BAM auxillary data for - every read. Hence, ivar removereads will only work on BAM files that have - been trimmed using ivar trim. +This Galaxy tool combines the functionality of ``ivar getmasked`` and +``ivar removereads``. No separate ``ivar getmasked`` step is required when +using this tool. 
+ +The wrapper takes as input a BAM dataset of aligned and sorted reads, from +which the primers listed in the primer binding sites BED input have been +trimmed with ``ivar trim``. + +From this input it will remove reads that come from amplicons that have been +generated with one or more primers that may have been affected in their binding +by variants listed in the variants input file. + +.. class:: warningmark + + Preprocessing of the BAM input with ivar trim is essential for this tool to + work because only ``ivar trim`` can add required primer information to the + BAM auxiliary data of every read. - Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_. - ]]> </help> +ivar documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`__. + ]]></help> <expand macro="citations" /> </tool>
#!/usr/bin/env python
"""sanitize_bed.py: overwrite the score column of a BED file in place.

Usage: sanitize_bed.py BED_FILE

The fifth tab-separated column of every record is replaced with a fixed
value so that downstream tools see a uniform score.
NOTE(review): presumably needed because ivar rejects non-numeric scores
(the accompanying test data uses ``NOINT``) -- confirm against ivar.
"""

import sys

# zero-based index of the BED score column and the value written into it
SCORE_COLUMN = 4
PLACEHOLDER_SCORE = '60'


def sanitize_record(record):
    """Return ``record`` with its score column replaced.

    The line terminator of ``record`` (if any) is preserved, so a record
    whose score is the last column keeps its newline. Blank lines are
    returned unchanged. Returns None when the record has too few columns
    to contain a score.
    """
    content = record.rstrip('\r\n')
    if not content.strip():
        return record
    fields = content.split('\t')
    # Slicing never raises IndexError, so the column count has to be
    # checked explicitly to detect short records.
    if len(fields) <= SCORE_COLUMN:
        return None
    fields[SCORE_COLUMN] = PLACEHOLDER_SCORE
    return '\t'.join(fields) + record[len(content):]


def sanitize_bed(path):
    """Rewrite the BED file at ``path`` with sanitized score columns.

    Returns True when the file was rewritten. If any record has fewer
    columns than needed, the file is left untouched and False is returned,
    leaving the column number issue to getmasked.
    """
    with open(path) as i:
        bed_data = i.readlines()

    sanitized_data = []
    for record in bed_data:
        sanitized = sanitize_record(record)
        if sanitized is None:
            return False
        sanitized_data.append(sanitized)

    with open(path, 'w') as o:
        o.writelines(sanitized_data)
    return True


if __name__ == '__main__':
    sanitize_bed(sys.argv[1])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/covid19/ARTIC-V1-bad.bed Mon Jun 22 07:29:10 2020 -0400 @@ -0,0 +1,196 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT NOINT + +MN908947.3 385 410 nCoV-2019_1_RIGHT NOINT - +MN908947.3 320 342 nCoV-2019_2_LEFT NOINT + +MN908947.3 704 726 nCoV-2019_2_RIGHT NOINT - +MN908947.3 642 664 nCoV-2019_3_LEFT NOINT + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT NOINT - +MN908947.3 943 965 nCoV-2019_4_LEFT NOINT + +MN908947.3 1312 1337 nCoV-2019_4_RIGHT NOINT - +MN908947.3 1242 1264 nCoV-2019_5_LEFT NOINT + +MN908947.3 1623 1651 nCoV-2019_5_RIGHT NOINT - +MN908947.3 1573 1595 nCoV-2019_6_LEFT NOINT + +MN908947.3 1942 1964 nCoV-2019_6_RIGHT NOINT - +MN908947.3 1875 1897 nCoV-2019_7_LEFT NOINT + +MN908947.3 2247 2269 nCoV-2019_7_RIGHT NOINT - +MN908947.3 2181 2205 nCoV-2019_8_LEFT NOINT + +MN908947.3 2568 2592 nCoV-2019_8_RIGHT NOINT - +MN908947.3 2505 2529 nCoV-2019_9_LEFT NOINT + +MN908947.3 2882 2904 nCoV-2019_9_RIGHT NOINT - +MN908947.3 2826 2850 nCoV-2019_10_LEFT NOINT + +MN908947.3 3183 3210 nCoV-2019_10_RIGHT NOINT - +MN908947.3 3144 3166 nCoV-2019_11_LEFT NOINT + +MN908947.3 3507 3531 nCoV-2019_11_RIGHT NOINT - +MN908947.3 3460 3482 nCoV-2019_12_LEFT NOINT + +MN908947.3 3826 3853 nCoV-2019_12_RIGHT NOINT - +MN908947.3 3771 3795 nCoV-2019_13_LEFT NOINT + +MN908947.3 4142 4164 nCoV-2019_13_RIGHT NOINT - +MN908947.3 4054 4077 nCoV-2019_14_LEFT NOINT + +MN908947.3 4428 4450 nCoV-2019_14_RIGHT NOINT - +MN908947.3 4294 4321 nCoV-2019_15_LEFT NOINT + +MN908947.3 4674 4696 nCoV-2019_15_RIGHT NOINT - +MN908947.3 4636 4658 nCoV-2019_16_LEFT NOINT + +MN908947.3 4995 5017 nCoV-2019_16_RIGHT NOINT - +MN908947.3 4939 4966 nCoV-2019_17_LEFT NOINT + +MN908947.3 5296 5321 nCoV-2019_17_RIGHT NOINT - +MN908947.3 5230 5259 nCoV-2019_18_LEFT NOINT + +MN908947.3 5620 5644 nCoV-2019_18_RIGHT NOINT - +MN908947.3 5563 5586 nCoV-2019_19_LEFT NOINT + +MN908947.3 5932 5957 nCoV-2019_19_RIGHT NOINT - +MN908947.3 5867 5894 nCoV-2019_20_LEFT NOINT + 
+MN908947.3 6247 6272 nCoV-2019_20_RIGHT NOINT - +MN908947.3 6167 6196 nCoV-2019_21_LEFT NOINT + +MN908947.3 6528 6550 nCoV-2019_21_RIGHT NOINT - +MN908947.3 6466 6495 nCoV-2019_22_LEFT NOINT + +MN908947.3 6846 6873 nCoV-2019_22_RIGHT NOINT - +MN908947.3 6718 6745 nCoV-2019_23_LEFT NOINT + +MN908947.3 7092 7117 nCoV-2019_23_RIGHT NOINT - +MN908947.3 7035 7058 nCoV-2019_24_LEFT NOINT + +MN908947.3 7389 7415 nCoV-2019_24_RIGHT NOINT - +MN908947.3 7305 7332 nCoV-2019_25_LEFT NOINT + +MN908947.3 7671 7694 nCoV-2019_25_RIGHT NOINT - +MN908947.3 7626 7651 nCoV-2019_26_LEFT NOINT + +MN908947.3 7997 8019 nCoV-2019_26_RIGHT NOINT - +MN908947.3 7943 7968 nCoV-2019_27_LEFT NOINT + +MN908947.3 8319 8341 nCoV-2019_27_RIGHT NOINT - +MN908947.3 8249 8275 nCoV-2019_28_LEFT NOINT + +MN908947.3 8635 8661 nCoV-2019_28_RIGHT NOINT - +MN908947.3 8595 8619 nCoV-2019_29_LEFT NOINT + +MN908947.3 8954 8983 nCoV-2019_29_RIGHT NOINT - +MN908947.3 8888 8913 nCoV-2019_30_LEFT NOINT + +MN908947.3 9245 9271 nCoV-2019_30_RIGHT NOINT - +MN908947.3 9204 9226 nCoV-2019_31_LEFT NOINT + +MN908947.3 9557 9585 nCoV-2019_31_RIGHT NOINT - +MN908947.3 9477 9502 nCoV-2019_32_LEFT NOINT + +MN908947.3 9834 9858 nCoV-2019_32_RIGHT NOINT - +MN908947.3 9784 9806 nCoV-2019_33_LEFT NOINT + +MN908947.3 10146 10171 nCoV-2019_33_RIGHT NOINT - +MN908947.3 10076 10099 nCoV-2019_34_LEFT NOINT + +MN908947.3 10437 10459 nCoV-2019_34_RIGHT NOINT - +MN908947.3 10362 10384 nCoV-2019_35_LEFT NOINT + +MN908947.3 10737 10763 nCoV-2019_35_RIGHT NOINT - +MN908947.3 10666 10688 nCoV-2019_36_LEFT NOINT + +MN908947.3 11048 11074 nCoV-2019_36_RIGHT NOINT - +MN908947.3 10999 11022 nCoV-2019_37_LEFT NOINT + +MN908947.3 11372 11394 nCoV-2019_37_RIGHT NOINT - +MN908947.3 11306 11331 nCoV-2019_38_LEFT NOINT + +MN908947.3 11668 11693 nCoV-2019_38_RIGHT NOINT - +MN908947.3 11555 11584 nCoV-2019_39_LEFT NOINT + +MN908947.3 11927 11949 nCoV-2019_39_RIGHT NOINT - +MN908947.3 11863 11889 nCoV-2019_40_LEFT NOINT + +MN908947.3 12234 12256 
nCoV-2019_40_RIGHT NOINT - +MN908947.3 12110 12133 nCoV-2019_41_LEFT NOINT + +MN908947.3 12465 12490 nCoV-2019_41_RIGHT NOINT - +MN908947.3 12417 12439 nCoV-2019_42_LEFT NOINT + +MN908947.3 12779 12802 nCoV-2019_42_RIGHT NOINT - +MN908947.3 12710 12732 nCoV-2019_43_LEFT NOINT + +MN908947.3 13074 13096 nCoV-2019_43_RIGHT NOINT - +MN908947.3 13005 13027 nCoV-2019_44_LEFT NOINT + +MN908947.3 13378 13400 nCoV-2019_44_RIGHT NOINT - +MN908947.3 13319 13344 nCoV-2019_45_LEFT NOINT + +MN908947.3 13669 13699 nCoV-2019_45_RIGHT NOINT - +MN908947.3 13599 13621 nCoV-2019_46_LEFT NOINT + +MN908947.3 13962 13984 nCoV-2019_46_RIGHT NOINT - +MN908947.3 13918 13946 nCoV-2019_47_LEFT NOINT + +MN908947.3 14271 14299 nCoV-2019_47_RIGHT NOINT - +MN908947.3 14207 14232 nCoV-2019_48_LEFT NOINT + +MN908947.3 14579 14601 nCoV-2019_48_RIGHT NOINT - +MN908947.3 14545 14570 nCoV-2019_49_LEFT NOINT + +MN908947.3 14898 14926 nCoV-2019_49_RIGHT NOINT - +MN908947.3 14865 14895 nCoV-2019_50_LEFT NOINT + +MN908947.3 15224 15246 nCoV-2019_50_RIGHT NOINT - +MN908947.3 15171 15193 nCoV-2019_51_LEFT NOINT + +MN908947.3 15538 15560 nCoV-2019_51_RIGHT NOINT - +MN908947.3 15481 15503 nCoV-2019_52_LEFT NOINT + +MN908947.3 15861 15886 nCoV-2019_52_RIGHT NOINT - +MN908947.3 15827 15851 nCoV-2019_53_LEFT NOINT + +MN908947.3 16186 16209 nCoV-2019_53_RIGHT NOINT - +MN908947.3 16118 16144 nCoV-2019_54_LEFT NOINT + +MN908947.3 16485 16510 nCoV-2019_54_RIGHT NOINT - +MN908947.3 16416 16444 nCoV-2019_55_LEFT NOINT + +MN908947.3 16804 16833 nCoV-2019_55_RIGHT NOINT - +MN908947.3 16748 16770 nCoV-2019_56_LEFT NOINT + +MN908947.3 17130 17152 nCoV-2019_56_RIGHT NOINT - +MN908947.3 17065 17087 nCoV-2019_57_LEFT NOINT + +MN908947.3 17430 17452 nCoV-2019_57_RIGHT NOINT - +MN908947.3 17381 17406 nCoV-2019_58_LEFT NOINT + +MN908947.3 17738 17761 nCoV-2019_58_RIGHT NOINT - +MN908947.3 17674 17697 nCoV-2019_59_LEFT NOINT + +MN908947.3 18036 18062 nCoV-2019_59_RIGHT NOINT - +MN908947.3 17966 17993 nCoV-2019_60_LEFT NOINT + 
+MN908947.3 18324 18348 nCoV-2019_60_RIGHT NOINT - +MN908947.3 18253 18275 nCoV-2019_61_LEFT NOINT + +MN908947.3 18650 18672 nCoV-2019_61_RIGHT NOINT - +MN908947.3 18596 18618 nCoV-2019_62_LEFT NOINT + +MN908947.3 18957 18979 nCoV-2019_62_RIGHT NOINT - +MN908947.3 18896 18918 nCoV-2019_63_LEFT NOINT + +MN908947.3 19275 19297 nCoV-2019_63_RIGHT NOINT - +MN908947.3 19204 19232 nCoV-2019_64_LEFT NOINT + +MN908947.3 19591 19616 nCoV-2019_64_RIGHT NOINT - +MN908947.3 19548 19570 nCoV-2019_65_LEFT NOINT + +MN908947.3 19911 19939 nCoV-2019_65_RIGHT NOINT - +MN908947.3 19844 19866 nCoV-2019_66_LEFT NOINT + +MN908947.3 20231 20255 nCoV-2019_66_RIGHT NOINT - +MN908947.3 20172 20200 nCoV-2019_67_LEFT NOINT + +MN908947.3 20542 20572 nCoV-2019_67_RIGHT NOINT - +MN908947.3 20472 20496 nCoV-2019_68_LEFT NOINT + +MN908947.3 20867 20890 nCoV-2019_68_RIGHT NOINT - +MN908947.3 20786 20813 nCoV-2019_69_LEFT NOINT + +MN908947.3 21146 21169 nCoV-2019_69_RIGHT NOINT - +MN908947.3 21075 21104 nCoV-2019_70_LEFT NOINT + +MN908947.3 21427 21455 nCoV-2019_70_RIGHT NOINT - +MN908947.3 21357 21386 nCoV-2019_71_LEFT NOINT + +MN908947.3 21716 21743 nCoV-2019_71_RIGHT NOINT - +MN908947.3 21658 21682 nCoV-2019_72_LEFT NOINT + +MN908947.3 22013 22038 nCoV-2019_72_RIGHT NOINT - +MN908947.3 21961 21990 nCoV-2019_73_LEFT NOINT + +MN908947.3 22324 22346 nCoV-2019_73_RIGHT NOINT - +MN908947.3 22262 22290 nCoV-2019_74_LEFT NOINT + +MN908947.3 22626 22650 nCoV-2019_74_RIGHT NOINT - +MN908947.3 22516 22542 nCoV-2019_75_LEFT NOINT + +MN908947.3 22877 22903 nCoV-2019_75_RIGHT NOINT - +MN908947.3 22797 22819 nCoV-2019_76_LEFT NOINT + +MN908947.3 23192 23214 nCoV-2019_76_RIGHT NOINT - +MN908947.3 23122 23144 nCoV-2019_77_LEFT NOINT + +MN908947.3 23500 23522 nCoV-2019_77_RIGHT NOINT - +MN908947.3 23443 23466 nCoV-2019_78_LEFT NOINT + +MN908947.3 23822 23847 nCoV-2019_78_RIGHT NOINT - +MN908947.3 23789 23812 nCoV-2019_79_LEFT NOINT + +MN908947.3 24145 24169 nCoV-2019_79_RIGHT NOINT - +MN908947.3 24078 24100 
nCoV-2019_80_LEFT NOINT + +MN908947.3 24443 24467 nCoV-2019_80_RIGHT NOINT - +MN908947.3 24391 24416 nCoV-2019_81_LEFT NOINT + +MN908947.3 24765 24789 nCoV-2019_81_RIGHT NOINT - +MN908947.3 24696 24721 nCoV-2019_82_LEFT NOINT + +MN908947.3 25052 25076 nCoV-2019_82_RIGHT NOINT - +MN908947.3 24978 25003 nCoV-2019_83_LEFT NOINT + +MN908947.3 25347 25369 nCoV-2019_83_RIGHT NOINT - +MN908947.3 25279 25301 nCoV-2019_84_LEFT NOINT + +MN908947.3 25646 25673 nCoV-2019_84_RIGHT NOINT - +MN908947.3 25601 25623 nCoV-2019_85_LEFT NOINT + +MN908947.3 25969 25994 nCoV-2019_85_RIGHT NOINT - +MN908947.3 25902 25924 nCoV-2019_86_LEFT NOINT + +MN908947.3 26290 26315 nCoV-2019_86_RIGHT NOINT - +MN908947.3 26197 26219 nCoV-2019_87_LEFT NOINT + +MN908947.3 26566 26590 nCoV-2019_87_RIGHT NOINT - +MN908947.3 26520 26542 nCoV-2019_88_LEFT NOINT + +MN908947.3 26890 26913 nCoV-2019_88_RIGHT NOINT - +MN908947.3 26835 26857 nCoV-2019_89_LEFT NOINT + +MN908947.3 27202 27227 nCoV-2019_89_RIGHT NOINT - +MN908947.3 27141 27164 nCoV-2019_90_LEFT NOINT + +MN908947.3 27511 27533 nCoV-2019_90_RIGHT NOINT - +MN908947.3 27446 27471 nCoV-2019_91_LEFT NOINT + +MN908947.3 27825 27854 nCoV-2019_91_RIGHT NOINT - +MN908947.3 27784 27808 nCoV-2019_92_LEFT NOINT + +MN908947.3 28145 28172 nCoV-2019_92_RIGHT NOINT - +MN908947.3 28081 28104 nCoV-2019_93_LEFT NOINT + +MN908947.3 28442 28464 nCoV-2019_93_RIGHT NOINT - +MN908947.3 28394 28416 nCoV-2019_94_LEFT NOINT + +MN908947.3 28756 28779 nCoV-2019_94_RIGHT NOINT - +MN908947.3 28677 28699 nCoV-2019_95_LEFT NOINT + +MN908947.3 29041 29063 nCoV-2019_95_RIGHT NOINT - +MN908947.3 28985 29007 nCoV-2019_96_LEFT NOINT + +MN908947.3 29356 29378 nCoV-2019_96_RIGHT NOINT - +MN908947.3 29288 29316 nCoV-2019_97_LEFT NOINT + +MN908947.3 29665 29693 nCoV-2019_97_RIGHT NOINT - +MN908947.3 29486 29510 nCoV-2019_98_LEFT NOINT + +MN908947.3 29836 29866 nCoV-2019_98_RIGHT NOINT -