Mercurial > repos > iuc > ivar_removereads
changeset 8:28a6f1908fcc draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
author | iuc |
---|---|
date | Thu, 05 Aug 2021 12:44:59 +0000 |
parents | 43aac8d29685 |
children | 8d36959b000d |
files | ivar_removereads.xml write_amplicon_info_file.py |
diffstat | 2 files changed, 83 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/ivar_removereads.xml Fri Jun 11 15:41:24 2021 +0000 +++ b/ivar_removereads.xml Thu Aug 05 12:44:59 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="ivar_removereads" name="ivar removereads" version="@VERSION@+galaxy0"> +<tool id="ivar_removereads" name="ivar removereads" version="@VERSION@+galaxy1"> <description>Remove reads from trimmed BAM file</description> <macros> <import>macros.xml</import> @@ -10,11 +10,15 @@ <command detect_errors="exit_code"><![CDATA[ cp '$input_bed' binding_sites.bed && python '$__tool_directory__/sanitize_bed.py' binding_sites.bed && - + #if $amplicons.computed == "yes" + python '$__tool_directory__/write_amplicon_info_file.py' binding_sites.bed amplicon_info.tsv && + #else + ln -s '$amplicon_info' amplicon_info.tsv && + #end if ivar getmasked - -i '$variants_tsv' -b binding_sites.bed -f '$amplicon_info' -p masked_primers && + -i '$variants_tsv' -b binding_sites.bed -f amplicon_info.tsv -p masked_primers && - python '$__tool_directory__/completemask.py' masked_primers.txt '$amplicon_info' && + python '$__tool_directory__/completemask.py' masked_primers.txt amplicon_info.tsv && ln -s '$input_bam' sorted.bam && ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai && @@ -33,9 +37,18 @@ help="This dataset will be scanned for variants that affect primer binding sites and needs to be in tabular format with affected chromosome names in the first, and positions in the second column. If there is a header line, the name of the second column should be POS." 
/> <param name="input_bed" argument="-b" type="data" format="bed" label="Primer binding sites information" help="The same six-column BED dataset that served as input to ivar trim"/> - <param name="amplicon_info" type="data" format="tabular" - label="Primer to amplicon assignment info" - help="This input should consist of one line per amplicon with the tab-separated names of all primers used to generate that amplicon."/> + <conditional name="amplicons"> + <param name="computed" type="select" label="Compute amplicon info from BED file" help="Compute the amplicon info file from the primer BED file"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + <when value="yes" /> + <when value="no"> + <param name="amplicon_info" type="data" format="tabular" + label="Primer to amplicon assignment info" + help="This input should consist of one line per amplicon with the tab-separated names of all primers used to generate that amplicon."/> + </when> + </conditional> </inputs> <outputs> <data name="output_bam" format="bam" label="${tool.name} on ${on_string}" from_work_dir="removed_reads.bam"/> @@ -45,7 +58,19 @@ <param name="input_bam" value="zika/Z52_a.trimmed.sorted.bam"/> <param name="variants_tsv" value="zika/primers_Z52_consensus.tsv"/> <param name="input_bed" value="zika/db/zika_primers_consensus.bed"/> - <param name="amplicon_info" value="zika/db/pair_information.tsv"/> + <conditional name="amplicons"> + <param name="computed" value="no" /> + <param name="amplicon_info" value="zika/db/pair_information.tsv"/> + </conditional> + <output name="output_bam" file="zika/Z52_a.masked.bam" compare="sim_size" delta="100000" /> + </test> + <test> + <param name="input_bam" value="zika/Z52_a.trimmed.sorted.bam"/> + <param name="variants_tsv" value="zika/primers_Z52_consensus.tsv"/> + <param name="input_bed" value="zika/db/zika_primers_consensus.bed"/> + <conditional name="amplicons"> + <param name="computed" value="yes" /> + </conditional> 
<output name="output_bam" file="zika/Z52_a.masked.bam" compare="sim_size" delta="100000" /> </test> </tests>
#!/usr/bin/env python
# write_amplicon_info_file.py -- added as a new file by this changeset
# (diff header: --- /dev/null  +++ b/write_amplicon_info_file.py  @@ -0,0 +1,50 @@)
"""Write an amplicon info file for iVar from a primer BED file.

Primers are grouped by the amplicon number embedded in their name (the
fourth BED column).  One line is written per amplicon, holding the
tab-separated names of all primers that share that amplicon number.
"""

import argparse
import re

# A primer name must contain "_<number>_", optionally followed by non-digit
# characters, and then a side marker: L / LEFT or R / RIGHT.  The pattern is
# applied with re.match, i.e. it is anchored at the start of the name only,
# so trailing text after the side marker (e.g. "_alt") is accepted.
AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'

# Compiled once; the pattern is applied to every primer name, twice.
_AMPLICON_NAME_PATTERN = re.compile(AMPLICON_NAME_RE)

# Added to the sort position of right-hand primers so that, within one
# amplicon, they sort after the left-hand primers.
_RIGHT_PRIMER_OFFSET = 1000


def _match_primer_name(name):
    """Return the regex match for a primer name or raise ValueError."""
    re_match = _AMPLICON_NAME_PATTERN.match(name)
    if re_match is None:
        raise ValueError("{} does not match expected amplicon name format".format(name))
    return re_match


def primer_info_to_position(name):
    """Return the sort position of a primer derived from its name.

    The position is the amplicon number taken from the name, plus 1000 if
    the primer is a right-hand primer (side marker ``R`` or ``RIGHT``).

    Raises:
        ValueError: if *name* does not match AMPLICON_NAME_RE.
    """
    re_match = _match_primer_name(name)
    position = int(re_match.group('num'))
    if re_match.group('name') in ('R', 'RIGHT'):
        position += _RIGHT_PRIMER_OFFSET
    return position


def write_amplicon_info_file(bed_file, amplicon_info_file):
    """Group the primers of a BED file by amplicon and write one line each.

    Args:
        bed_file: iterable of tab-separated BED lines; the primer name is
            read from the fourth column.  Blank lines are skipped.
        amplicon_info_file: writable text file object.  It receives one
            line per amplicon, in ascending amplicon-number order, with the
            tab-separated primer names sorted by primer_info_to_position.
            The file is closed before this function returns.

    Raises:
        ValueError: if a non-blank line has fewer than four columns or a
            primer name does not match AMPLICON_NAME_RE.
    """
    amplicon_sets = {}
    for line in bed_file:
        stripped = line.strip()
        if not stripped:
            # Tolerate empty lines (e.g. a trailing newline at end of file)
            # instead of failing on the missing name column.
            continue
        fields = stripped.split('\t')
        if len(fields) < 4:
            raise ValueError(
                "BED line has fewer than 4 tab-separated columns: {!r}".format(stripped)
            )
        name = fields[3]
        amplicon_id = int(_match_primer_name(name).group('num'))
        amplicon_sets.setdefault(amplicon_id, []).append(name)

    for amplicon_id in sorted(amplicon_sets):
        # sorted() is stable, so primers on the same side keep their BED order.
        primer_names = sorted(amplicon_sets[amplicon_id], key=primer_info_to_position)
        amplicon_info_file.write('\t'.join(primer_names) + '\n')
    amplicon_info_file.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions')
    parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file')
    parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format')
    args = parser.parse_args()

    # The output file is closed by write_amplicon_info_file; make sure the
    # input handle opened by argparse is released as well.
    with args.bed_file:
        write_amplicon_info_file(args.bed_file, args.amplicon_info_file)