cpt_cluster_lcbs (Mercurial repository: repos / cpt / cpt_cluster_lcbs)
changeset 1:e6d8cdb65df0 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author   | cpt |
|----------|-----|
| date     | Mon, 05 Jun 2023 02:40:20 +0000 |
| parents  | 85cd5feab3b7 |
| children | c9e004668d6c |
| files    | cluster_lcbs.py, cluster_lcbs.xml, cpt-macros.xml, cpt_cluster_lcbs/cluster_lcbs.py, cpt_cluster_lcbs/cluster_lcbs.xml, cpt_cluster_lcbs/cpt-macros.xml, cpt_cluster_lcbs/macros.xml, macros.xml |
| diffstat | 8 files changed, 449 insertions(+), 440 deletions(-) |
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_lcbs.py  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+from Bio import SeqIO
+import tempfile
+import sys
+import argparse
+
+
+def parse_xmfa(xmfa):
+    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544"""
+    current_lcb = []
+    current_seq = {}
+    for line in xmfa.readlines():
+        if line.startswith("#"):
+            continue
+
+        if line.strip() == "=":
+            if "id" in current_seq:
+                current_lcb.append(current_seq)
+                current_seq = {}
+            yield current_lcb
+            current_lcb = []
+        else:
+            line = line.strip()
+            if line.startswith(">"):
+                if "id" in current_seq:
+                    current_lcb.append(current_seq)
+                    current_seq = {}
+                data = line.strip().split()
+                # 0 1           2 3      4 5
+                # > 1:5986-6406 + CbK.fa # CbK_gp011
+                id, loc = data[1].split(":")
+                start, end = loc.split("-")
+                current_seq = {
+                    "rid": "_".join(data[1:]),
+                    "id": id,
+                    "start": int(start),
+                    "end": int(end),
+                    "strand": 1 if data[2] == "+" else -1,
+                    "file": data[3],
+                    "seq": "",
+                    "comment": "",
+                }
+                if len(data) > 5:
+                    current_seq["comment"] = " ".join(data[5:])
+            else:
+                current_seq["seq"] += line.strip()
+
+
+HEADER_TPL = "> {id}:{start}-{end} {strand} {file} # {comment}\n"
+
+
+def split_by_n(seq, n):
+    """A generator to divide a sequence into chunks of n units."""
+    # http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
+    while seq:
+        yield seq[:n]
+        seq = seq[n:]
+
+
+def to_xmfa(lcbs, handle=sys.stdout):
+    handle.write("#FormatVersion Mauve1\n")
+    for lcb in lcbs:
+        for aln in lcb:
+            handle.write(
+                HEADER_TPL.format(
+                    id=aln["id"],
+                    start=aln["start"],
+                    end=aln["end"],
+                    strand="+" if aln["strand"] > 0 else "-",
+                    file=aln["file"],
+                    comment=aln["comment"],
+                )
+            )
+
+            for line in split_by_n(aln["seq"], 80):
+                handle.write(line + "\n")
+        handle.write("=\n")
+
+
+def percent_identity(a, b):
+    """Calculate % identity, ignoring gaps in the host sequence"""
+    match = 0
+    mismatch = 0
+    for char_a, char_b in zip(list(a), list(b)):
+        if char_a == "-":
+            continue
+        if char_a == char_b:
+            match += 1
+        else:
+            mismatch += 1
+
+    if match + mismatch == 0:
+        return 0.0
+    return 100 * float(match) / (match + mismatch)
+
+
+def id_tn_dict(sequences, tmpfile=False):
+    """Figure out sequence IDs"""
+    label_convert = {}
+    correct_chrom = None
+    if not isinstance(sequences, list):
+        sequences = [sequences]
+
+    i = 0
+    for sequence_file in sequences:
+        for record in SeqIO.parse(sequence_file, "fasta"):
+            if correct_chrom is None:
+                correct_chrom = record.id
+
+            i += 1
+            key = str(i)
+            label_convert[key] = {"record_id": record.id, "len": len(record.seq)}
+
+            if tmpfile:
+                label_convert[key] = tempfile.NamedTemporaryFile(delete=False)
+
+    return label_convert
+
+
+def filter_lcbs_for_seq(xmfa):
+    """clusters lcbs based on which sequences they involve"""
+    strand_info = {"1": "+", "-1": "-"}
+    clusters = {}
+
+    for i in list(parse_xmfa(xmfa)):
+        cluster_name = ""
+
+        for g in i:
+            cluster_name += g["id"] + strand_info[str(g["strand"])]
+        # allow clusters with all opposite strands to be together (alt name is opposite strand of orig)
+        alt_name = cluster_name.replace("+", "*").replace("-", "+").replace("*", "-")
+
+        orig_not_in_clusters = cluster_name not in clusters
+        alt_not_in_clusters = alt_name not in clusters
+
+        if orig_not_in_clusters and alt_not_in_clusters:
+            # if original or alternate names not already in clusters
+            clusters[cluster_name] = [i]
+        else:
+            if not orig_not_in_clusters:  # if original name is already in clusters
+                clusters[cluster_name].append(i)
+            if not alt_not_in_clusters:  # if alt name is already in clusters
+                clusters[alt_name].append(i)
+
+    return clusters
+    # to_xmfa(clusters['123456'])
+
+
+def merge_lcbs(lcb1, lcb2):
+    for num, i in enumerate(lcb1):
+        i["start"] = min([i["start"], lcb2[num]["start"]])
+        i["end"] = max([i["end"], lcb2[num]["end"]])
+        i["seq"] += lcb2[num]["seq"]
+
+    return lcb1
+
+
+def resolve_clusters(clusters):
+    merged = []
+    for lcbs in clusters:
+        if len(lcbs) == 1:
+            merged.append(lcbs[0])
+            continue
+        merging = lcbs[0]
+        for lcb in lcbs[1:]:
+            merging = merge_lcbs(merging, lcb)
+        merged.append(merging)
+
+    return merged
+
+
+def new(clusters, lcb):
+    new = True
+    for c in clusters:
+        if lcb in c:
+            new = False
+    return new
+
+
+def cluster_lcbs(lcbs, threshold):
+    """clusters lcbs based on how far apart they are"""
+
+    clusters = []
+    for o, i in enumerate(lcbs):
+        cluster = []
+
+        if not new(clusters, i):
+            continue
+
+        cluster.append(i)
+        compare_against = i
+
+        for n, j in enumerate(lcbs):
+
+            if not new(clusters, j) or i == j or compare_against == j:
+                continue
+
+            close = True
+            for num, k in enumerate(compare_against):
+                # for num, k in enumerate(i):
+                if j[num]["start"] - k["end"] > threshold:
+                    close = False
+
+            if close:
+                cluster.append(j)
+                compare_against = j
+
+        clusters.append(cluster)
+    return resolve_clusters(clusters)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="process XMFA")
+    parser.add_argument("xmfa", type=argparse.FileType("r"), help="XMFA file")
+    parser.add_argument(
+        "threshold",
+        type=int,
+        help="maximum number of nucleotides between lcbs in a cluster",
+    )
+    args = parser.parse_args()
+
+    # assuming lcbs are filtered
+    final_lcbs = []
+    lcbs_filtered_for_seq = filter_lcbs_for_seq(args.xmfa)
+    for i in lcbs_filtered_for_seq:
+        final_lcbs += cluster_lcbs(lcbs_filtered_for_seq[i], args.threshold)
+    to_xmfa(final_lcbs)
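To make the clustering behaviour concrete, here is a minimal usage sketch (not part of the changeset): it pushes a toy two-LCB alignment through filter_lcbs_for_seq and cluster_lcbs. The sequences and file names are invented for illustration, and it assumes the file above is saved as cluster_lcbs.py so its functions can be imported.

    # Toy run of the pipeline above (hypothetical data, not from the changeset).
    import io
    from cluster_lcbs import cluster_lcbs, filter_lcbs_for_seq, to_xmfa

    toy_xmfa = "\n".join([
        "#FormatVersion Mauve1",
        "> 1:1-12 + a.fa # geneA",
        "ACGTACGTACGT",
        "> 2:1-12 + b.fa # geneB",
        "ACGTACGTACGT",
        "=",
        "> 1:20-31 + a.fa # geneC",
        "TTTTACGTACGT",
        "> 2:40-51 + b.fa # geneD",
        "TTTTACGTACGT",
        "=",
        "",
    ])

    clusters = filter_lcbs_for_seq(io.StringIO(toy_xmfa))
    assert list(clusters) == ["1+2+"]  # both LCBs involve seqs 1 and 2 on the + strand

    merged = []
    for name in clusters:
        # The gaps are 20-12=8 nt (seq 1) and 40-12=28 nt (seq 2), both under
        # the 100 nt threshold, so the two LCBs merge into one.
        merged += cluster_lcbs(clusters[name], 100)
    to_xmfa(merged)  # prints a single LCB spanning 1-31 on seq 1 and 1-51 on seq 2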
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_lcbs.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,33 @@
+<tool id="edu.tamu.cpt.cluster_lcbs" name="Merge LCBs within a given threshold distance" version="@WRAPPER_VERSION@.0">
+    <description/>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/cluster_lcbs.py'
+@XMFA_INPUT@
+'$threshold'
+> '$output'
+]]></command>
+    <inputs>
+        <expand macro="xmfa_input"/>
+        <param type="integer" name="threshold" value="50" label="maximum number of nucleotides between LCBs in a cluster"/>
+    </inputs>
+    <outputs>
+        <data format="xmfa" name="output"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+Merges LCBs that lie within the threshold distance of each other into a single LCB, reducing the clutter that many small LCBs create in downstream visualizations.
+
+**WARNING**
+
+May not work correctly when minus-strand genes are present; this case is untested.
+
+]]></help>
+    <!-- TODO -->
+    <expand macro="citations"/>
+</tool>
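For orientation, the wrapper's command block reduces to running the script on an XMFA dataset with an integer threshold and redirecting stdout to the output dataset. A rough stand-alone equivalent in Python (file names are placeholders, not values from the changeset):

    # Stand-alone equivalent of the wrapper's command line (hypothetical paths).
    import subprocess

    with open("clustered.xmfa", "w") as out:
        subprocess.run(
            ["python", "cluster_lcbs.py", "aligned.xmfa", "50"],  # threshold: 50 nt
            stdout=out,   # the script writes the merged XMFA to stdout
            check=True,   # fail loudly, mirroring detect_errors="aggressive"
        )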
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+        <![CDATA[
+            cd '$__tool_directory__' && git rev-parse HEAD
+        ]]>
+        </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+            author = {E. Mijalis, H. Rasche},
+            title = {CPT Galaxy Tools},
+            year = {2013-2017},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+        </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Ross},
+                title = {CPT Galaxy Tools},
+                year = {2020-},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Maughmer},
+                title = {CPT Galaxy Tools},
+                year = {2017-2020},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Maughmer},
+            title = {CPT Galaxy Tools},
+            year = {2017-2020},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+        <yield/>
+    </xml>
+</macros>
--- a/cpt_cluster_lcbs/cluster_lcbs.py  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-#!/usr/bin/env python
-from Bio import SeqIO
-import tempfile
-import sys
-import argparse
-
-
-def parse_xmfa(xmfa):
-    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544
-    """
-    current_lcb = []
-    current_seq = {}
-    for line in xmfa.readlines():
-        if line.startswith("#"):
-            continue
-
-        if line.strip() == "=":
-            if "id" in current_seq:
-                current_lcb.append(current_seq)
-                current_seq = {}
-            yield current_lcb
-            current_lcb = []
-        else:
-            line = line.strip()
-            if line.startswith(">"):
-                if "id" in current_seq:
-                    current_lcb.append(current_seq)
-                    current_seq = {}
-                data = line.strip().split()
-                # 0 1           2 3      4 5
-                # > 1:5986-6406 + CbK.fa # CbK_gp011
-                id, loc = data[1].split(":")
-                start, end = loc.split("-")
-                current_seq = {
-                    "rid": "_".join(data[1:]),
-                    "id": id,
-                    "start": int(start),
-                    "end": int(end),
-                    "strand": 1 if data[2] == "+" else -1,
-                    "file": data[3],
-                    "seq": "",
-                    "comment": "",
-                }
-                if len(data) > 5:
-                    current_seq["comment"] = " ".join(data[5:])
-            else:
-                current_seq["seq"] += line.strip()
-
-
-HEADER_TPL = "> {id}:{start}-{end} {strand} {file} # {comment}\n"
-
-
-def split_by_n(seq, n):
-    """A generator to divide a sequence into chunks of n units."""
-    # http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
-    while seq:
-        yield seq[:n]
-        seq = seq[n:]
-
-
-def to_xmfa(lcbs, handle=sys.stdout):
-    handle.write("#FormatVersion Mauve1\n")
-    for lcb in lcbs:
-        for aln in lcb:
-            handle.write(
-                HEADER_TPL.format(
-                    id=aln["id"],
-                    start=aln["start"],
-                    end=aln["end"],
-                    strand="+" if aln["strand"] > 0 else "-",
-                    file=aln["file"],
-                    comment=aln["comment"],
-                )
-            )
-
-            for line in split_by_n(aln["seq"], 80):
-                handle.write(line + "\n")
-        handle.write("=\n")
-
-
-def percent_identity(a, b):
-    """Calculate % identity, ignoring gaps in the host sequence
-    """
-    match = 0
-    mismatch = 0
-    for char_a, char_b in zip(list(a), list(b)):
-        if char_a == "-":
-            continue
-        if char_a == char_b:
-            match += 1
-        else:
-            mismatch += 1
-
-    if match + mismatch == 0:
-        return 0.0
-    return 100 * float(match) / (match + mismatch)
-
-
-def id_tn_dict(sequences, tmpfile=False):
-    """Figure out sequence IDs
-    """
-    label_convert = {}
-    correct_chrom = None
-    if not isinstance(sequences, list):
-        sequences = [sequences]
-
-    i = 0
-    for sequence_file in sequences:
-        for record in SeqIO.parse(sequence_file, "fasta"):
-            if correct_chrom is None:
-                correct_chrom = record.id
-
-            i += 1
-            key = str(i)
-            label_convert[key] = {"record_id": record.id, "len": len(record.seq)}
-
-            if tmpfile:
-                label_convert[key] = tempfile.NamedTemporaryFile(delete=False)
-
-    return label_convert
-
-
-def filter_lcbs_for_seq(xmfa):
-    """ clusters lcbs based on which sequences they involve """
-    strand_info = {"1": "+", "-1": "-"}
-    clusters = {}
-
-    for i in list(parse_xmfa(xmfa)):
-        cluster_name = ""
-
-        for g in i:
-            cluster_name += g["id"] + strand_info[str(g["strand"])]
-        # allow clusters with all opposite strands to be together (alt name is opposite strand of orig)
-        alt_name = cluster_name.replace("+", "*").replace("-", "+").replace("*", "-")
-
-        orig_not_in_clusters = cluster_name not in clusters
-        alt_not_in_clusters = alt_name not in clusters
-
-        if orig_not_in_clusters and alt_not_in_clusters:
-            # if original or alternate names not already in clusters
-            clusters[cluster_name] = [i]
-        else:
-            if not orig_not_in_clusters:  # if original name is already in clusters
-                clusters[cluster_name].append(i)
-            if not alt_not_in_clusters:  # if alt name is already in clusters
-                clusters[alt_name].append(i)
-
-    return clusters
-    # to_xmfa(clusters['123456'])
-
-
-def merge_lcbs(lcb1, lcb2):
-    for num, i in enumerate(lcb1):
-        i["start"] = min([i["start"], lcb2[num]["start"]])
-        i["end"] = max([i["end"], lcb2[num]["end"]])
-        i["seq"] += lcb2[num]["seq"]
-
-    return lcb1
-
-
-def resolve_clusters(clusters):
-    merged = []
-    for lcbs in clusters:
-        if len(lcbs) == 1:
-            merged.append(lcbs[0])
-            continue
-        merging = lcbs[0]
-        for lcb in lcbs[1:]:
-            merging = merge_lcbs(merging, lcb)
-        merged.append(merging)
-
-    return merged
-
-
-def new(clusters, lcb):
-    new = True
-    for c in clusters:
-        if lcb in c:
-            new = False
-    return new
-
-
-def cluster_lcbs(lcbs, threshold):
-    """ clusters lcbs based on how far apart they are"""
-
-    clusters = []
-    for o, i in enumerate(lcbs):
-        cluster = []
-
-        if not new(clusters, i):
-            continue
-
-        cluster.append(i)
-        compare_against = i
-
-        for n, j in enumerate(lcbs):
-
-            if not new(clusters, j) or i == j or compare_against == j:
-                continue
-
-            close = True
-            for num, k in enumerate(compare_against):
-                # for num, k in enumerate(i):
-                if j[num]["start"] - k["end"] > threshold:
-                    close = False
-
-            if close:
-                cluster.append(j)
-                compare_against = j
-
-        clusters.append(cluster)
-    return resolve_clusters(clusters)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="process XMFA")
-    parser.add_argument("xmfa", type=argparse.FileType("r"), help="XMFA file")
-    parser.add_argument(
-        "threshold",
-        type=int,
-        help="maximum number of nucleotides between lcbs in a cluster",
-    )
-    args = parser.parse_args()
-
-    # assuming lcbs are filtered
-    final_lcbs = []
-    lcbs_filtered_for_seq = filter_lcbs_for_seq(args.xmfa)
-    for i in lcbs_filtered_for_seq:
-        final_lcbs += cluster_lcbs(lcbs_filtered_for_seq[i], args.threshold)
-    to_xmfa(final_lcbs)
--- a/cpt_cluster_lcbs/cluster_lcbs.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.cluster_lcbs" name="Merge LCBs within a given threshold distance" version="@WRAPPER_VERSION@.0">
-    <description></description>
-    <macros>
-        <import>macros.xml</import>
-        <import>cpt-macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <command detect_errors="aggressive"><![CDATA[
-python $__tool_directory__/cluster_lcbs.py
-@XMFA_INPUT@
-$threshold
-> $output
-]]></command>
-    <inputs>
-        <expand macro="xmfa_input" />
-        <param type="integer" name="threshold" value="50" label="maximum number of nucleotides between LCBs in a cluster"/>
-    </inputs>
-    <outputs>
-        <data format="xmfa" name="output" />
-    </outputs>
-    <help><![CDATA[
-**What it does**
-
-Merges LCBs near each other into one LCB. Helps with cluttered-looking data due to multiple LCBs.
-
-**WARNING**
-
-Might not work if you have - strand genes. Need to test.
-
-]]></help>
-<!-- TODO -->
-    <expand macro="citations" />
-</tool>
--- a/cpt_cluster_lcbs/cpt-macros.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-    <xml name="gff_requirements">
-        <requirements>
-            <requirement type="package" version="2.7">python</requirement>
-            <requirement type="package" version="1.65">biopython</requirement>
-            <requirement type="package" version="2.12.1">requests</requirement>
-            <yield/>
-        </requirements>
-        <version_command>
-        <![CDATA[
-            cd $__tool_directory__ && git rev-parse HEAD
-        ]]>
-        </version_command>
-    </xml>
-    <xml name="citation/mijalisrasche">
-        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-        <citation type="bibtex">@unpublished{galaxyTools,
-            author = {E. Mijalis, H. Rasche},
-            title = {CPT Galaxy Tools},
-            year = {2013-2017},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-        </citation>
-    </xml>
-    <xml name="citations">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {E. Mijalis, H. Rasche},
-                title = {CPT Galaxy Tools},
-                year = {2013-2017},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-crr">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {C. Ross},
-                title = {CPT Galaxy Tools},
-                year = {2020-},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-2020">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {E. Mijalis, H. Rasche},
-                title = {CPT Galaxy Tools},
-                year = {2013-2017},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {A. Criscione},
-                title = {CPT Galaxy Tools},
-                year = {2019-2021},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-2020-AJC-solo">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {A. Criscione},
-                title = {CPT Galaxy Tools},
-                year = {2019-2021},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-clm">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {C. Maughmer},
-                title = {CPT Galaxy Tools},
-                year = {2017-2020},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="sl-citations-clm">
-        <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {C. Maughmer},
-            title = {CPT Galaxy Tools},
-            year = {2017-2020},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-        </citation>
-        <yield/>
-    </xml>
-</macros>
--- a/cpt_cluster_lcbs/macros.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package">progressivemauve</requirement>
-            <requirement type="package" version="3.8.13">python</requirement>
-            <requirement type="package" version="1.79">biopython</requirement>
-            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
-            <yield/>
-        </requirements>
-    </xml>
-    <token name="@WRAPPER_VERSION@">2.4.0</token>
-    <xml name="citation/progressive_mauve">
-        <citation type="doi">10.1371/journal.pone.0011147</citation>
-    </xml>
-    <xml name="citation/gepard">
-        <citation type="doi">10.1093/bioinformatics/btm039</citation>
-    </xml>
-
-    <token name="@XMFA_INPUT@">
-        "$xmfa"
-    </token>
-    <xml name="xmfa_input"
-         token_formats="xmfa">
-        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA" />
-    </xml>
-
-    <token name="@XMFA_FA_INPUT@">
-        "$sequences"
-    </token>
-    <xml name="xmfa_fa_input">
-        <param type="data" format="fasta" name="sequences" label="Sequences in alignment"
-               help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
-
-    </xml>
-    <xml name="genome_selector">
-        <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
-    </xml>
-    <xml name="gff3_input">
-        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
-    </xml>
-    <xml name="input/gff3+fasta">
-        <expand macro="gff3_input" />
-        <expand macro="genome_selector" />
-    </xml>
-    <token name="@INPUT_GFF@">
-        "$gff3_data"
-    </token>
-    <token name="@INPUT_FASTA@">
-        genomeref.fa
-    </token>
-    <token name="@GENOME_SELECTOR_PRE@">
-        ln -s $genome_fasta genomeref.fa;
-    </token>
-    <token name="@GENOME_SELECTOR@">
-        genomeref.fa
-    </token>
-    <xml name="input/fasta">
-        <param label="Fasta file" name="sequences" type="data" format="fasta"/>
-    </xml>
-</macros>
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+        '$xmfa'
+    </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+        '$sequences'
+    </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+        '$gff3_data'
+    </token>
+    <token name="@INPUT_FASTA@">
+        #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+        #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+        #end if
+    </token>
+    <token name="@GENOME_SELECTOR_PRE@">
+        #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+        #end if
+    </token>
+    <token name="@GENOME_SELECTOR@">
+        #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+        #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+        #end if
+    </token>
+</macros>
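A note on the rewritten genome_selector, readable directly from the Cheetah above: when the user picks a genome from their history, @GENOME_SELECTOR_PRE@ expands to a staging command of the form ln -s '<dataset path>' genomeref.fa; and both @INPUT_FASTA@ and @GENOME_SELECTOR@ resolve to genomeref.fa, whereas for a locally cached genome they resolve to the indexed FASTA path drawn from the all_fasta data table and no staging step is emitted.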