cpt_cluster_lcbs (Mercurial repository: repos / cpt / cpt_cluster_lcbs)
changeset 1:e6d8cdb65df0 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author   | cpt |
|----------|-----|
| date     | Mon, 05 Jun 2023 02:40:20 +0000 |
| parents  | 85cd5feab3b7 |
| children | c9e004668d6c |
| files    | cluster_lcbs.py, cluster_lcbs.xml, cpt-macros.xml, cpt_cluster_lcbs/cluster_lcbs.py, cpt_cluster_lcbs/cluster_lcbs.xml, cpt_cluster_lcbs/cpt-macros.xml, cpt_cluster_lcbs/macros.xml, macros.xml |
| diffstat | 8 files changed, 449 insertions(+), 440 deletions(-) |
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_lcbs.py  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+from Bio import SeqIO
+import tempfile
+import sys
+import argparse
+
+
+def parse_xmfa(xmfa):
+    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544"""
+    current_lcb = []
+    current_seq = {}
+    for line in xmfa.readlines():
+        if line.startswith("#"):
+            continue
+
+        if line.strip() == "=":
+            if "id" in current_seq:
+                current_lcb.append(current_seq)
+                current_seq = {}
+            yield current_lcb
+            current_lcb = []
+        else:
+            line = line.strip()
+            if line.startswith(">"):
+                if "id" in current_seq:
+                    current_lcb.append(current_seq)
+                    current_seq = {}
+                data = line.strip().split()
+                # 0 1           2 3      4 5
+                # > 1:5986-6406 + CbK.fa # CbK_gp011
+                id, loc = data[1].split(":")
+                start, end = loc.split("-")
+                current_seq = {
+                    "rid": "_".join(data[1:]),
+                    "id": id,
+                    "start": int(start),
+                    "end": int(end),
+                    "strand": 1 if data[2] == "+" else -1,
+                    "file": data[3],
+                    "seq": "",
+                    "comment": "",
+                }
+                if len(data) > 5:
+                    current_seq["comment"] = " ".join(data[5:])
+            else:
+                current_seq["seq"] += line.strip()
+
+
+HEADER_TPL = "> {id}:{start}-{end} {strand} {file} # {comment}\n"
+
+
+def split_by_n(seq, n):
+    """A generator to divide a sequence into chunks of n units."""
+    # http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
+    while seq:
+        yield seq[:n]
+        seq = seq[n:]
+
+
+def to_xmfa(lcbs, handle=sys.stdout):
+    handle.write("#FormatVersion Mauve1\n")
+    for lcb in lcbs:
+        for aln in lcb:
+            handle.write(
+                HEADER_TPL.format(
+                    id=aln["id"],
+                    start=aln["start"],
+                    end=aln["end"],
+                    strand="+" if aln["strand"] > 0 else "-",
+                    file=aln["file"],
+                    comment=aln["comment"],
+                )
+            )
+
+            for line in split_by_n(aln["seq"], 80):
+                handle.write(line + "\n")
+        handle.write("=\n")
+
+
+def percent_identity(a, b):
+    """Calculate % identity, ignoring gaps in the host sequence"""
+    match = 0
+    mismatch = 0
+    for char_a, char_b in zip(list(a), list(b)):
+        if char_a == "-":
+            continue
+        if char_a == char_b:
+            match += 1
+        else:
+            mismatch += 1
+
+    if match + mismatch == 0:
+        return 0.0
+    return 100 * float(match) / (match + mismatch)
+
+
+def id_tn_dict(sequences, tmpfile=False):
+    """Figure out sequence IDs"""
+    label_convert = {}
+    correct_chrom = None
+    if not isinstance(sequences, list):
+        sequences = [sequences]
+
+    i = 0
+    for sequence_file in sequences:
+        for record in SeqIO.parse(sequence_file, "fasta"):
+            if correct_chrom is None:
+                correct_chrom = record.id
+
+            i += 1
+            key = str(i)
+            label_convert[key] = {"record_id": record.id, "len": len(record.seq)}
+
+            if tmpfile:
+                label_convert[key] = tempfile.NamedTemporaryFile(delete=False)
+
+    return label_convert
+
+
+def filter_lcbs_for_seq(xmfa):
+    """clusters lcbs based on which sequences they involve"""
+    strand_info = {"1": "+", "-1": "-"}
+    clusters = {}
+
+    for i in list(parse_xmfa(xmfa)):
+        cluster_name = ""
+
+        for g in i:
+            cluster_name += g["id"] + strand_info[str(g["strand"])]
+        # allow clusters with all opposite strands to be together (alt name is opposite strand of orig)
+        alt_name = cluster_name.replace("+", "*").replace("-", "+").replace("*", "-")
+
+        orig_not_in_clusters = cluster_name not in clusters
+        alt_not_in_clusters = alt_name not in clusters
+
+        if orig_not_in_clusters and alt_not_in_clusters:
+            # if original or alternate names not already in clusters
+            clusters[cluster_name] = [i]
+        else:
+            if not orig_not_in_clusters:  # if original name is already in clusters
+                clusters[cluster_name].append(i)
+            if not alt_not_in_clusters:  # if alt name is already in clusters
+                clusters[alt_name].append(i)
+
+    return clusters
+    # to_xmfa(clusters['123456'])
+
+
+def merge_lcbs(lcb1, lcb2):
+    for num, i in enumerate(lcb1):
+        i["start"] = min([i["start"], lcb2[num]["start"]])
+        i["end"] = max([i["end"], lcb2[num]["end"]])
+        i["seq"] += lcb2[num]["seq"]
+
+    return lcb1
+
+
+def resolve_clusters(clusters):
+    merged = []
+    for lcbs in clusters:
+        if len(lcbs) == 1:
+            merged.append(lcbs[0])
+            continue
+        merging = lcbs[0]
+        for lcb in lcbs[1:]:
+            merging = merge_lcbs(merging, lcb)
+        merged.append(merging)
+
+    return merged
+
+
+def new(clusters, lcb):
+    new = True
+    for c in clusters:
+        if lcb in c:
+            new = False
+    return new
+
+
+def cluster_lcbs(lcbs, threshold):
+    """clusters lcbs based on how far apart they are"""
+
+    clusters = []
+    for o, i in enumerate(lcbs):
+        cluster = []
+
+        if not new(clusters, i):
+            continue
+
+        cluster.append(i)
+        compare_against = i
+
+        for n, j in enumerate(lcbs):
+
+            if not new(clusters, j) or i == j or compare_against == j:
+                continue
+
+            close = True
+            for num, k in enumerate(compare_against):
+                # for num, k in enumerate(i):
+                if j[num]["start"] - k["end"] > threshold:
+                    close = False
+
+            if close:
+                cluster.append(j)
+                compare_against = j
+
+        clusters.append(cluster)
+    return resolve_clusters(clusters)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="process XMFA")
+    parser.add_argument("xmfa", type=argparse.FileType("r"), help="XMFA file")
+    parser.add_argument(
+        "threshold",
+        type=int,
+        help="maximum number of nucleotides between lcbs in a cluster",
+    )
+    args = parser.parse_args()
+
+    # assuming lcbs are filtered
+    final_lcbs = []
+    lcbs_filtered_for_seq = filter_lcbs_for_seq(args.xmfa)
+    for i in lcbs_filtered_for_seq:
+        final_lcbs += cluster_lcbs(lcbs_filtered_for_seq[i], args.threshold)
+    to_xmfa(final_lcbs)
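To make the clustering behaviour concrete, here is a minimal usage sketch (not part of the changeset): it pushes a toy two-LCB alignment through filter_lcbs_for_seq and cluster_lcbs. The sequences and file names are invented for illustration, and it assumes the file above is saved as cluster_lcbs.py so its functions can be imported.

    # Toy run of the pipeline above (hypothetical data, not from the changeset).
    import io
    from cluster_lcbs import cluster_lcbs, filter_lcbs_for_seq, to_xmfa

    toy_xmfa = "\n".join([
        "#FormatVersion Mauve1",
        "> 1:1-12 + a.fa # geneA",
        "ACGTACGTACGT",
        "> 2:1-12 + b.fa # geneB",
        "ACGTACGTACGT",
        "=",
        "> 1:20-31 + a.fa # geneC",
        "TTTTACGTACGT",
        "> 2:40-51 + b.fa # geneD",
        "TTTTACGTACGT",
        "=",
        "",
    ])

    clusters = filter_lcbs_for_seq(io.StringIO(toy_xmfa))
    assert list(clusters) == ["1+2+"]  # both LCBs involve seqs 1 and 2 on the + strand

    merged = []
    for name in clusters:
        # The gaps are 20-12=8 nt (seq 1) and 40-12=28 nt (seq 2), both under
        # the 100 nt threshold, so the two LCBs merge into one.
        merged += cluster_lcbs(clusters[name], 100)
    to_xmfa(merged)  # prints a single LCB spanning 1-31 on seq 1 and 1-51 on seq 2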
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_lcbs.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,33 @@
+<tool id="edu.tamu.cpt.cluster_lcbs" name="Merge LCBs within a given threshold distance" version="@WRAPPER_VERSION@.0">
+    <description/>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/cluster_lcbs.py'
+@XMFA_INPUT@
+'$threshold'
+> '$output'
+]]></command>
+    <inputs>
+        <expand macro="xmfa_input"/>
+        <param type="integer" name="threshold" value="50" label="maximum number of nucleotides between LCBs in a cluster"/>
+    </inputs>
+    <outputs>
+        <data format="xmfa" name="output"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+Merges LCBs that lie within the threshold distance of each other into a single LCB, reducing the clutter that many small LCBs create in downstream visualizations.
+
+**WARNING**
+
+May not work correctly when minus-strand genes are present; this case is untested.
+
+]]></help>
+    <!-- TODO -->
+    <expand macro="citations"/>
+</tool>
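For orientation, the wrapper's command block reduces to running the script on an XMFA dataset with an integer threshold and redirecting stdout to the output dataset. A rough stand-alone equivalent in Python (file names are placeholders, not values from the changeset):

    # Stand-alone equivalent of the wrapper's command line (hypothetical paths).
    import subprocess

    with open("clustered.xmfa", "w") as out:
        subprocess.run(
            ["python", "cluster_lcbs.py", "aligned.xmfa", "50"],  # threshold: 50 nt
            stdout=out,   # the script writes the merged XMFA to stdout
            check=True,   # fail loudly, mirroring detect_errors="aggressive"
        )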
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+        <![CDATA[
+            cd '$__tool_directory__' && git rev-parse HEAD
+        ]]>
+        </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+            author = {E. Mijalis, H. Rasche},
+            title = {CPT Galaxy Tools},
+            year = {2013-2017},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+        </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Ross},
+                title = {CPT Galaxy Tools},
+                year = {2020-},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Maughmer},
+                title = {CPT Galaxy Tools},
+                year = {2017-2020},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Maughmer},
+            title = {CPT Galaxy Tools},
+            year = {2017-2020},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+        <yield/>
+    </xml>
+</macros>
--- a/cpt_cluster_lcbs/cluster_lcbs.py  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-#!/usr/bin/env python
-from Bio import SeqIO
-import tempfile
-import sys
-import argparse
-
-
-def parse_xmfa(xmfa):
-    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544
-    """
-    current_lcb = []
-    current_seq = {}
-    for line in xmfa.readlines():
-        if line.startswith("#"):
-            continue
-
-        if line.strip() == "=":
-            if "id" in current_seq:
-                current_lcb.append(current_seq)
-                current_seq = {}
-            yield current_lcb
-            current_lcb = []
-        else:
-            line = line.strip()
-            if line.startswith(">"):
-                if "id" in current_seq:
-                    current_lcb.append(current_seq)
-                    current_seq = {}
-                data = line.strip().split()
-                # 0 1           2 3      4 5
-                # > 1:5986-6406 + CbK.fa # CbK_gp011
-                id, loc = data[1].split(":")
-                start, end = loc.split("-")
-                current_seq = {
-                    "rid": "_".join(data[1:]),
-                    "id": id,
-                    "start": int(start),
-                    "end": int(end),
-                    "strand": 1 if data[2] == "+" else -1,
-                    "file": data[3],
-                    "seq": "",
-                    "comment": "",
-                }
-                if len(data) > 5:
-                    current_seq["comment"] = " ".join(data[5:])
-            else:
-                current_seq["seq"] += line.strip()
-
-
-HEADER_TPL = "> {id}:{start}-{end} {strand} {file} # {comment}\n"
-
-
-def split_by_n(seq, n):
-    """A generator to divide a sequence into chunks of n units."""
-    # http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
-    while seq:
-        yield seq[:n]
-        seq = seq[n:]
-
-
-def to_xmfa(lcbs, handle=sys.stdout):
-    handle.write("#FormatVersion Mauve1\n")
-    for lcb in lcbs:
-        for aln in lcb:
-            handle.write(
-                HEADER_TPL.format(
-                    id=aln["id"],
-                    start=aln["start"],
-                    end=aln["end"],
-                    strand="+" if aln["strand"] > 0 else "-",
-                    file=aln["file"],
-                    comment=aln["comment"],
-                )
-            )
-
-            for line in split_by_n(aln["seq"], 80):
-                handle.write(line + "\n")
-        handle.write("=\n")
-
-
-def percent_identity(a, b):
-    """Calculate % identity, ignoring gaps in the host sequence
-    """
-    match = 0
-    mismatch = 0
-    for char_a, char_b in zip(list(a), list(b)):
-        if char_a == "-":
-            continue
-        if char_a == char_b:
-            match += 1
-        else:
-            mismatch += 1
-
-    if match + mismatch == 0:
-        return 0.0
-    return 100 * float(match) / (match + mismatch)
-
-
-def id_tn_dict(sequences, tmpfile=False):
-    """Figure out sequence IDs
-    """
-    label_convert = {}
-    correct_chrom = None
-    if not isinstance(sequences, list):
-        sequences = [sequences]
-
-    i = 0
-    for sequence_file in sequences:
-        for record in SeqIO.parse(sequence_file, "fasta"):
-            if correct_chrom is None:
-                correct_chrom = record.id
-
-            i += 1
-            key = str(i)
-            label_convert[key] = {"record_id": record.id, "len": len(record.seq)}
-
-            if tmpfile:
-                label_convert[key] = tempfile.NamedTemporaryFile(delete=False)
-
-    return label_convert
-
-
-def filter_lcbs_for_seq(xmfa):
-    """ clusters lcbs based on which sequences they involve """
-    strand_info = {"1": "+", "-1": "-"}
-    clusters = {}
-
-    for i in list(parse_xmfa(xmfa)):
-        cluster_name = ""
-
-        for g in i:
-            cluster_name += g["id"] + strand_info[str(g["strand"])]
-        # allow clusters with all opposite strands to be together (alt name is opposite strand of orig)
-        alt_name = cluster_name.replace("+", "*").replace("-", "+").replace("*", "-")
-
-        orig_not_in_clusters = cluster_name not in clusters
-        alt_not_in_clusters = alt_name not in clusters
-
-        if orig_not_in_clusters and alt_not_in_clusters:
-            # if original or alternate names not already in clusters
-            clusters[cluster_name] = [i]
-        else:
-            if not orig_not_in_clusters:  # if original name is already in clusters
-                clusters[cluster_name].append(i)
-            if not alt_not_in_clusters:  # if alt name is already in clusters
-                clusters[alt_name].append(i)
-
-    return clusters
-    # to_xmfa(clusters['123456'])
-
-
-def merge_lcbs(lcb1, lcb2):
-    for num, i in enumerate(lcb1):
-        i["start"] = min([i["start"], lcb2[num]["start"]])
-        i["end"] = max([i["end"], lcb2[num]["end"]])
-        i["seq"] += lcb2[num]["seq"]
-
-    return lcb1
-
-
-def resolve_clusters(clusters):
-    merged = []
-    for lcbs in clusters:
-        if len(lcbs) == 1:
-            merged.append(lcbs[0])
-            continue
-        merging = lcbs[0]
-        for lcb in lcbs[1:]:
-            merging = merge_lcbs(merging, lcb)
-        merged.append(merging)
-
-    return merged
-
-
-def new(clusters, lcb):
-    new = True
-    for c in clusters:
-        if lcb in c:
-            new = False
-    return new
-
-
-def cluster_lcbs(lcbs, threshold):
-    """ clusters lcbs based on how far apart they are"""
-
-    clusters = []
-    for o, i in enumerate(lcbs):
-        cluster = []
-
-        if not new(clusters, i):
-            continue
-
-        cluster.append(i)
-        compare_against = i
-
-        for n, j in enumerate(lcbs):
-
-            if not new(clusters, j) or i == j or compare_against == j:
-                continue
-
-            close = True
-            for num, k in enumerate(compare_against):
-                # for num, k in enumerate(i):
-                if j[num]["start"] - k["end"] > threshold:
-                    close = False
-
-            if close:
-                cluster.append(j)
-                compare_against = j
-
-        clusters.append(cluster)
-    return resolve_clusters(clusters)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="process XMFA")
-    parser.add_argument("xmfa", type=argparse.FileType("r"), help="XMFA file")
-    parser.add_argument(
-        "threshold",
-        type=int,
-        help="maximum number of nucleotides between lcbs in a cluster",
-    )
-    args = parser.parse_args()
-
-    # assuming lcbs are filtered
-    final_lcbs = []
-    lcbs_filtered_for_seq = filter_lcbs_for_seq(args.xmfa)
-    for i in lcbs_filtered_for_seq:
-        final_lcbs += cluster_lcbs(lcbs_filtered_for_seq[i], args.threshold)
-    to_xmfa(final_lcbs)
--- a/cpt_cluster_lcbs/cluster_lcbs.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.cluster_lcbs" name="Merge LCBs within a given threshold distance" version="@WRAPPER_VERSION@.0">
-    <description></description>
-    <macros>
-        <import>macros.xml</import>
-        <import>cpt-macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <command detect_errors="aggressive"><![CDATA[
-python $__tool_directory__/cluster_lcbs.py
-@XMFA_INPUT@
-$threshold
-> $output
-]]></command>
-    <inputs>
-        <expand macro="xmfa_input" />
-        <param type="integer" name="threshold" value="50" label="maximum number of nucleotides between LCBs in a cluster"/>
-    </inputs>
-    <outputs>
-        <data format="xmfa" name="output" />
-    </outputs>
-    <help><![CDATA[
-**What it does**
-
-Merges LCBs near each other into one LCB. Helps with cluttered-looking data due to multiple LCBs.
-
-**WARNING**
-
-Might not work if you have - strand genes. Need to test.
-
-]]></help>
-<!-- TODO -->
-    <expand macro="citations" />
-</tool>
--- a/cpt_cluster_lcbs/cpt-macros.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-    <xml name="gff_requirements">
-        <requirements>
-            <requirement type="package" version="2.7">python</requirement>
-            <requirement type="package" version="1.65">biopython</requirement>
-            <requirement type="package" version="2.12.1">requests</requirement>
-            <yield/>
-        </requirements>
-        <version_command>
-        <![CDATA[
-            cd $__tool_directory__ && git rev-parse HEAD
-        ]]>
-        </version_command>
-    </xml>
-    <xml name="citation/mijalisrasche">
-        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-        <citation type="bibtex">@unpublished{galaxyTools,
-            author = {E. Mijalis, H. Rasche},
-            title = {CPT Galaxy Tools},
-            year = {2013-2017},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-        </citation>
-    </xml>
-    <xml name="citations">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {E. Mijalis, H. Rasche},
-                title = {CPT Galaxy Tools},
-                year = {2013-2017},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-crr">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {C. Ross},
-                title = {CPT Galaxy Tools},
-                year = {2020-},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-2020">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {E. Mijalis, H. Rasche},
-                title = {CPT Galaxy Tools},
-                year = {2013-2017},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {A. Criscione},
-                title = {CPT Galaxy Tools},
-                year = {2019-2021},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-2020-AJC-solo">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {A. Criscione},
-                title = {CPT Galaxy Tools},
-                year = {2019-2021},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="citations-clm">
-        <citations>
-            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-            <citation type="bibtex">
-            @unpublished{galaxyTools,
-                author = {C. Maughmer},
-                title = {CPT Galaxy Tools},
-                year = {2017-2020},
-                note = {https://github.com/tamu-cpt/galaxy-tools/}
-            }
-            </citation>
-            <yield/>
-        </citations>
-    </xml>
-    <xml name="sl-citations-clm">
-        <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {C. Maughmer},
-            title = {CPT Galaxy Tools},
-            year = {2017-2020},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-        </citation>
-        <yield/>
-    </xml>
-</macros>
--- a/cpt_cluster_lcbs/macros.xml  Tue Jul 05 05:23:56 2022 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package">progressivemauve</requirement>
-            <requirement type="package" version="3.8.13">python</requirement>
-            <requirement type="package" version="1.79">biopython</requirement>
-            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
-            <yield/>
-        </requirements>
-    </xml>
-    <token name="@WRAPPER_VERSION@">2.4.0</token>
-    <xml name="citation/progressive_mauve">
-        <citation type="doi">10.1371/journal.pone.0011147</citation>
-    </xml>
-    <xml name="citation/gepard">
-        <citation type="doi">10.1093/bioinformatics/btm039</citation>
-    </xml>
-
-    <token name="@XMFA_INPUT@">
-        "$xmfa"
-    </token>
-    <xml name="xmfa_input"
-         token_formats="xmfa">
-        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA" />
-    </xml>
-
-    <token name="@XMFA_FA_INPUT@">
-        "$sequences"
-    </token>
-    <xml name="xmfa_fa_input">
-        <param type="data" format="fasta" name="sequences" label="Sequences in alignment"
-               help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
-
-    </xml>
-    <xml name="genome_selector">
-        <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
-    </xml>
-    <xml name="gff3_input">
-        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
-    </xml>
-    <xml name="input/gff3+fasta">
-        <expand macro="gff3_input" />
-        <expand macro="genome_selector" />
-    </xml>
-    <token name="@INPUT_GFF@">
-        "$gff3_data"
-    </token>
-    <token name="@INPUT_FASTA@">
-        genomeref.fa
-    </token>
-    <token name="@GENOME_SELECTOR_PRE@">
-        ln -s $genome_fasta genomeref.fa;
-    </token>
-    <token name="@GENOME_SELECTOR@">
-        genomeref.fa
-    </token>
-    <xml name="input/fasta">
-        <param label="Fasta file" name="sequences" type="data" format="fasta"/>
-    </xml>
-</macros>
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml  Mon Jun 05 02:40:20 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+        '$xmfa'
+    </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+        '$sequences'
+    </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+        '$gff3_data'
+    </token>
+    <token name="@INPUT_FASTA@">
+        #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+        #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+        #end if
+    </token>
+    <token name="@GENOME_SELECTOR_PRE@">
+        #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+        #end if
+    </token>
+    <token name="@GENOME_SELECTOR@">
+        #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+        #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+        #end if
+    </token>
+</macros>
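A note on the rewritten genome_selector, readable directly from the Cheetah above: when the user picks a genome from their history, @GENOME_SELECTOR_PRE@ expands to a staging command of the form ln -s '<dataset path>' genomeref.fa; and both @INPUT_FASTA@ and @GENOME_SELECTOR@ resolve to genomeref.fa, whereas for a locally cached genome they resolve to the indexed FASTA path drawn from the all_fasta data table and no staging step is emitted.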