Mercurial > repos > cpt > cpt_export_seq_unique
changeset 1:ac766d7dd641 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:41:21 +0000 |
parents | aaed5a0c774c |
children | ba370cca3857 |
files | cpt-macros.xml cpt_export_seq_unique/cpt-macros.xml cpt_export_seq_unique/gff3.py cpt_export_seq_unique/gff3_extract_sequence.py cpt_export_seq_unique/gff3_extract_sequence2.xml cpt_export_seq_unique/macros.xml gff3.py gff3_extract_sequence.py gff3_extract_sequence2.xml macros.xml |
diffstat | 10 files changed, 906 insertions(+), 835 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt-macros.xml Mon Jun 05 02:41:21 2023 +0000 @@ -0,0 +1,115 @@ +<macros> + <xml name="gff_requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.12.1">requests</requirement> + <requirement type="package" version="1.2.2">cpt_gffparser</requirement> + <yield/> + </requirements> + <version_command> + <![CDATA[ + cd '$__tool_directory__' && git rev-parse HEAD + ]]> + </version_command> + </xml> + <xml name="citation/mijalisrasche"> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex">@unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-crr"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. 
Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020-AJC-solo"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-clm"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="sl-citations-clm"> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </xml> +</macros>
--- a/cpt_export_seq_unique/cpt-macros.xml Fri Jul 01 13:43:49 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="gff_requirements"> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.65">biopython</requirement> - <requirement type="package" version="2.12.1">requests</requirement> - <yield/> - </requirements> - <version_command> - <![CDATA[ - cd $__tool_directory__ && git rev-parse HEAD - ]]> - </version_command> - </xml> - <xml name="citation/mijalisrasche"> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex">@unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - </xml> - <xml name="citations"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-crr"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. 
Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020-AJC-solo"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-clm"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="sl-citations-clm"> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </xml> -</macros>
--- a/cpt_export_seq_unique/gff3.py Fri Jul 01 13:43:49 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,346 +0,0 @@ -import copy -import logging - -log = logging.getLogger() -log.setLevel(logging.WARN) - - -def feature_lambda( - feature_list, - test, - test_kwargs, - subfeatures=True, - parent=None, - invert=False, - recurse=True, -): - """Recursively search through features, testing each with a test function, yielding matches. - - GFF3 is a hierachical data structure, so we need to be able to recursively - search through features. E.g. if you're looking for a feature with - ID='bob.42', you can't just do a simple list comprehension with a test - case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in. - - :type feature_list: list - :param feature_list: an iterable of features - - :type test: function reference - :param test: a closure with the method signature (feature, **kwargs) where - the kwargs are those passed in the next argument. This - function should return True or False, True if the feature is - to be yielded as part of the main feature_lambda function, or - False if it is to be ignored. This function CAN mutate the - features passed to it (think "apply"). - - :type test_kwargs: dictionary - :param test_kwargs: kwargs to pass to your closure when it is called. - - :type subfeatures: boolean - :param subfeatures: when a feature is matched, should just that feature be - yielded to the caller, or should the entire sub_feature - tree for that feature be included? subfeatures=True is - useful in cases such as searching for a gene feature, - and wanting to know what RBS/Shine_Dalgarno_sequences - are in the sub_feature tree (which can be accomplished - with two feature_lambda calls). subfeatures=False is - useful in cases when you want to process (and possibly - return) the entire feature tree, such as applying a - qualifier to every single feature. 
- - :type invert: boolean - :param invert: Negate/invert the result of the filter. - - :rtype: yielded list - :return: Yields a list of matching features. - """ - # Either the top level set of [features] or the subfeature attribute - for feature in feature_list: - feature._parent = parent - if not parent: - # Set to self so we cannot go above root. - feature._parent = feature - test_result = test(feature, **test_kwargs) - # if (not invert and test_result) or (invert and not test_result): - if invert ^ test_result: - if not subfeatures: - feature_copy = copy.deepcopy(feature) - feature_copy.sub_features = list() - yield feature_copy - else: - yield feature - - if recurse and hasattr(feature, "sub_features"): - for x in feature_lambda( - feature.sub_features, - test, - test_kwargs, - subfeatures=subfeatures, - parent=feature, - invert=invert, - recurse=recurse, - ): - yield x - - -def fetchParent(feature): - if not hasattr(feature, "_parent") or feature._parent is None: - return feature - else: - return fetchParent(feature._parent) - - -def feature_test_true(feature, **kwargs): - return True - - -def feature_test_type(feature, **kwargs): - if "type" in kwargs: - return str(feature.type).upper() == str(kwargs["type"]).upper() - elif "types" in kwargs: - for x in kwargs["types"]: - if str(feature.type).upper() == str(x).upper(): - return True - return False - raise Exception("Incorrect feature_test_type call, need type or types") - - -def feature_test_qual_value(feature, **kwargs): - """Test qualifier values. 
- - For every feature, check that at least one value in - feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] - """ - if isinstance(kwargs["qualifier"], list): - for qualifier in kwargs["qualifier"]: - for attribute_value in feature.qualifiers.get(qualifier, []): - if attribute_value in kwargs["attribute_list"]: - return True - else: - for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): - if attribute_value in kwargs["attribute_list"]: - return True - return False - - -def feature_test_location(feature, **kwargs): - if "strand" in kwargs: - if feature.location.strand != kwargs["strand"]: - return False - - return feature.location.start <= kwargs["loc"] <= feature.location.end - - -def feature_test_quals(feature, **kwargs): - """ - Example:: - - a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']}) - - # Check if a contains a Note - feature_test_quals(a, {'Note': None}) # Returns True - feature_test_quals(a, {'Product': None}) # Returns False - - # Check if a contains a note with specific value - feature_test_quals(a, {'Note': ['ome']}) # Returns True - - # Check if a contains a note with specific value - feature_test_quals(a, {'Note': ['other']}) # Returns False - """ - for key in kwargs: - if key not in feature.qualifiers: - return False - - # Key is present, no value specified - if kwargs[key] is None: - return True - - # Otherwise there is a key value we're looking for. - # so we make a list of matches - matches = [] - # And check all of the feature qualifier valuse - for value in feature.qualifiers[key]: - # For that kwargs[key] value - for x in kwargs[key]: - matches.append(x in value) - - # If none matched, then we return false. 
- if not any(matches): - return False - - return True - - -def feature_test_contains(feature, **kwargs): - if "index" in kwargs: - return feature.location.start < kwargs["index"] < feature.location.end - elif "range" in kwargs: - return ( - feature.location.start < kwargs["range"]["start"] < feature.location.end - and feature.location.start < kwargs["range"]["end"] < feature.location.end - ) - else: - raise RuntimeError("Must use index or range keyword") - - -def get_id(feature=None, parent_prefix=None): - result = "" - if parent_prefix is not None: - result += parent_prefix + "|" - if "locus_tag" in feature.qualifiers: - result += feature.qualifiers["locus_tag"][0] - elif "gene" in feature.qualifiers: - result += feature.qualifiers["gene"][0] - elif "Gene" in feature.qualifiers: - result += feature.qualifiers["Gene"][0] - elif "product" in feature.qualifiers: - result += feature.qualifiers["product"][0] - elif "Product" in feature.qualifiers: - result += feature.qualifiers["Product"][0] - elif "Name" in feature.qualifiers: - result += feature.qualifiers["Name"][0] - else: - return feature.id - # Leaving in case bad things happen. 
- # result += '%s_%s_%s_%s' % ( - # feature.id, - # feature.location.start, - # feature.location.end, - # feature.location.strand - # ) - return result - - -def get_gff3_id(gene): - return gene.qualifiers.get("Name", [gene.id])[0] - - -def ensure_location_in_bounds(start=0, end=0, parent_length=0): - # This prevents frameshift errors - while start < 0: - start += 3 - while end < 0: - end += 3 - while start > parent_length: - start -= 3 - while end > parent_length: - end -= 3 - return (start, end) - - -def coding_genes(feature_list): - for x in genes(feature_list): - if ( - len( - list( - feature_lambda( - x.sub_features, - feature_test_type, - {"type": "CDS"}, - subfeatures=False, - ) - ) - ) - > 0 - ): - yield x - - -def genes(feature_list, feature_type="gene", sort=False): - """ - Simple filter to extract gene features from the feature set. - """ - - if not sort: - for x in feature_lambda( - feature_list, feature_test_type, {"type": feature_type}, subfeatures=True - ): - yield x - else: - data = list(genes(feature_list, feature_type=feature_type, sort=False)) - data = sorted(data, key=lambda feature: feature.location.start) - for x in data: - yield x - - -def wa_unified_product_name(feature): - """ - Try and figure out a name. We gave conflicting instructions, so - this isn't as trivial as it should be. Sometimes it will be in - 'product' or 'Product', othertimes in 'Name' - """ - # Manually applied tags. - protein_product = feature.qualifiers.get( - "product", feature.qualifiers.get("Product", [None]) - )[0] - - # If neither of those are available ... - if protein_product is None: - # And there's a name... 
- if "Name" in feature.qualifiers: - if not is_uuid(feature.qualifiers["Name"][0]): - protein_product = feature.qualifiers["Name"][0] - - return protein_product - - -def is_uuid(name): - return name.count("-") == 4 and len(name) == 36 - - -def get_rbs_from(gene): - # Normal RBS annotation types - rbs_rbs = list( - feature_lambda( - gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False - ) - ) - rbs_sds = list( - feature_lambda( - gene.sub_features, - feature_test_type, - {"type": "Shine_Dalgarno_sequence"}, - subfeatures=False, - ) - ) - # Fraking apollo - apollo_exons = list( - feature_lambda( - gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False - ) - ) - apollo_exons = [x for x in apollo_exons if len(x) < 10] - # These are more NCBI's style - regulatory_elements = list( - feature_lambda( - gene.sub_features, - feature_test_type, - {"type": "regulatory"}, - subfeatures=False, - ) - ) - rbs_regulatory = list( - feature_lambda( - regulatory_elements, - feature_test_quals, - {"regulatory_class": ["ribosome_binding_site"]}, - subfeatures=False, - ) - ) - # Here's hoping you find just one ;) - return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons - - -def nice_name(record): - """ - get the real name rather than NCBI IDs and so on. If fails, will return record.id - """ - name = record.id - likely_parental_contig = list(genes(record.features, feature_type="contig")) - if len(likely_parental_contig) == 1: - name = likely_parental_contig[0].qualifiers.get("organism", [name])[0] - return name - - -def fsort(it): - for i in sorted(it, key=lambda x: int(x.location.start)): - yield i
--- a/cpt_export_seq_unique/gff3_extract_sequence.py Fri Jul 01 13:43:49 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,275 +0,0 @@ -#!/usr/bin/env python -import sys -import argparse -import logging -import uuid -from CPT_GFFParser import gffParse, gffWrite -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -from Bio.SeqFeature import FeatureLocation, CompoundLocation -from gff3 import feature_lambda, feature_test_type, get_id - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - - -def main(fasta, gff3, feature_filter=None, nodesc=False): - if feature_filter == "nice_cds": - from gff2gb import gff3_to_genbank as cpt_Gff2Gbk - - - for rec in cpt_Gff2Gbk(gff3, fasta, 11): - seenList = {} - if rec.seq[0] == "?": - sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n") - exit(1) - for feat in sorted(rec.features, key=lambda x: x.location.start): - if feat.type != "CDS": - continue - - ind = 0 - if ( - str( - feat.qualifiers.get("locus_tag", get_id(feat)).replace(" ", "-") - ) - in seenList.keys() - ): - seenList[ - str( - feat.qualifiers.get("locus_tag", get_id(feat)).replace( - " ", "-" - ) - ) - ] += 1 - ind = seenList[ - str( - feat.qualifiers.get("locus_tag", get_id(feat)).replace( - " ", "-" - ) - ) - ] - else: - seenList[ - str( - feat.qualifiers.get("locus_tag", get_id(feat)).replace( - " ", "-" - ) - ) - ] = 1 - append = "" - if ind != 0: - append = "_" + str(ind) - - if nodesc: - description = "" - else: - feat.qualifiers["ID"] = [feat._ID] - product = feat.qualifiers.get("product", "") - description = "{1} [Location={0.location};ID={0.qualifiers[ID][0]}]".format( - feat, product - ) - yield [ - SeqRecord( - feat.extract(rec).seq, - id=str( - feat.qualifiers.get("locus_tag", get_id(feat)).replace( - " ", "-" - ) - ) - + append, - description=description, - ) - ] - - elif feature_filter == "unique_cds": - seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) - 
seen_ids = {} - - for rec in gffParse(gff3, base_dict=seq_dict): - noMatch = True - if "Alias" in rec.features[0].qualifiers.keys(): - lColumn = rec.features[0].qualifiers["Alias"][0] - else: - lColumn = "" - for x in seq_dict: - if x == rec.id or x == lColumn: - noMatch = False - if noMatch: - sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n") - exit(1) - newfeats = [] - for feat in sorted( - feature_lambda( - rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False - ), - key=lambda f: f.location.start, - ): - nid = rec.id + "____" + feat.id - if nid in seen_ids: - nid = nid + "__" + uuid.uuid4().hex - feat.qualifiers["ID"] = [nid] - newfeats.append(feat) - seen_ids[nid] = True - - if nodesc: - description = "" - else: - if feat.strand == -1: - important_data = {"Location": FeatureLocation(feat.location.start + 1, feat.location.end - feat.phase, feat.strand)} - else: - important_data = {"Location": FeatureLocation(feat.location.start + 1 + feat.phase, feat.location.end, feat.strand)} - if "Name" in feat.qualifiers: - important_data["Name"] = feat.qualifiers.get("Name", [""])[0] - - description = "[{}]".format( - ";".join( - [ - "{key}={value}".format(key=k, value=v) - for (k, v) in important_data.items() - ] - ) - ) - #if feat.id == "CPT_Privateer_006.p01": - #print(feat) - #exit() - - if isinstance(feat.location, CompoundLocation): - finSeq = "" - if feat.strand == -1: - for x in feat.location.parts: - finSeq += str((rec.seq[feat.location.start: feat.location.end - feat.phase]).reverse_complement()) - else: - for x in feat.location.parts: - finSeq += str(rec.seq[feat.location.start + feat.phase: feat.location.end]) - yield [ - SeqRecord( - finSeq, - id=nid.replace(" ", "-"), - description=description, - ) - ] - elif feat.strand == -1: - yield [ - SeqRecord( - (rec.seq[feat.location.start: feat.location.end - feat.phase]).reverse_complement(), - id=nid.replace(" ", "-"), - description=description, - ) - ] - else: - yield [ - 
SeqRecord( - #feat.extract(rec).seq, - rec.seq[feat.location.start + feat.phase: feat.location.end], - id=nid.replace(" ", "-"), - description=description, - ) - ] - rec.features = newfeats - rec.annotations = {} - #gffWrite([rec], sys.stdout) - else: - seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) - - for rec in gffParse(gff3, base_dict=seq_dict): - noMatch = True - if "Alias" in rec.features[0].qualifiers.keys(): - lColumn = rec.features[0].qualifiers["Alias"][0] - else: - lColumn = "" - for x in seq_dict: - if x == rec.id or x == lColumn: - noMatch = False - if noMatch: - sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n") - exit(1) - for feat in sorted( - feature_lambda( - rec.features, - feature_test_type, - {"type": feature_filter}, - subfeatures=True, - ), - key=lambda f: f.location.start, - ): - id = feat.id - if len(id) == 0: - id = get_id(feat) - - if nodesc: - description = "" - else: - if feat.strand == -1: - important_data = {"Location": FeatureLocation(feat.location.start + 1, feat.location.end - feat.phase, feat.strand)} - else: - important_data = {"Location": FeatureLocation(feat.location.start + 1 + feat.phase, feat.location.end, feat.strand)} - if "Name" in feat.qualifiers: - important_data["Name"] = feat.qualifiers.get("Name", [""])[0] - - description = "[{}]".format( - ";".join( - [ - "{key}={value}".format(key=k, value=v) - for (k, v) in important_data.items() - ] - ) - ) - - if isinstance(feat.location, CompoundLocation): - finSeq = "" - if feat.strand == -1: - for x in feat.location.parts: - finSeq += str((rec.seq[x.start: x.end - feat.phase]).reverse_complement()) - else: - for x in feat.location.parts: - finSeq += str(rec.seq[x.start + feat.phase: x.end]) - yield [ - SeqRecord( - Seq(finSeq), - id=id.replace(" ", "-"), - description=description, - ) - ] - - else: - - if feat.strand == -1: - yield [ - SeqRecord( - seq=Seq(str(rec.seq[feat.location.start: feat.location.end - feat.phase])).reverse_complement(), - 
id=id.replace(" ", "-"), - description=description, - ) - ] - else: - yield [ - SeqRecord( - #feat.extract(rec).seq, - seq=Seq(str(rec.seq[feat.location.start + feat.phase: feat.location.end])), - id=id.replace(" ", "-"), - description=description, - ) - ] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Export corresponding sequence in genome from GFF3", epilog="" - ) - parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome") - parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File") - parser.add_argument( - "--feature_filter", default=None, help="Filter for specific feature types" - ) - parser.add_argument( - "--nodesc", action="store_true", help="Strip description field off" - ) - args = parser.parse_args() - for seq in main(**vars(args)): - #if isinstance(seq, list): - # for x in seq: - # print(type(x.seq)) - # SeqIO.write(x, sys.stdout, "fasta") - #else: - SeqIO.write(seq, sys.stdout, "fasta")
--- a/cpt_export_seq_unique/gff3_extract_sequence2.xml Fri Jul 01 13:43:49 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -<?xml version="1.0"?> -<tool id="edu.tamu.cpt.gff3.export_seq_unique" name="Feature Sequence Export Unique" version="1.2"> - <description>specially modified for sending CDSs to blast</description> - <macros> - <import>macros.xml</import> - <import>cpt-macros.xml</import> - </macros> - <expand macro="requirements"/> - <command detect_errors="aggressive"><![CDATA[ -@GENOME_SELECTOR_PRE@ - -$__tool_directory__/gff3_extract_sequence.py -@GENOME_SELECTOR@ - -@INPUT_GFF@ - ---feature_filter unique_cds -$nodesc -> $default -2> $gff3 -]]></command> - <inputs> - <expand macro="genome_selector" /> - <expand macro="gff3_input" /> - <param label="Remove description (use if blasting)" name="nodesc" type="boolean" truevalue="--nodesc" falsevalue=""/> - </inputs> - <outputs> - <data format="fasta" hidden="false" name="default"/> - <data format="gff3" hidden="false" name="gff3"/> - </outputs> - <help><![CDATA[ -**What it does** - -Extract fasta sequences from a parent genome. - ]]></help> - <expand macro="citations" /> -</tool>
--- a/cpt_export_seq_unique/macros.xml Fri Jul 01 13:43:49 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="3.8.13">python</requirement> - <requirement type="package" version="1.79">biopython</requirement> - <requirement type="package" version="1.2.2">cpt_gffparser</requirement> - <yield/> - </requirements> - </xml> - <token name="@BLAST_TSV@"> - "$blast_tsv" - </token> - <xml name="blast_tsv"> - <param label="Blast Results" help="TSV/tabular (25 Column)" - name="blast_tsv" type="data" format="tabular" /> - </xml> - - <token name="@BLAST_XML@"> - "$blast_xml" - </token> - <xml name="blast_xml"> - <param label="Blast Results" help="XML format" - name="blast_xml" type="data" format="blastxml" /> - </xml> - <xml name="gff3_with_fasta"> - <param label="Genome Sequences" name="fasta" type="data" format="fasta" /> - <param label="Genome Annotations" name="gff3" type="data" format="gff3" /> - </xml> - <xml name="genome_selector"> - <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> - </xml> - <xml name="gff3_input"> - <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> - </xml> - <xml name="input/gff3+fasta"> - <expand macro="gff3_input" /> - <expand macro="genome_selector" /> - </xml> - <token name="@INPUT_GFF@"> - "$gff3_data" - </token> - <token name="@INPUT_FASTA@"> - genomeref.fa - </token> - <token name="@GENOME_SELECTOR_PRE@"> - ln -s $genome_fasta genomeref.fa; - </token> - <token name="@GENOME_SELECTOR@"> - genomeref.fa - </token> - <xml name="input/fasta"> - <param label="Fasta file" name="sequences" type="data" format="fasta"/> - </xml> - - <token name="@SEQUENCE@"> - "$sequences" - </token> - <xml name="input/fasta/protein"> - <param label="Protein fasta file" name="sequences" type="data" format="fasta"/> - </xml> -</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3.py Mon Jun 05 02:41:21 2023 +0000 @@ -0,0 +1,346 @@ +import copy +import logging + +log = logging.getLogger() +log.setLevel(logging.WARN) + + +def feature_lambda( + feature_list, + test, + test_kwargs, + subfeatures=True, + parent=None, + invert=False, + recurse=True, +): + """Recursively search through features, testing each with a test function, yielding matches. + + GFF3 is a hierachical data structure, so we need to be able to recursively + search through features. E.g. if you're looking for a feature with + ID='bob.42', you can't just do a simple list comprehension with a test + case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in. + + :type feature_list: list + :param feature_list: an iterable of features + + :type test: function reference + :param test: a closure with the method signature (feature, **kwargs) where + the kwargs are those passed in the next argument. This + function should return True or False, True if the feature is + to be yielded as part of the main feature_lambda function, or + False if it is to be ignored. This function CAN mutate the + features passed to it (think "apply"). + + :type test_kwargs: dictionary + :param test_kwargs: kwargs to pass to your closure when it is called. + + :type subfeatures: boolean + :param subfeatures: when a feature is matched, should just that feature be + yielded to the caller, or should the entire sub_feature + tree for that feature be included? subfeatures=True is + useful in cases such as searching for a gene feature, + and wanting to know what RBS/Shine_Dalgarno_sequences + are in the sub_feature tree (which can be accomplished + with two feature_lambda calls). subfeatures=False is + useful in cases when you want to process (and possibly + return) the entire feature tree, such as applying a + qualifier to every single feature. 
+ + :type invert: boolean + :param invert: Negate/invert the result of the filter. + + :rtype: yielded list + :return: Yields a list of matching features. + """ + # Either the top level set of [features] or the subfeature attribute + for feature in feature_list: + feature._parent = parent + if not parent: + # Set to self so we cannot go above root. + feature._parent = feature + test_result = test(feature, **test_kwargs) + # if (not invert and test_result) or (invert and not test_result): + if invert ^ test_result: + if not subfeatures: + feature_copy = copy.deepcopy(feature) + feature_copy.sub_features = list() + yield feature_copy + else: + yield feature + + if recurse and hasattr(feature, "sub_features"): + for x in feature_lambda( + feature.sub_features, + test, + test_kwargs, + subfeatures=subfeatures, + parent=feature, + invert=invert, + recurse=recurse, + ): + yield x + + +def fetchParent(feature): + if not hasattr(feature, "_parent") or feature._parent is None: + return feature + else: + return fetchParent(feature._parent) + + +def feature_test_true(feature, **kwargs): + return True + + +def feature_test_type(feature, **kwargs): + if "type" in kwargs: + return str(feature.type).upper() == str(kwargs["type"]).upper() + elif "types" in kwargs: + for x in kwargs["types"]: + if str(feature.type).upper() == str(x).upper(): + return True + return False + raise Exception("Incorrect feature_test_type call, need type or types") + + +def feature_test_qual_value(feature, **kwargs): + """Test qualifier values. 
def feature_test_qual_value(feature, **kwargs):
    # NOTE(review): the original `def` line of this function was truncated in
    # the reviewed view; header reconstructed from the visible body -- confirm
    # name/signature against the upstream gff3.py before merging.
    """
    For every feature, check that at least one value in
    feature.qualifiers[kwargs['qualifier']] is in kwargs['attribute_list'].
    """
    if isinstance(kwargs["qualifier"], list):
        # Multiple qualifier keys: a hit on any of them is a match.
        for qualifier in kwargs["qualifier"]:
            for attribute_value in feature.qualifiers.get(qualifier, []):
                if attribute_value in kwargs["attribute_list"]:
                    return True
    else:
        for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
            if attribute_value in kwargs["attribute_list"]:
                return True
    return False


def feature_test_location(feature, **kwargs):
    """
    True if kwargs['loc'] falls within the feature's span (inclusive).

    If 'strand' is supplied, the feature must also lie on that strand.
    """
    if "strand" in kwargs:
        if feature.location.strand != kwargs["strand"]:
            return False

    return feature.location.start <= kwargs["loc"] <= feature.location.end


def feature_test_quals(feature, **kwargs):
    """
    Check a feature's qualifiers against keyword filters.

    Example::

        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})

        # Check if a contains a Note
        feature_test_quals(a, Note=None)       # Returns True
        feature_test_quals(a, Product=None)    # Returns False

        # Check if a contains a Note with a specific substring
        feature_test_quals(a, Note=['ome'])    # Returns True
        feature_test_quals(a, Note=['other'])  # Returns False
    """
    for key in kwargs:
        if key not in feature.qualifiers:
            return False

        # Key is present, no value specified.
        # NOTE(review): this returns True after the FIRST None-valued key and
        # skips any remaining filters -- confirm multi-key callers expect
        # "any" rather than "all" semantics here.
        if kwargs[key] is None:
            return True

        # Otherwise there is a key value we're looking for,
        # so we make a list of matches
        matches = []
        # And check all of the feature qualifier values
        for value in feature.qualifiers[key]:
            # For that kwargs[key] value (substring match)
            for x in kwargs[key]:
                matches.append(x in value)

        # If none matched, then we return false.
        if not any(matches):
            return False

    return True


def feature_test_contains(feature, **kwargs):
    """
    True if kwargs['index'] (or both ends of kwargs['range'], a dict with
    'start'/'end') falls STRICTLY inside the feature's location.
    """
    if "index" in kwargs:
        return feature.location.start < kwargs["index"] < feature.location.end
    elif "range" in kwargs:
        return (
            feature.location.start < kwargs["range"]["start"] < feature.location.end
            and feature.location.start < kwargs["range"]["end"] < feature.location.end
        )
    else:
        raise RuntimeError("Must use index or range keyword")


def get_id(feature=None, parent_prefix=None):
    """
    Derive a human-useful identifier for a feature.

    Qualifier keys are checked in priority order (locus_tag, gene/Gene,
    product/Product, Name); an optional parent_prefix is prepended with '|'.
    Falls back to feature.id -- note that fallback DISCARDS parent_prefix,
    matching the original behavior.
    """
    result = ""
    if parent_prefix is not None:
        result += parent_prefix + "|"
    if "locus_tag" in feature.qualifiers:
        result += feature.qualifiers["locus_tag"][0]
    elif "gene" in feature.qualifiers:
        result += feature.qualifiers["gene"][0]
    elif "Gene" in feature.qualifiers:
        result += feature.qualifiers["Gene"][0]
    elif "product" in feature.qualifiers:
        result += feature.qualifiers["product"][0]
    elif "Product" in feature.qualifiers:
        result += feature.qualifiers["Product"][0]
    elif "Name" in feature.qualifiers:
        result += feature.qualifiers["Name"][0]
    else:
        return feature.id
    # Leaving in case bad things happen.
    # result += '%s_%s_%s_%s' % (
    #     feature.id,
    #     feature.location.start,
    #     feature.location.end,
    #     feature.location.strand
    # )
    return result


def get_gff3_id(gene):
    """Return the gene's Name qualifier, falling back to its raw ID."""
    return gene.qualifiers.get("Name", [gene.id])[0]


def ensure_location_in_bounds(start=0, end=0, parent_length=0):
    """
    Shift start/end into [0, parent_length] in steps of 3 (codon-sized) to
    avoid introducing frameshifts.

    NOTE(review): when parent_length < 3 the final result may still land
    outside the parent -- confirm callers never pass such tiny parents.
    """
    # This prevents frameshift errors
    while start < 0:
        start += 3
    while end < 0:
        end += 3
    while start > parent_length:
        start -= 3
    while end > parent_length:
        end -= 3
    return (start, end)


def coding_genes(feature_list):
    """Yield genes from feature_list that contain at least one CDS subfeature."""
    for gene in genes(feature_list):
        cds_children = list(
            feature_lambda(
                gene.sub_features,
                feature_test_type,
                {"type": "CDS"},
                subfeatures=False,
            )
        )
        if len(cds_children) > 0:
            yield gene


def genes(feature_list, feature_type="gene", sort=False):
    """
    Simple filter to extract gene features (or another feature_type) from the
    feature set; optionally sorted by start coordinate.
    """
    if not sort:
        for x in feature_lambda(
            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
        ):
            yield x
    else:
        data = list(genes(feature_list, feature_type=feature_type, sort=False))
        data = sorted(data, key=lambda feature: feature.location.start)
        for x in data:
            yield x


def wa_unified_product_name(feature):
    """
    Try and figure out a name. We gave conflicting instructions, so
    this isn't as trivial as it should be. Sometimes it will be in
    'product' or 'Product', othertimes in 'Name'.

    Returns None when no usable name is found.
    """
    # Manually applied tags.
    protein_product = feature.qualifiers.get(
        "product", feature.qualifiers.get("Product", [None])
    )[0]

    # If neither of those are available ...
    if protein_product is None:
        # And there's a name (that isn't an auto-generated UUID) ...
        if "Name" in feature.qualifiers:
            if not is_uuid(feature.qualifiers["Name"][0]):
                protein_product = feature.qualifiers["Name"][0]

    return protein_product


def is_uuid(name):
    """Cheap heuristic for UUID-shaped strings (4 dashes, 36 chars)."""
    return name.count("-") == 4 and len(name) == 36


def get_rbs_from(gene):
    """
    Collect every plausible RBS annotation attached to a gene: RBS and
    Shine_Dalgarno_sequence features, short Apollo exons (<10 bp), and
    NCBI-style regulatory features with regulatory_class=ribosome_binding_site.
    """
    # Normal RBS annotation types
    rbs_rbs = list(
        feature_lambda(
            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False
        )
    )
    rbs_sds = list(
        feature_lambda(
            gene.sub_features,
            feature_test_type,
            {"type": "Shine_Dalgarno_sequence"},
            subfeatures=False,
        )
    )
    # Fraking apollo: short exons stand in for RBSs
    apollo_exons = list(
        feature_lambda(
            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False
        )
    )
    apollo_exons = [x for x in apollo_exons if len(x) < 10]
    # These are more NCBI's style
    regulatory_elements = list(
        feature_lambda(
            gene.sub_features,
            feature_test_type,
            {"type": "regulatory"},
            subfeatures=False,
        )
    )
    rbs_regulatory = list(
        feature_lambda(
            regulatory_elements,
            feature_test_quals,
            {"regulatory_class": ["ribosome_binding_site"]},
            subfeatures=False,
        )
    )
    # Here's hoping you find just one ;)
    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons


def nice_name(record):
    """
    Get the real name rather than NCBI IDs and so on. If it fails, will
    return record.id unchanged.
    """
    name = record.id
    likely_parental_contig = list(genes(record.features, feature_type="contig"))
    if len(likely_parental_contig) == 1:
        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]
    return name


def fsort(it):
    """Yield features ordered by (integer) start coordinate."""
    for i in sorted(it, key=lambda x: int(x.location.start)):
        yield i
#!/usr/bin/env python
import sys
import argparse
import logging
import uuid
from CPT_GFFParser import gffParse, gffWrite
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import FeatureLocation, CompoundLocation
from gff3 import feature_lambda, feature_test_type, get_id

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def _check_fasta_match(rec, seq_dict):
    """exit(1) with a message on stderr unless some FASTA ID matches this GFF
    record's ID or its first feature's Alias qualifier."""
    alias = ""
    # Guard against records with no features (original indexed blindly).
    if rec.features and "Alias" in rec.features[0].qualifiers:
        alias = rec.features[0].qualifiers["Alias"][0]
    for x in seq_dict:
        if x == rec.id or x == alias:
            return
    sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n")
    exit(1)


def _cds_description(feat, nodesc):
    """Build the FASTA description string for a feature.

    The reported Location is 1-based and phase-adjusted so it points at the
    first translated base; a Name qualifier is included when present.
    """
    if nodesc:
        return ""
    if feat.strand == -1:
        location = FeatureLocation(
            feat.location.start + 1,
            feat.location.end - feat.phase,
            feat.strand,
        )
    else:
        location = FeatureLocation(
            feat.location.start + 1 + feat.phase,
            feat.location.end,
            feat.strand,
        )
    important_data = {"Location": location}
    if "Name" in feat.qualifiers:
        important_data["Name"] = feat.qualifiers.get("Name", [""])[0]
    return "[{}]".format(
        ";".join(
            "{key}={value}".format(key=k, value=v)
            for (k, v) in important_data.items()
        )
    )


def _extract_seq(rec, feat):
    """Phase-aware extraction of a feature's nucleotide sequence as a Seq.

    BUGFIX: the unique_cds branch previously iterated CompoundLocation parts
    but sliced feat.location.start:feat.location.end each time, emitting the
    whole span once per part; we now slice each part's own coordinates,
    matching the (correct) default-filter branch. It also passed a raw str
    to SeqRecord where every other path passes a Seq; we always return Seq.
    """
    if isinstance(feat.location, CompoundLocation):
        parts_seq = ""
        if feat.strand == -1:
            for part in feat.location.parts:
                parts_seq += str(
                    (rec.seq[part.start : part.end - feat.phase]).reverse_complement()
                )
        else:
            for part in feat.location.parts:
                parts_seq += str(rec.seq[part.start + feat.phase : part.end])
        return Seq(parts_seq)
    if feat.strand == -1:
        return (
            rec.seq[feat.location.start : feat.location.end - feat.phase]
        ).reverse_complement()
    return rec.seq[feat.location.start + feat.phase : feat.location.end]


def main(fasta, gff3, feature_filter=None, nodesc=False):
    """Yield lists of SeqRecords extracted from the genome for each feature.

    feature_filter:
      - "nice_cds":   genbank-style CDS records via gff2gb, deduplicated IDs
      - "unique_cds": CDS records with run-unique IDs (for BLAST input)
      - anything else: treated as a feature *type* to extract
    nodesc: when True, emit records with empty description lines.
    """
    if feature_filter == "nice_cds":
        from gff2gb import gff3_to_genbank as cpt_Gff2Gbk

        for rec in cpt_Gff2Gbk(gff3, fasta, 11):
            seenList = {}
            # gff2gb pads unmatched sequences with '?'
            if rec.seq[0] == "?":
                sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n")
                exit(1)
            for feat in sorted(rec.features, key=lambda x: x.location.start):
                if feat.type != "CDS":
                    continue

                # locus_tag qualifiers are normally lists; tolerate bare
                # strings too. (The original called .replace on the raw
                # value, which raises AttributeError for list-valued
                # qualifiers.)
                slug = feat.qualifiers.get("locus_tag", get_id(feat))
                if isinstance(slug, list):
                    slug = slug[0] if slug else get_id(feat)
                slug = str(slug).replace(" ", "-")

                # First occurrence keeps the bare name; duplicates get
                # _2, _3, ... (same numbering as the original bookkeeping).
                seenList[slug] = seenList.get(slug, 0) + 1
                append = "" if seenList[slug] == 1 else "_" + str(seenList[slug])

                if nodesc:
                    description = ""
                else:
                    feat.qualifiers["ID"] = [feat._ID]
                    product = feat.qualifiers.get("product", "")
                    description = (
                        "{1} [Location={0.location};ID={0.qualifiers[ID][0]}]".format(
                            feat, product
                        )
                    )
                yield [
                    SeqRecord(
                        feat.extract(rec).seq,
                        id=slug + append,
                        description=description,
                    )
                ]

    elif feature_filter == "unique_cds":
        seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
        seen_ids = {}

        for rec in gffParse(gff3, base_dict=seq_dict):
            _check_fasta_match(rec, seq_dict)
            newfeats = []
            for feat in sorted(
                feature_lambda(
                    rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False
                ),
                key=lambda f: f.location.start,
            ):
                # Make the ID unique across the whole run; collisions get a
                # random uuid suffix.
                nid = rec.id + "____" + feat.id
                if nid in seen_ids:
                    nid = nid + "__" + uuid.uuid4().hex
                feat.qualifiers["ID"] = [nid]
                newfeats.append(feat)
                seen_ids[nid] = True

                yield [
                    SeqRecord(
                        _extract_seq(rec, feat),
                        id=nid.replace(" ", "-"),
                        description=_cds_description(feat, nodesc),
                    )
                ]
            rec.features = newfeats
            rec.annotations = {}
            # gffWrite([rec], sys.stdout)
    else:
        seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

        for rec in gffParse(gff3, base_dict=seq_dict):
            _check_fasta_match(rec, seq_dict)
            for feat in sorted(
                feature_lambda(
                    rec.features,
                    feature_test_type,
                    {"type": feature_filter},
                    subfeatures=True,
                ),
                key=lambda f: f.location.start,
            ):
                id = feat.id
                if len(id) == 0:
                    id = get_id(feat)

                yield [
                    SeqRecord(
                        _extract_seq(rec, feat),
                        id=id.replace(" ", "-"),
                        description=_cds_description(feat, nodesc),
                    )
                ]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Export corresponding sequence in genome from GFF3", epilog=""
    )
    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
    parser.add_argument(
        "--feature_filter", default=None, help="Filter for specific feature types"
    )
    parser.add_argument(
        "--nodesc", action="store_true", help="Strip description field off"
    )
    args = parser.parse_args()
    for seq in main(**vars(args)):
        SeqIO.write(seq, sys.stdout, "fasta")
<tool id="edu.tamu.cpt.gff3.export_seq_unique" name="Feature Sequence Export Unique" version="1.2">
    <description>specially modified for sending CDSs to blast</description>
    <macros>
        <import>macros.xml</import>
        <import>cpt-macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- FASTA records are written to stdout ($default); the script's stderr
         is captured as the gff3 output, so error detection relies on the
         exit code (detect_errors="aggressive"). -->
    <command detect_errors="aggressive"><![CDATA[
@GENOME_SELECTOR_PRE@

'$__tool_directory__/gff3_extract_sequence.py'
@GENOME_SELECTOR@

@INPUT_GFF@

--feature_filter unique_cds
$nodesc
> '$default'
2> '$gff3'
]]></command>
    <inputs>
        <expand macro="genome_selector"/>
        <expand macro="gff3_input"/>
        <!-- BUGFIX: $nodesc was previously quoted in the command block. With
             falsevalue="" an unchecked box expanded to a literal '' argument,
             which argparse rejects as an unrecognized positional; boolean
             params with an empty falsevalue must be interpolated unquoted. -->
        <param label="Remove description (use if blasting)" name="nodesc" type="boolean" truevalue="--nodesc" falsevalue=""/>
    </inputs>
    <outputs>
        <data format="fasta" hidden="false" name="default"/>
        <data format="gff3" hidden="false" name="gff3"/>
    </outputs>
    <help><![CDATA[
**What it does**

Extract fasta sequences from a parent genome.
      ]]></help>
    <expand macro="citations"/>
</tool>
<macros>
    <!-- Shared requirement set expanded by every tool in this repo.
         NOTE(review): progressivemauve/bcbiogff look inherited from a mauve
         tool suite; the Python/Biopython deps live in cpt-macros.xml's
         gff_requirements macro instead. Confirm which set the tools that
         expand "requirements" actually need. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package">progressivemauve</requirement>
            <!--<requirement type="package" version="2.7">python</requirement>-->
            <requirement type="package" version="0.6.4">bcbiogff</requirement>
            <yield/>
        </requirements>
    </xml>
    <token name="@WRAPPER_VERSION@">2.4.0</token>
    <!-- Citation fragments for tools built on these aligners. -->
    <xml name="citation/progressive_mauve">
        <citation type="doi">10.1371/journal.pone.0011147</citation>
    </xml>
    <xml name="citation/gepard">
        <citation type="doi">10.1093/bioinformatics/btm039</citation>
    </xml>
    <!-- XMFA alignment input: token interpolates the value, macro declares
         the param. token_formats lets callers override accepted formats. -->
    <token name="@XMFA_INPUT@">
        '$xmfa'
    </token>
    <xml name="xmfa_input" token_formats="xmfa">
        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
    </xml>
    <token name="@XMFA_FA_INPUT@">
        '$sequences'
    </token>
    <xml name="xmfa_fa_input">
        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
    </xml>
    <!-- Reference genome chooser: history dataset or locally cached FASTA
         (from the all_fasta data table). Pair with @GENOME_SELECTOR_PRE@ /
         @GENOME_SELECTOR@ in the command block. -->
    <xml name="genome_selector">
        <conditional name="reference_genome">
            <param name="reference_genome_source" type="select" label="Reference Genome">
                <option value="history" selected="True">From History</option>
                <option value="cached">Locally Cached</option>
            </param>
            <when value="cached">
                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
                    <options from_data_table="all_fasta"/>
                </param>
            </when>
            <when value="history">
                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
            </when>
        </conditional>
    </xml>
    <xml name="gff3_input">
        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
    </xml>
    <xml name="input/gff3+fasta">
        <expand macro="gff3_input"/>
        <expand macro="genome_selector"/>
    </xml>
    <token name="@INPUT_GFF@">
        '$gff3_data'
    </token>
    <!-- @INPUT_FASTA@ and @GENOME_SELECTOR@ resolve to the same path;
         @GENOME_SELECTOR_PRE@ symlinks a history dataset to genomeref.fa so
         downstream tools see a stable filename. -->
    <token name="@INPUT_FASTA@">
        #if str($reference_genome.reference_genome_source) == 'cached':
            '${reference_genome.fasta_indexes.fields.path}'
        #else if str($reference_genome.reference_genome_source) == 'history':
            genomeref.fa
        #end if
    </token>
    <token name="@GENOME_SELECTOR_PRE@">
        #if $reference_genome.reference_genome_source == 'history':
            ln -s '$reference_genome.genome_fasta' genomeref.fa;
        #end if
    </token>
    <token name="@GENOME_SELECTOR@">
        #if str($reference_genome.reference_genome_source) == 'cached':
            '${reference_genome.fasta_indexes.fields.path}'
        #else if str($reference_genome.reference_genome_source) == 'history':
            genomeref.fa
        #end if
    </token>
</macros>