Mercurial > repos > cpt > cpt_req_phage_start
diff gff3_require_phage_start.py @ 4:5de686a70681 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:52:41 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3_require_phage_start.py Mon Jun 05 02:52:41 2023 +0000 @@ -0,0 +1,56 @@ +#!/usr/bin/env python +import sys +import argparse +import logging +from Bio import SeqIO +from CPT_GFFParser import gffParse, gffWrite +from gff3 import feature_lambda, feature_test_type + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def require_shinefind(gff3, fasta): + # Load up sequence(s) for GFF3 data + seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) + # Parse GFF3 records + for record in gffParse(gff3, base_dict=seq_dict): + # Reopen + genes = list( + feature_lambda( + record.features, feature_test_type, {"type": "gene"}, subfeatures=True + ) + ) + good_genes = [] + for gene in genes: + cdss = list( + feature_lambda( + gene.sub_features, + feature_test_type, + {"type": "CDS"}, + subfeatures=False, + ) + ) + if len(cdss) == 0: + continue + + one_good_cds = False + for cds in cdss: + if cds.extract(record).seq[0:3].upper() in ("GTG", "ATG", "TTG"): + one_good_cds = True + + if one_good_cds: + good_genes.append(gene) + record.features = good_genes + record.annotations = {} + yield record + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Require specific start codons") + parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome") + parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations") + args = parser.parse_args() + + for rec in require_shinefind(**vars(args)): + gffWrite([rec], sys.stdout)