comparison gff3_require_phage_start.py @ 4:5de686a70681 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:52:41 +0000
parents
children
comparison
equal deleted inserted replaced
3:67e5202e43c9 4:5de686a70681
1 #!/usr/bin/env python
2 import sys
3 import argparse
4 import logging
5 from Bio import SeqIO
6 from CPT_GFFParser import gffParse, gffWrite
7 from gff3 import feature_lambda, feature_test_type
8
9 logging.basicConfig(level=logging.INFO)
10 log = logging.getLogger(__name__)
11
12
13 def require_shinefind(gff3, fasta):
14 # Load up sequence(s) for GFF3 data
15 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
16 # Parse GFF3 records
17 for record in gffParse(gff3, base_dict=seq_dict):
18 # Reopen
19 genes = list(
20 feature_lambda(
21 record.features, feature_test_type, {"type": "gene"}, subfeatures=True
22 )
23 )
24 good_genes = []
25 for gene in genes:
26 cdss = list(
27 feature_lambda(
28 gene.sub_features,
29 feature_test_type,
30 {"type": "CDS"},
31 subfeatures=False,
32 )
33 )
34 if len(cdss) == 0:
35 continue
36
37 one_good_cds = False
38 for cds in cdss:
39 if cds.extract(record).seq[0:3].upper() in ("GTG", "ATG", "TTG"):
40 one_good_cds = True
41
42 if one_good_cds:
43 good_genes.append(gene)
44 record.features = good_genes
45 record.annotations = {}
46 yield record
47
48
49 if __name__ == "__main__":
50 parser = argparse.ArgumentParser(description="Require specific start codons")
51 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
52 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
53 args = parser.parse_args()
54
55 for rec in require_shinefind(**vars(args)):
56 gffWrite([rec], sys.stdout)