Mercurial > repos > cpt > cpt_req_phage_start
comparison cpt_req_phage_start/gff3_require_phage_start.py @ 0:45a5cf6b0ff4 draft
Uploaded
author | cpt |
---|---|
date | Fri, 13 May 2022 05:31:56 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:45a5cf6b0ff4 |
---|---|
1 #!/usr/bin/env python | |
2 import sys | |
3 import argparse | |
4 import logging | |
5 from Bio import SeqIO | |
6 from CPT_GFFParser import gffParse, gffWrite | |
7 from gff3 import feature_lambda, feature_test_type | |
8 | |
9 logging.basicConfig(level=logging.INFO) | |
10 log = logging.getLogger(__name__) | |
11 | |
12 | |
def require_shinefind(gff3, fasta):
    """Yield GFF3 records reduced to genes whose CDS begins with a start codon.

    Despite its name, this function does not look at Shine-Dalgarno sites;
    it filters genes by the first three bases of their CDS features.

    For every record parsed from ``gff3`` (with sequences taken from
    ``fasta``), each ``gene`` feature is kept only if at least one of its
    ``CDS`` sub-features extracts to a sequence starting with GTG, ATG or
    TTG (case-insensitive). Genes without any CDS are dropped.

    Args:
        gff3: Open handle (or whatever ``gffParse`` accepts) for the GFF3
            annotations.
        fasta: Open handle (or whatever ``SeqIO.parse`` accepts) for the
            FASTA genome the annotations refer to.

    Yields:
        Each parsed record, with ``features`` replaced by the list of
        retained genes and ``annotations`` reset to an empty dict.
    """
    # Start codons accepted at the 5' end of a CDS.
    start_codons = ("GTG", "ATG", "TTG")

    # Load up sequence(s) for GFF3 data, keyed by sequence ID.
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

    # Parse GFF3 records against those sequences.
    for record in gffParse(gff3, base_dict=seq_dict):
        good_genes = []
        # NOTE(review): feature_lambda is assumed to return an iterable of
        # the features matching the test -- confirm against the gff3 module.
        for gene in feature_lambda(
            record.features, feature_test_type, {"type": "gene"}, subfeatures=True
        ):
            cdss = feature_lambda(
                gene.sub_features,
                feature_test_type,
                {"type": "CDS"},
                subfeatures=False,
            )
            # any() stops at the first CDS with a valid start codon and is
            # False for a gene with no CDS at all, so such genes are skipped.
            if any(
                cds.extract(record).seq[0:3].upper() in start_codons
                for cds in cdss
            ):
                good_genes.append(gene)

        # Only the surviving genes are written back; record.features is not
        # touched until iteration over it has finished.
        record.features = good_genes
        record.annotations = {}
        yield record
47 | |
48 | |
if __name__ == "__main__":
    # Command-line entry point: takes the FASTA genome and the GFF3
    # annotations as positional arguments and writes the filtered GFF3
    # to standard output.
    cli = argparse.ArgumentParser(description="Require specific start codons")
    cli.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
    cli.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
    options = cli.parse_args()

    # Pass the two handles by name rather than unpacking the namespace.
    filtered_records = require_shinefind(gff3=options.gff3, fasta=options.fasta)
    for filtered_record in filtered_records:
        # Each record is written on its own, as soon as it is produced.
        gffWrite([filtered_record], sys.stdout)