0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import logging
|
|
4 import argparse
|
|
5 from CPT_GFFParser import gffParse, gffWrite
|
|
6 from Bio.SeqFeature import SeqFeature
|
|
7 from gff3 import feature_lambda, feature_test_type
|
|
8
|
|
9 logging.basicConfig(level=logging.INFO)
|
|
10 log = logging.getLogger(__name__)
|
|
11
|
|
12
|
|
def fixed_feature(rec):
    """Yield the gene features of ``rec`` with mRNA sub-features filtered out.

    Gene features are selected from ``rec.features`` with the project's
    ``feature_lambda`` / ``feature_test_type`` helpers. For each gene, the
    sub-features that pass the inverted ``mRNA`` type filter are collected,
    each has its ``Parent`` qualifier set to the gene's ``ID`` qualifier, and
    the resulting list replaces ``gene.sub_features``.

    The gene objects are modified in place and then yielded, one at a time.

    NOTE(review): the exact traversal semantics of ``feature_lambda``
    (``subfeatures=True``, ``invert=True``) are defined in the ``gff3``
    module, not here -- confirm there how nested features are visited.
    """
    # Every gene feature is a candidate for having its mRNAs stripped.
    genes = feature_lambda(
        rec.features, feature_test_type, {"type": "gene"}, subfeatures=True
    )
    for gene in genes:
        # Everything below the gene that is *not* an mRNA.
        non_mrna = feature_lambda(
            gene.sub_features,
            feature_test_type,
            {"type": "mRNA"},
            subfeatures=True,
            invert=True,
        )
        kept = []
        for child in non_mrna:
            # Re-parent the surviving feature directly onto the gene.
            child.qualifiers["Parent"] = gene.qualifiers["ID"]
            kept.append(child)
        # Swap in the filtered list before handing the gene to the caller.
        gene.sub_features = kept
        yield gene
|
|
33
|
|
34
|
|
def gff_filter(gff3):
    """Rewrite each record of ``gff3`` with fixed gene features to stdout.

    For every record produced by ``gffParse(gff3)``: the record's features
    are replaced by the output of ``fixed_feature`` ordered by
    ``location.start``, its annotations are reset to an empty dict, and the
    record is written on its own to ``sys.stdout`` with ``gffWrite``.

    Args:
        gff3: GFF3 input passed straight to ``gffParse`` (the command-line
            entry point supplies an open text file handle).
    """
    for record in gffParse(gff3):
        genes = list(fixed_feature(record))
        # Stable in-place sort by start coordinate.
        genes.sort(key=lambda feat: feat.location.start)
        record.features = genes
        record.annotations = {}
        # Records are emitted one at a time rather than collected first.
        gffWrite([record], sys.stdout)
|
|
40
|
|
41
|
|
if __name__ == "__main__":
    # Command-line entry point: one positional GFF3 file, opened for reading
    # by argparse, is forwarded to gff_filter as the ``gff3`` keyword.
    cli = argparse.ArgumentParser(
        description="Fix gene model from naive ORF caller"
    )
    cli.add_argument(
        "gff3",
        type=argparse.FileType("r"),
        help="GFF3 annotations",
    )
    options = cli.parse_args()
    gff_filter(**vars(options))
|