Mercurial > repos > cpt > cpt_fix_sixpack
view gff3_fix_sixpack.py @ 5:e23a13d56fce draft default tip
planemo upload commit f33bdf952d796c5d7a240b132af3c4cbd102decc
author | cpt |
---|---|
date | Fri, 05 Jan 2024 05:51:19 +0000 |
parents | efa4dfc23549 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Rewrite a GFF3 gene model so that genes no longer carry mRNA subfeatures.

Reads GFF3 annotations, rebuilds every gene's subfeature list without the
mRNA entries, and writes the resulting records as GFF3 to standard output.
"""
import sys
import logging
import argparse
from CPT_GFFParser import gffParse, gffWrite
from Bio.SeqFeature import SeqFeature
from gff3 import feature_lambda, feature_test_type

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def fixed_feature(rec):
    """Yield each gene feature of ``rec`` with its mRNA subfeatures dropped.

    Args:
        rec: A parsed record exposing a ``features`` list, as produced by
            ``gffParse``.

    Yields:
        The gene features selected by ``feature_lambda``. Each yielded gene
        has its ``sub_features`` attribute replaced by the filtered list, so
        the object handed out by ``feature_lambda`` is modified, not copied
        here.

    Raises:
        KeyError: If a gene that has retained subfeatures lacks an ``ID``
            qualifier.
    """
    # Get all gene features to remove the mRNAs from.
    for gene in feature_lambda(
        rec.features, feature_test_type, {"type": "gene"}, subfeatures=True
    ):
        kept = []
        # Select everything that is *not* an mRNA (invert=True).
        # NOTE(review): feature_lambda presumably also descends into the mRNA's
        # own children, which is what would move them up under the gene;
        # confirm against the gff3 helper module.
        for sf in feature_lambda(
            gene.sub_features,
            feature_test_type,
            {"type": "mRNA"},
            subfeatures=True,
            invert=True,
        ):
            # Point the retained subfeature directly at the gene.
            sf.qualifiers["Parent"] = gene.qualifiers["ID"]
            kept.append(sf)
        # Override the original subfeatures with the filtered list.
        gene.sub_features = kept
        yield gene


def gff_filter(gff3):
    """Write every record of ``gff3`` to stdout with fixed gene features.

    For each record returned by ``gffParse`` the feature list is replaced by
    the genes from ``fixed_feature`` ordered by ``location.start``, the record
    annotations are reset to an empty dict, and the record is written to
    ``sys.stdout`` with ``gffWrite``.

    Args:
        gff3: The GFF3 input passed straight to ``gffParse`` (the command
            line supplies an open text file handle).
    """
    for rec in gffParse(gff3):
        # sorted() accepts any iterable, so the generator is consumed directly
        # instead of being copied into an intermediate list first.
        rec.features = sorted(fixed_feature(rec), key=lambda f: f.location.start)
        rec.annotations = {}
        gffWrite([rec], sys.stdout)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fix gene model from naive ORF caller")
    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
    args = parser.parse_args()
    # argparse.FileType opens the handle for us; close it once processing ends.
    with args.gff3:
        gff_filter(**vars(args))