comparison cpt_lipop_conv/lipoP_to_gff3.py @ 0:adde21b6bdb3 draft

Uploaded
author cpt
date Fri, 13 May 2022 05:18:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:adde21b6bdb3
1 #!/usr/bin/env python
2 import sys
3 import copy
4 import argparse
5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
6 from Bio.Seq import Seq
7 from Bio.SeqRecord import SeqRecord
8 from Bio.SeqFeature import FeatureLocation
9 from gff3 import feature_lambda, feature_test_type, get_id
10
11
12 def lipoP_gff(lipoIn, gff3In, jBrowseOut, filterSP2):
13
14 orgIDs = {}
15 orgID = ""
16
17 # Take and parse the txt output into a sequence of records
18 # Dict of X records, with the ID as key and an array Y of each cleavage site as the value,
19 for row in lipoIn:
20 if row.startswith("#"):
21 orgID = ""
22 continue
23
24 rowElem = row.split("\t")
25
26 orgID = rowElem[0]
27
28 if filterSP2:
29 if rowElem[2] == "CleavII":
30 if not (orgID in orgIDs.keys()):
31 orgIDs[orgID] = []
32 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4])))
33 else:
34 if rowElem[2] in "CleavII":
35 if not (orgID in orgIDs.keys()):
36 orgIDs[orgID] = []
37 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4])))
38
39
40 # Rebase
41 for gff in gffParse(gff3In):
42 keepSeq = []
43 for xRec in gff.features:
44 cdss = list(
45 feature_lambda(
46 xRec.sub_features,
47 feature_test_type,
48 {"type": "CDS"},
49 subfeatures=False,
50 )
51 )
52 findCleave = ""
53 cdsOff = 0
54 for cds in cdss:
55 if cds.id in orgIDs:
56 findCleave = cds.id
57 break
58 cdsOff += 1
59 if findCleave == "":
60 if not jBrowseOut:
61 keepSeq.append(xRec)
62 continue
63
64 #if jBrowseOut:
65 # xRec.sub_features = []
66
67 i = 0
68 for cleaveBase in orgIDs[findCleave]:
69 tempQuals = xRec.qualifiers.copy()
70 i += 1
71 tempQuals["ID"] = xRec.id + "_cleavage_" + str(i)
72
73 xRec.sub_features.append(
74 gffSeqFeature(
75 FeatureLocation(
76 cdss[cdsOff].location.start + (cleaveBase * 3) - 1,
77 cdss[cdsOff].location.start + (cleaveBase * 3) + 1,
78 ),
79 type="cleavage_site",
80 strand=xRec.location.strand,
81 qualifiers=tempQuals,
82 )
83 )
84 keepSeq.append(xRec)
85
86 gff.features = keepSeq
87 gffWrite([gff], sys.stdout)
88
89
90 if __name__ == "__main__":
91 parser = argparse.ArgumentParser(description="add parent gene features to CDSs")
92 parser.add_argument(
93 "lipoIn", type=argparse.FileType("r"), help="LipoP tool's .txt output"
94 )
95 parser.add_argument(
96 "gff3In", type=argparse.FileType("r"), help="GFF3 to rebase LipoP results"
97 )
98 parser.add_argument(
99 "--jBrowseOut",
100 type=bool,
101 default=False,
102 help="Prepare Output for jBrowse instance",
103 )
104 parser.add_argument(
105 "--filterSP2",
106 action='store_true',
107 help="Filter for only SPII sites",
108 )
109 args = parser.parse_args()
110 lipoP_gff(**vars(args))