0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import copy
|
|
4 import argparse
|
|
5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
|
|
6 from Bio.Seq import Seq
|
|
7 from Bio.SeqRecord import SeqRecord
|
|
8 from Bio.SeqFeature import FeatureLocation
|
|
9 from gff3 import feature_lambda, feature_test_type, get_id
|
|
10
|
|
11
|
|
12 def lipoP_gff(lipoIn, gff3In, jBrowseOut, filterSP2):
|
|
13
|
|
14 orgIDs = {}
|
|
15 orgID = ""
|
|
16
|
|
17 # Take and parse the txt output into a sequence of records
|
|
18 # Dict of X records, with the ID as key and an array Y of each cleavage site as the value,
|
|
19 for row in lipoIn:
|
|
20 if row.startswith("#"):
|
|
21 orgID = ""
|
|
22 continue
|
|
23
|
|
24 rowElem = row.split("\t")
|
|
25
|
|
26 orgID = rowElem[0]
|
|
27
|
|
28 if filterSP2:
|
|
29 if rowElem[2] == "CleavII":
|
|
30 if not (orgID in orgIDs.keys()):
|
|
31 orgIDs[orgID] = []
|
|
32 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4])))
|
|
33 else:
|
|
34 if rowElem[2] in "CleavII":
|
|
35 if not (orgID in orgIDs.keys()):
|
|
36 orgIDs[orgID] = []
|
|
37 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4])))
|
|
38
|
|
39
|
|
40 # Rebase
|
|
41 for gff in gffParse(gff3In):
|
|
42 keepSeq = []
|
|
43 for xRec in gff.features:
|
|
44 cdss = list(
|
|
45 feature_lambda(
|
|
46 xRec.sub_features,
|
|
47 feature_test_type,
|
|
48 {"type": "CDS"},
|
|
49 subfeatures=False,
|
|
50 )
|
|
51 )
|
|
52 findCleave = ""
|
|
53 cdsOff = 0
|
|
54 for cds in cdss:
|
|
55 if cds.id in orgIDs:
|
|
56 findCleave = cds.id
|
|
57 break
|
|
58 cdsOff += 1
|
|
59 if findCleave == "":
|
|
60 if not jBrowseOut:
|
|
61 keepSeq.append(xRec)
|
|
62 continue
|
|
63
|
|
64 #if jBrowseOut:
|
|
65 # xRec.sub_features = []
|
|
66
|
|
67 i = 0
|
|
68 for cleaveBase in orgIDs[findCleave]:
|
|
69 tempQuals = xRec.qualifiers.copy()
|
|
70 i += 1
|
|
71 tempQuals["ID"] = xRec.id + "_cleavage_" + str(i)
|
|
72
|
|
73 xRec.sub_features.append(
|
|
74 gffSeqFeature(
|
|
75 FeatureLocation(
|
|
76 cdss[cdsOff].location.start + (cleaveBase * 3) - 1,
|
|
77 cdss[cdsOff].location.start + (cleaveBase * 3) + 1,
|
|
78 ),
|
|
79 type="cleavage_site",
|
|
80 strand=xRec.location.strand,
|
|
81 qualifiers=tempQuals,
|
|
82 )
|
|
83 )
|
|
84 keepSeq.append(xRec)
|
|
85
|
|
86 gff.features = keepSeq
|
|
87 gffWrite([gff], sys.stdout)
|
|
88
|
|
89
|
|
90 if __name__ == "__main__":
|
|
91 parser = argparse.ArgumentParser(description="add parent gene features to CDSs")
|
|
92 parser.add_argument(
|
|
93 "lipoIn", type=argparse.FileType("r"), help="LipoP tool's .txt output"
|
|
94 )
|
|
95 parser.add_argument(
|
|
96 "gff3In", type=argparse.FileType("r"), help="GFF3 to rebase LipoP results"
|
|
97 )
|
|
98 parser.add_argument(
|
|
99 "--jBrowseOut",
|
|
100 type=bool,
|
|
101 default=False,
|
|
102 help="Prepare Output for jBrowse instance",
|
|
103 )
|
|
104 parser.add_argument(
|
|
105 "--filterSP2",
|
|
106 action='store_true',
|
|
107 help="Filter for only SPII sites",
|
|
108 )
|
|
109 args = parser.parse_args()
|
|
110 lipoP_gff(**vars(args))
|