Mercurial > repos > cpt > cpt_lipop_conversion
comparison cpt_lipop_conv/lipoP_to_gff3.py @ 0:adde21b6bdb3 draft
Uploaded
author | cpt |
---|---|
date | Fri, 13 May 2022 05:18:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:adde21b6bdb3 |
---|---|
1 #!/usr/bin/env python | |
2 import sys | |
3 import copy | |
4 import argparse | |
5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature | |
6 from Bio.Seq import Seq | |
7 from Bio.SeqRecord import SeqRecord | |
8 from Bio.SeqFeature import FeatureLocation | |
9 from gff3 import feature_lambda, feature_test_type, get_id | |
10 | |
11 | |
12 def lipoP_gff(lipoIn, gff3In, jBrowseOut, filterSP2): | |
13 | |
14 orgIDs = {} | |
15 orgID = "" | |
16 | |
17 # Take and parse the txt output into a sequence of records | |
18 # Dict of X records, with the ID as key and an array Y of each cleavage site as the value, | |
19 for row in lipoIn: | |
20 if row.startswith("#"): | |
21 orgID = "" | |
22 continue | |
23 | |
24 rowElem = row.split("\t") | |
25 | |
26 orgID = rowElem[0] | |
27 | |
28 if filterSP2: | |
29 if rowElem[2] == "CleavII": | |
30 if not (orgID in orgIDs.keys()): | |
31 orgIDs[orgID] = [] | |
32 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) | |
33 else: | |
34 if rowElem[2] in "CleavII": | |
35 if not (orgID in orgIDs.keys()): | |
36 orgIDs[orgID] = [] | |
37 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) | |
38 | |
39 | |
40 # Rebase | |
41 for gff in gffParse(gff3In): | |
42 keepSeq = [] | |
43 for xRec in gff.features: | |
44 cdss = list( | |
45 feature_lambda( | |
46 xRec.sub_features, | |
47 feature_test_type, | |
48 {"type": "CDS"}, | |
49 subfeatures=False, | |
50 ) | |
51 ) | |
52 findCleave = "" | |
53 cdsOff = 0 | |
54 for cds in cdss: | |
55 if cds.id in orgIDs: | |
56 findCleave = cds.id | |
57 break | |
58 cdsOff += 1 | |
59 if findCleave == "": | |
60 if not jBrowseOut: | |
61 keepSeq.append(xRec) | |
62 continue | |
63 | |
64 #if jBrowseOut: | |
65 # xRec.sub_features = [] | |
66 | |
67 i = 0 | |
68 for cleaveBase in orgIDs[findCleave]: | |
69 tempQuals = xRec.qualifiers.copy() | |
70 i += 1 | |
71 tempQuals["ID"] = xRec.id + "_cleavage_" + str(i) | |
72 | |
73 xRec.sub_features.append( | |
74 gffSeqFeature( | |
75 FeatureLocation( | |
76 cdss[cdsOff].location.start + (cleaveBase * 3) - 1, | |
77 cdss[cdsOff].location.start + (cleaveBase * 3) + 1, | |
78 ), | |
79 type="cleavage_site", | |
80 strand=xRec.location.strand, | |
81 qualifiers=tempQuals, | |
82 ) | |
83 ) | |
84 keepSeq.append(xRec) | |
85 | |
86 gff.features = keepSeq | |
87 gffWrite([gff], sys.stdout) | |
88 | |
89 | |
90 if __name__ == "__main__": | |
91 parser = argparse.ArgumentParser(description="add parent gene features to CDSs") | |
92 parser.add_argument( | |
93 "lipoIn", type=argparse.FileType("r"), help="LipoP tool's .txt output" | |
94 ) | |
95 parser.add_argument( | |
96 "gff3In", type=argparse.FileType("r"), help="GFF3 to rebase LipoP results" | |
97 ) | |
98 parser.add_argument( | |
99 "--jBrowseOut", | |
100 type=bool, | |
101 default=False, | |
102 help="Prepare Output for jBrowse instance", | |
103 ) | |
104 parser.add_argument( | |
105 "--filterSP2", | |
106 action='store_true', | |
107 help="Filter for only SPII sites", | |
108 ) | |
109 args = parser.parse_args() | |
110 lipoP_gff(**vars(args)) |