annotate cpt_export_seq_unique/gff3_extract_sequence.py @ 0:aaed5a0c774c draft

Uploaded
author cpt
date Fri, 01 Jul 2022 13:43:49 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
1 #!/usr/bin/env python
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
2 import sys
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
3 import argparse
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
4 import logging
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
5 import uuid
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
6 from CPT_GFFParser import gffParse, gffWrite
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
7 from Bio import SeqIO
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
8 from Bio.Seq import Seq
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
9 from Bio.SeqRecord import SeqRecord
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
10 from Bio.SeqFeature import FeatureLocation, CompoundLocation
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
11 from gff3 import feature_lambda, feature_test_type, get_id
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
12
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
13 logging.basicConfig(level=logging.INFO)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
14 log = logging.getLogger(__name__)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
15
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
16
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
17 def main(fasta, gff3, feature_filter=None, nodesc=False):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
18 if feature_filter == "nice_cds":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
19 from gff2gb import gff3_to_genbank as cpt_Gff2Gbk
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
20
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
21
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
22 for rec in cpt_Gff2Gbk(gff3, fasta, 11):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
23 seenList = {}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
24 if rec.seq[0] == "?":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
25 sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
26 exit(1)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
27 for feat in sorted(rec.features, key=lambda x: x.location.start):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
28 if feat.type != "CDS":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
29 continue
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
30
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
31 ind = 0
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
32 if (
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
33 str(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
34 feat.qualifiers.get("locus_tag", get_id(feat)).replace(" ", "-")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
35 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
36 in seenList.keys()
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
37 ):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
38 seenList[
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
39 str(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
40 feat.qualifiers.get("locus_tag", get_id(feat)).replace(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
41 " ", "-"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
42 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
43 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
44 ] += 1
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
45 ind = seenList[
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
46 str(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
47 feat.qualifiers.get("locus_tag", get_id(feat)).replace(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
48 " ", "-"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
49 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
50 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
51 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
52 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
53 seenList[
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
54 str(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
55 feat.qualifiers.get("locus_tag", get_id(feat)).replace(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
56 " ", "-"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
57 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
58 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
59 ] = 1
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
60 append = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
61 if ind != 0:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
62 append = "_" + str(ind)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
63
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
64 if nodesc:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
65 description = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
66 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
67 feat.qualifiers["ID"] = [feat._ID]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
68 product = feat.qualifiers.get("product", "")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
69 description = "{1} [Location={0.location};ID={0.qualifiers[ID][0]}]".format(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
70 feat, product
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
71 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
72 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
73 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
74 feat.extract(rec).seq,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
75 id=str(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
76 feat.qualifiers.get("locus_tag", get_id(feat)).replace(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
77 " ", "-"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
78 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
79 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
80 + append,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
81 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
82 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
83 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
84
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
85 elif feature_filter == "unique_cds":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
86 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
87 seen_ids = {}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
88
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
89 for rec in gffParse(gff3, base_dict=seq_dict):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
90 noMatch = True
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
91 if "Alias" in rec.features[0].qualifiers.keys():
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
92 lColumn = rec.features[0].qualifiers["Alias"][0]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
93 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
94 lColumn = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
95 for x in seq_dict:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
96 if x == rec.id or x == lColumn:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
97 noMatch = False
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
98 if noMatch:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
99 sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
100 exit(1)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
101 newfeats = []
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
102 for feat in sorted(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
103 feature_lambda(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
104 rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
105 ),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
106 key=lambda f: f.location.start,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
107 ):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
108 nid = rec.id + "____" + feat.id
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
109 if nid in seen_ids:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
110 nid = nid + "__" + uuid.uuid4().hex
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
111 feat.qualifiers["ID"] = [nid]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
112 newfeats.append(feat)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
113 seen_ids[nid] = True
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
114
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
115 if nodesc:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
116 description = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
117 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
118 if feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
119 important_data = {"Location": FeatureLocation(feat.location.start + 1, feat.location.end - feat.phase, feat.strand)}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
120 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
121 important_data = {"Location": FeatureLocation(feat.location.start + 1 + feat.phase, feat.location.end, feat.strand)}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
122 if "Name" in feat.qualifiers:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
123 important_data["Name"] = feat.qualifiers.get("Name", [""])[0]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
124
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
125 description = "[{}]".format(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
126 ";".join(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
127 [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
128 "{key}={value}".format(key=k, value=v)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
129 for (k, v) in important_data.items()
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
130 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
131 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
132 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
133 #if feat.id == "CPT_Privateer_006.p01":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
134 #print(feat)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
135 #exit()
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
136
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
137 if isinstance(feat.location, CompoundLocation):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
138 finSeq = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
139 if feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
140 for x in feat.location.parts:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
141 finSeq += str((rec.seq[feat.location.start: feat.location.end - feat.phase]).reverse_complement())
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
142 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
143 for x in feat.location.parts:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
144 finSeq += str(rec.seq[feat.location.start + feat.phase: feat.location.end])
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
145 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
146 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
147 finSeq,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
148 id=nid.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
149 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
150 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
151 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
152 elif feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
153 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
154 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
155 (rec.seq[feat.location.start: feat.location.end - feat.phase]).reverse_complement(),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
156 id=nid.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
157 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
158 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
159 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
160 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
161 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
162 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
163 #feat.extract(rec).seq,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
164 rec.seq[feat.location.start + feat.phase: feat.location.end],
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
165 id=nid.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
166 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
167 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
168 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
169 rec.features = newfeats
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
170 rec.annotations = {}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
171 #gffWrite([rec], sys.stdout)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
172 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
173 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
174
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
175 for rec in gffParse(gff3, base_dict=seq_dict):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
176 noMatch = True
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
177 if "Alias" in rec.features[0].qualifiers.keys():
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
178 lColumn = rec.features[0].qualifiers["Alias"][0]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
179 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
180 lColumn = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
181 for x in seq_dict:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
182 if x == rec.id or x == lColumn:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
183 noMatch = False
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
184 if noMatch:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
185 sys.stderr.write("Error: No Fasta ID matches GFF ID '" + rec.id + "'\n")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
186 exit(1)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
187 for feat in sorted(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
188 feature_lambda(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
189 rec.features,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
190 feature_test_type,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
191 {"type": feature_filter},
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
192 subfeatures=True,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
193 ),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
194 key=lambda f: f.location.start,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
195 ):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
196 id = feat.id
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
197 if len(id) == 0:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
198 id = get_id(feat)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
199
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
200 if nodesc:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
201 description = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
202 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
203 if feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
204 important_data = {"Location": FeatureLocation(feat.location.start + 1, feat.location.end - feat.phase, feat.strand)}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
205 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
206 important_data = {"Location": FeatureLocation(feat.location.start + 1 + feat.phase, feat.location.end, feat.strand)}
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
207 if "Name" in feat.qualifiers:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
208 important_data["Name"] = feat.qualifiers.get("Name", [""])[0]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
209
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
210 description = "[{}]".format(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
211 ";".join(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
212 [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
213 "{key}={value}".format(key=k, value=v)
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
214 for (k, v) in important_data.items()
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
215 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
216 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
217 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
218
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
219 if isinstance(feat.location, CompoundLocation):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
220 finSeq = ""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
221 if feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
222 for x in feat.location.parts:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
223 finSeq += str((rec.seq[x.start: x.end - feat.phase]).reverse_complement())
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
224 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
225 for x in feat.location.parts:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
226 finSeq += str(rec.seq[x.start + feat.phase: x.end])
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
227 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
228 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
229 Seq(finSeq),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
230 id=id.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
231 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
232 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
233 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
234
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
235 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
236
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
237 if feat.strand == -1:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
238 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
239 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
240 seq=Seq(str(rec.seq[feat.location.start: feat.location.end - feat.phase])).reverse_complement(),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
241 id=id.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
242 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
243 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
244 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
245 else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
246 yield [
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
247 SeqRecord(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
248 #feat.extract(rec).seq,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
249 seq=Seq(str(rec.seq[feat.location.start + feat.phase: feat.location.end])),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
250 id=id.replace(" ", "-"),
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
251 description=description,
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
252 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
253 ]
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
254
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
255
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
256 if __name__ == "__main__":
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
257 parser = argparse.ArgumentParser(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
258 description="Export corresponding sequence in genome from GFF3", epilog=""
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
259 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
260 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
261 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
262 parser.add_argument(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
263 "--feature_filter", default=None, help="Filter for specific feature types"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
264 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
265 parser.add_argument(
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
266 "--nodesc", action="store_true", help="Strip description field off"
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
267 )
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
268 args = parser.parse_args()
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
269 for seq in main(**vars(args)):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
270 #if isinstance(seq, list):
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
271 # for x in seq:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
272 # print(type(x.seq))
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
273 # SeqIO.write(x, sys.stdout, "fasta")
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
274 #else:
aaed5a0c774c Uploaded
cpt
parents:
diff changeset
275 SeqIO.write(seq, sys.stdout, "fasta")