annotate genome_editor.py @ 4:78ce8a1a8fd1 draft

planemo upload commit 852ac96ca53a2ffa0947e6df5e24671866b642f5
author cpt
date Sun, 23 Jul 2023 01:43:12 +0000
parents 134bb2d7cdfd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
1 #!/usr/bin/env python
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
2 import logging
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
3 import copy
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
4 import argparse
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
5 import tsv
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
6 from Bio import SeqIO
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
7 from Bio.Seq import Seq
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
8 from Bio.SeqFeature import FeatureLocation
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
9 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature, convertSeqRec
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
10 from gff3 import feature_lambda, feature_test_contains
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
11
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
12 logging.basicConfig(level=logging.INFO)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
13 log = logging.getLogger(__name__)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
14
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
15
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
16 def mutate(gff3, fasta, changes, customSeqs, new_id):
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
17 # Change Language
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
18 # - we can only accept ONE genome as an input. (TODO: support multiple?)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
19 # - we can only build ONE genome as an output. (TODO: support multiple?)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
20 # - must allow selection of various regions
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
21 # '1,1000,+ 40,100,- custom_seq_1'
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
22 try:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
23 custom_seqs = SeqIO.to_dict(SeqIO.parse(customSeqs, "fasta"))
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
24 except:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
25 custom_seqs = {}
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
26 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
27 # Pull first and onl record
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
28 rec = list(gffParse(gff3, base_dict=seq_dict))[0]
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
29 # Create a "clean" record
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
30 new_record = copy.deepcopy(rec)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
31 new_record.id = new_id
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
32 new_record.seq = Seq("")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
33 new_record.features = []
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
34 new_record.annotations = {}
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
35 # Process changes.
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
36 chain = []
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
37 topFeats = {}
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
38 covered = 0
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
39 for feat in rec.features:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
40 if "ID" in feat.qualifiers.keys():
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
41 topFeats[feat.qualifiers["ID"][0]] = feat.location.start
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
42 for change in changes:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
43 if "," in change:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
44 (start, end, strand) = change.split(",")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
45 start = int(start) - 1
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
46 end = int(end)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
47
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
48 # Make any complaints
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
49 broken_feature_start = list(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
50 feature_lambda(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
51 rec.features,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
52 feature_test_contains,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
53 {"index": start},
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
54 subfeatures=False,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
55 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
56 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
57 if len(broken_feature_start) > 0:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
58 pass
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
59 # log.info("DANGER: Start index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", start, broken_feature_start[0].id, broken_feature_start[0].location)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
60 broken_feature_end = list(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
61 feature_lambda(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
62 rec.features,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
63 feature_test_contains,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
64 {"index": end},
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
65 subfeatures=False,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
66 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
67 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
68 if len(broken_feature_end) > 0:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
69 pass
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
70 # log.info("DANGER: End index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", end, broken_feature_end[0].id, broken_feature_end[0].location)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
71
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
72 # Ok, fetch features
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
73 if strand == "+":
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
74 tmp_req = rec[start:end]
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
75 else:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
76 tmp_req = rec[start:end].reverse_complement(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
77 id=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
78 name=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
79 description=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
80 features=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
81 annotations=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
82 letter_annotations=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
83 dbxrefs=True,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
84 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
85 tmp_req = convertSeqRec(tmp_req)[0]
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
86
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
87 def update_location(feature, shiftS):
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
88 feature.location = FeatureLocation(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
89 feature.location.start + shiftS,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
90 feature.location.end + shiftS,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
91 feature.strand,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
92 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
93 for i in feature.sub_features:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
94 i = update_location(i, shiftS)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
95 return feature
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
96
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
97 # for feature in tmp_req.features:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
98
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
99 chain.append(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
100 [
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
101 rec.id,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
102 start + 1,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
103 end,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
104 strand,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
105 new_record.id,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
106 len(new_record) + 1,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
107 len(new_record) + (end - start),
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
108 "+",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
109 ]
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
110 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
111
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
112 covered += len(new_record.seq)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
113 print(covered)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
114 new_record.seq += tmp_req.seq
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
115 # NB: THIS MUST USE BIOPYTHON 1.67. 1.68 Removes access to
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
116 # subfeatures, which means you will only get top-level features.
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
117 startInd = len(new_record.features)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
118 new_record.features += tmp_req.features
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
119
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
120 for i in new_record.features[startInd:]:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
121 i.location = FeatureLocation(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
122 i.location.start + covered,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
123 i.location.end + covered,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
124 i.location.strand,
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
125 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
126 if "ID" not in i.qualifiers.keys():
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
127 continue
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
128 diffS = i.location.start - topFeats[i.qualifiers["ID"][0]]
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
129 subFeats = i.sub_features
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
130 for j in subFeats:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
131 j = update_location(j, diffS)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
132 else:
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
133 new_record.seq += custom_seqs[change].seq
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
134 yield new_record, chain
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
135
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
136
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
137 if __name__ == "__main__":
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
138 parser = argparse.ArgumentParser()
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
139 parser.add_argument("fasta", type=argparse.FileType("r"), help="Sequence")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
140 parser.add_argument("gff3", type=argparse.FileType("r"), help="Annotations")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
141 parser.add_argument("new_id", help="Append to ID", default="_v2")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
142 parser.add_argument(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
143 "--out_fasta",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
144 type=argparse.FileType("w"),
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
145 help="Output fasta",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
146 default="out.fa",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
147 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
148 parser.add_argument(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
149 "--out_gff3",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
150 type=argparse.FileType("w"),
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
151 help="Output gff3",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
152 default="out.gff3",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
153 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
154 parser.add_argument(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
155 "--out_simpleChain",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
156 type=argparse.FileType("w"),
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
157 help="Output simple chain (i.e. not a real UCSC chain file)",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
158 default="out.chain",
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
159 )
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
160 parser.add_argument("--changes", nargs="+")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
161 parser.add_argument("--customSeqs", type=argparse.FileType("r"))
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
162 args = parser.parse_args()
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
163
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
164 for rec, chain in mutate(
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
165 args.gff3, args.fasta, args.changes, args.customSeqs, args.new_id
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
166 ):
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
167 # TODO: Check that this appends and doesn't overwirte
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
168 gffWrite([rec], args.out_gff3)
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
169 SeqIO.write([rec], args.out_fasta, "fasta")
134bb2d7cdfd planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
170 tsv.dump(chain, args.out_simpleChain)