Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/tests/Test_F_GFF3Maker.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 import unittest | |
2 import os | |
3 from commons.core.utils.FileUtils import FileUtils | |
4 from commons.core.sql.DbMySql import DbMySql | |
5 from commons.tools.GFF3Maker import GFF3Maker | |
6 | |
7 class Test_F_GFF3Maker(unittest.TestCase): | |
8 | |
def setUp(self):
    """Build the fixture shared by every test.

    Opens a DbMySql connection, writes the chromosome FASTA file, creates the
    "seq" table from it, and records the file names the tests read and write.
    """
    project = "projectName"
    self._projectName = project
    self._iDb = DbMySql()
    self._tablesFileName = "annotation_tables.txt"
    self._fastaFileName = project + "_chr.fa"
    self._fastaTableName = project + "_chr_seq"
    # The FASTA file has to exist before the table is created from it.
    self._writeFastaFile(self._fastaFileName)
    self._iDb.createTable(self._fastaTableName, "seq", self._fastaFileName, True)
    self._inputFileName = project + "_chr_allTEs_nr_noSSR_join.pathOrSet"
    # Expected / observed GFF3 outputs; some tests re-point these attributes.
    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    self._obsGFFEmptyFileName = "lm_SuperContig_30_v2.gff3"
21 | |
def tearDown(self):
    """Drop the test tables, close the connection and delete the test files.

    The cleanup is tolerant of a test that failed part-way through:
    the input table is dropped only if the test got as far as naming it,
    the connection is closed even if dropping a table raises, and only the
    files that actually exist are removed. This keeps a cleanup error from
    hiding the original test failure.
    """
    try:
        self._iDb.dropTable(self._fastaTableName)
        # _inputTableName is assigned by each test, not by setUp, so it is
        # absent when a test fails before creating its input table.
        inputTableName = getattr(self, "_inputTableName", None)
        if inputTableName is not None:
            self._iDb.dropTable(inputTableName)
    finally:
        try:
            self._iDb.close()
        finally:
            leftovers = (
                self._obsGFFFileName,
                self._expGFFFileName,
                self._fastaFileName,
                self._tablesFileName,
                self._inputFileName,
            )
            for fileName in leftovers:
                # A failed run may never have produced some of these files.
                if os.path.exists(fileName):
                    os.remove(fileName)
31 | |
def test_run_as_script_path_with_seq_withAllFiles(self):
    """Run GFF3Maker.py with the -w, -a and -p flags on a "path" table.

    Both observed GFF3 files (lm_SuperContig_29_v2 and lm_SuperContig_30_v2)
    must be identical to the expected files written by this test.
    """
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile(self._expGFFFileName)
    expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"

    try:
        self._writeExpEmptyPathGFFFileWithSeq(expGFFEmptyFileName)

        cmd = "GFF3Maker.py -t %s -f %s -w -a -p"% (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))
    finally:
        # tearDown does not remove these two files. Deleting them here, even
        # when an assertion fails, keeps a stale lm_SuperContig_30_v2.gff3
        # from breaking the tests that assert this file does not exist.
        for fileName in (expGFFEmptyFileName, self._obsGFFEmptyFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
49 | |
def test_run_as_script_path_without_seq_withAllFiles(self):
    """Run GFF3Maker.py with the -a and -p flags (no -w) on a "path" table.

    Both observed GFF3 files (lm_SuperContig_29_v2 and lm_SuperContig_30_v2)
    must be identical to the expected files written by this test.
    """
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)
    expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"

    try:
        self._writeExpEmptyPathGFFFile(expGFFEmptyFileName)

        cmd = "GFF3Maker.py -t %s -f %s -a -p" % (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))
    finally:
        # tearDown does not remove these two files. Deleting them here, even
        # when an assertion fails, keeps a stale lm_SuperContig_30_v2.gff3
        # from breaking the tests that assert this file does not exist.
        for fileName in (expGFFEmptyFileName, self._obsGFFEmptyFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
67 | |
def test_run_without_seq(self):
    """Drive GFF3Maker through its API with match parts set compulsory.

    The observed lm_SuperContig_29_v2 GFF3 file must equal the expected one,
    and FileUtils must report that lm_SuperContig_30_v2.gff3 does not exist.
    """
    pathTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.setAreMatchPartCompulsory(True)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
83 | |
def test_run_without_seq_and_match_part_not_compulsory(self):
    """Drive GFF3Maker through its API without calling setAreMatchPartCompulsory.

    The observed lm_SuperContig_29_v2 GFF3 file must equal the expected one,
    and FileUtils must report that lm_SuperContig_30_v2.gff3 does not exist.
    """
    pathTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
98 | |
def test_run_as_script_pathReversed(self):
    """Run GFF3Maker.py with -w and -p on a "path" table written by _writePathFileReverse.

    The observed lm_SuperContig_29_v2 GFF3 file must equal the expected one,
    and FileUtils must report that lm_SuperContig_30_v2.gff3 does not exist.
    """
    pathTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._writeTablesFile("path")
    self._writePathFileReverse(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFileReversed(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    commandLine = "GFF3Maker.py -t %s -f %s -w -p" % scriptArgs
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
111 | |
def test_run_as_script_set(self):
    """Run GFF3Maker.py with -w and -p on a "set" table.

    The observed lm_SuperContig_29_v2 GFF3 file must equal the expected one,
    and FileUtils must report that lm_SuperContig_30_v2.gff3 does not exist.
    """
    setTableName = "%s_chr_allTEs_nr_noSSR_join_set" % self._projectName
    self._writeTablesFile("set")
    self._writeSetFile(self._inputFileName)
    self._inputTableName = setTableName
    self._iDb.createTable(setTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFile(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    commandLine = "GFF3Maker.py -t %s -f %s -w -p " % scriptArgs
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
124 | |
def test_run_as_script_setReversed(self):
    """Run GFF3Maker.py with -w and -p on a "set" table written by _writeSetFileReverse.

    The observed lm_SuperContig_29_v2 GFF3 file must equal the expected one,
    and FileUtils must report that lm_SuperContig_30_v2.gff3 does not exist.
    """
    setTableName = "%s_chr_allTEs_nr_noSSR_join_set" % self._projectName
    self._writeTablesFile("set")
    self._writeSetFileReverse(self._inputFileName)
    self._inputTableName = setTableName
    self._iDb.createTable(setTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFileReversed(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    commandLine = "GFF3Maker.py -t %s -f %s -w -p " % scriptArgs
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
137 | |
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8(self):
    """Run GFF3Maker.py with a classif table (-i) on a "path" table.

    Only the lm_SuperContig_30_v2 output is compared with the expected file.
    The lm_SuperContig_29_v2 output is deleted without being checked.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    self._writeClassifFile("input.classif")
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", "input.classif", True)

    try:
        # Re-point the fixture at contig 30 so tearDown removes these files.
        self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
        self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
        self._writeExpPathGFFFile_without_seq_withClassif(self._expGFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -i %s -p" % (self._tablesFileName, self._fastaTableName, inputClassifTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # Clean up even when the assertion fails, so the classif table and
        # the stray contig-29 output do not leak into the following tests.
        for fileName in ("input.classif", "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
160 | |
def test_run_as_script_path_without_seq_withAllFilesWithTargetLength(self):
    """Run GFF3Maker.py (-p only) with TE sequence tables available.

    Creates the genome "seq" table, two "path" tables and two TE "seq" tables,
    then checks the GFF3 outputs for lm_SuperContig_29_v2 and chr1 against the
    expected files, which carry TargetLength attributes.
    """
    fastaFileName = "genome.fa"
    self._writeFastaFileExtended(fastaFileName)
    # NOTE(review): presumably the final True lets createTable replace the
    # table already created in setUp under the same name; confirm in DbMySql.
    self._iDb.createTable(self._fastaTableName, "seq", fastaFileName, True)
    os.remove(fastaFileName)

    self._writeTablesFile_withTESeqTables("path")

    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    inFileName2 = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile2(inFileName2)
    bankPathTableName = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTableName, "path", inFileName2, True)
    os.remove(inFileName2)

    refTEsFastaFileName = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFastaFileName)
    refTESeqTableName = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTESeqTableName, "seq", refTEsFastaFileName, True)
    os.remove(refTEsFastaFileName)

    bankBLRtxFastaFileName = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankBLRtxFastaFileName)
    bankBLRtxTableName = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankBLRtxTableName, "seq", bankBLRtxFastaFileName, True)
    os.remove(bankBLRtxFastaFileName)

    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    expSeq2GFFFileName = "expChr1.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    obsSeq2GFFFileName = "chr1.gff3"

    try:
        self._writeExpPathGFFFile_without_seq_withTargetLength_seq1(self._expGFFFileName)
        self._writeExpPathGFFFile_without_seq_withTargetLength_seq2(expSeq2GFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -p" % (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expSeq2GFFFileName, obsSeq2GFFFileName))
    finally:
        # tearDown knows nothing about the chr1 files or the three extra
        # tables, so release them here even when an assertion fails.
        for fileName in (expSeq2GFFFileName, obsSeq2GFFFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(refTESeqTableName)
        self._iDb.dropTable(bankBLRtxTableName)
        self._iDb.dropTable(bankPathTableName)
209 | |
def test_run_as_script_split_file_by_annotation_method(self):
    """Run GFF3Maker.py with the -s and -p flags and check the three split outputs.

    Three GFF3 files are expected and compared: dmel_chr4_Annot1,
    dmel_chr1_Annot1 and dmel_chr4_Annot2.
    """
    fastaFileName = "dummyDmelChr4.fa"
    self._writeFastaFile_DmelChr4(fastaFileName)
    # NOTE(review): presumably the final True lets createTable replace the
    # table already created in setUp under the same name; confirm in DbMySql.
    self._iDb.createTable(self._fastaTableName, "seq", fastaFileName, True)
    os.remove(fastaFileName)

    self._writeTablesFile_withTESeqTables("path")

    self._writePathFile_refTEs_annotation(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    inFileName2 = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile_bankBLRtx_annotation(inFileName2)
    bankPathTableName = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTableName, "path", inFileName2, True)
    os.remove(inFileName2)

    refTEsFastaFileName = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFastaFileName)
    refTESeqTableName = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTESeqTableName, "seq", refTEsFastaFileName, True)
    os.remove(refTEsFastaFileName)

    bankBLRtxFastaFileName = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankBLRtxFastaFileName)
    bankBLRtxTableName = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankBLRtxTableName, "seq", bankBLRtxFastaFileName, True)
    os.remove(bankBLRtxFastaFileName)

    self._expGFFFileName = "expdmel_chr4_Annot1.gff3"
    expGFFFileName2 = "expdmel_chr1_Annot1.gff3"
    expGFFFileName3 = "expdmel_chr4_Annot2.gff3"
    self._obsGFFFileName = "dmel_chr4_Annot1.gff3"
    obsGFFFileName2 = "dmel_chr1_Annot1.gff3"
    obsGFFFileName3 = "dmel_chr4_Annot2.gff3"

    try:
        self._writeExpPathGFFFile_split_file1(self._expGFFFileName)
        self._writeExpPathGFFFile_split_file2(expGFFFileName2)
        self._writeExpPathGFFFile_split_file3(expGFFFileName3)

        cmd = "GFF3Maker.py -t %s -f %s -s -p" % (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFFileName2, obsGFFFileName2))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFFileName3, obsGFFFileName3))
    finally:
        # tearDown only handles the first expected/observed pair, so remove
        # the other files and tables here even when an assertion fails.
        for fileName in (expGFFFileName2, expGFFFileName3, obsGFFFileName2, obsGFFFileName3):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(refTESeqTableName)
        self._iDb.dropTable(bankBLRtxTableName)
        self._iDb.dropTable(bankPathTableName)
264 | |
def test_run_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    """Drive GFF3Maker through its API with a classif table and identical-match merging.

    Only the lm_SuperContig_30_v2 output is compared with the expected file.
    The lm_SuperContig_29_v2 output is deleted without being checked.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    self._writeClassifFile("input.classif")
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", "input.classif", True)

    try:
        # Re-point the fixture at contig 30 so tearDown removes these files.
        self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
        self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
        self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

        iGFF3Maker = GFF3Maker()
        iGFF3Maker.setTablesFileName(self._tablesFileName)
        iGFF3Maker.setInFastaName(self._fastaTableName)
        iGFF3Maker.setClassifTable(inputClassifTableName)
        iGFF3Maker.setDoMergeIdenticalMatches(True)
        iGFF3Maker.setAreMatchPartCompulsory(True)
        iGFF3Maker.run()

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # Clean up even when the assertion fails, so the classif table and
        # the stray contig-29 output do not leak into the following tests.
        for fileName in ("input.classif", "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
292 | |
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    """Run GFF3Maker.py with a classif table (-i) and the -m and -p flags.

    Only the lm_SuperContig_30_v2 output is compared with the expected file.
    The lm_SuperContig_29_v2 output is deleted without being checked.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    self._writeClassifFile("input.classif")
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", "input.classif", True)

    try:
        # Re-point the fixture at contig 30 so tearDown removes these files.
        self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
        self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
        self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -i %s -m -p" % (self._tablesFileName, self._fastaTableName, inputClassifTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # Clean up even when the assertion fails, so the classif table and
        # the stray contig-29 output do not leak into the following tests.
        for fileName in ("input.classif", "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
315 | |
def _writeRefTEsSeqFile(self, inFileName):
    """Write a FASTA file with two reference TE sequences to inFileName."""
    fastaLines = (
        ">DTX-incomp_DmelChr4-L-B1-Map3\n",
        "ATCGATCGTT\n",
        ">DTX-incomp_DmelChr4-B-P0.0-Map3\n",
        "GCTAGCTA\n",
    )
    with open(inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
322 | |
def _writeBankBLRtxSeqFile(self, inFileName):
    """Write a FASTA file with three bank sequences to inFileName."""
    fastaLines = (
        ">PROTOP_B:classII:TIR\n",
        "ATCGATCGTT\n",
        ">DMRT1C:classI:?\n",
        "GCTAGCTA\n",
        ">BATUMI_I:classI:LTR_retrotransposon\n",
        "GCTAATGGCATA\n",
    )
    with open(inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
331 | |
def _writeExpPathGFFFile_without_seq_withTargetLength_seq1(self, inFileName):
    """Write the expected GFF3 content for lm_SuperContig_29_v2 to inFileName.

    The file holds two header lines followed by three "match" features and
    their "match_part" children; each "match" carries a TargetLength attribute.
    """
    gffLines = (
        "##gff-version 3\n",
        "##sequence-region lm_SuperContig_29_v2 1 120\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;TargetLength=10;Identity=30.56\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;TargetLength=10;Identity=23.99\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;TargetLength=8\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n",
    )
    with open(inFileName, "w") as gffFile:
        gffFile.writelines(gffLines)
343 | |
def _writeExpPathGFFFile_without_seq_withTargetLength_seq2(self, inFileName):
    """Write the expected GFF3 content for chr1 to inFileName.

    The file holds two header lines followed by two "match" features, each
    with a TargetLength attribute and one "match_part" child.
    """
    gffLines = (
        "##gff-version 3\n",
        "##sequence-region chr1 1 18\n",
        "chr1\tprojectName_REPET_TEs\tmatch\t1\t100\t0.0\t-\t.\tID=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;TargetLength=10;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch_part\t1\t100\t1e-20\t-\t.\tID=mp1-1_chr1_PROTOP_B:classII:TIR;Parent=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch\t100\t1000\t0.0\t-\t.\tID=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;TargetLength=8;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch_part\t100\t1000\t1e-30\t-\t.\tID=mp2-1_chr1_DMRT1C:classI:?;Parent=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;Identity=30.56\n",
    )
    with open(inFileName, "w") as gffFile:
        gffFile.writelines(gffLines)
352 | |
def _writeExpPathGFFFile_split_file1(self, inFileName):
    """Write the first expected split GFF3 file (dmel_chr4) to inFileName.

    The source column of every feature line is built from self._projectName.
    """
    featureTemplates = (
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4630\t4889\t0.0\t+\t.\tID=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 512;TargetLength=8\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4630\t4704\t0.0\t+\t.\tID=mp21-1_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 312;Identity=84.4417\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4837\t4889\t0.0\t+\t.\tID=mp21-2_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 456 512;Identity=84.4417\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp6-1_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n",
    )
    with open(inFileName, "w") as gffFile:
        # The two header lines contain no placeholder and are written verbatim.
        gffFile.write("##gff-version 3\n")
        gffFile.write("##sequence-region dmel_chr4 1 18\n")
        for template in featureTemplates:
            gffFile.write(template % self._projectName)
362 | |
def _writeExpPathGFFFile_split_file2(self, inFileName):
    """Write the second expected split GFF3 file (dmel_chr1) to inFileName.

    The source column of every feature line is built from self._projectName.
    """
    featureTemplates = (
        "dmel_chr1\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n",
        "dmel_chr1\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp35-1_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n",
    )
    with open(inFileName, "w") as gffFile:
        # The two header lines contain no placeholder and are written verbatim.
        gffFile.write("##gff-version 3\n")
        gffFile.write("##sequence-region dmel_chr1 1 25\n")
        for template in featureTemplates:
            gffFile.write(template % self._projectName)
369 | |
def _writeExpPathGFFFile_split_file3(self, inFileName):
    """Write the third expected split GFF3 file (dmel_chr4) to inFileName.

    It holds three "match" features (ms66, ms27, ms141) with their
    "match_part" children; the source column is built from self._projectName.
    """
    featureTemplates = (
        "dmel_chr4\t%s_REPET_TEs\tmatch\t3143\t4364\t0.0\t-\t.\tID=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 2206;TargetLength=8\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3143\t3361\t0.0\t-\t.\tID=mp66-1_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1988 2206;Identity=89.7202\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3345\t3410\t0.0\t-\t.\tID=mp66-2_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1654 1719;Identity=54.55\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3448\t3572\t0.0\t-\t.\tID=mp66-3_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1495 1619;Identity=77.3433\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3564\t3693\t0.0\t-\t.\tID=mp66-4_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1380 1509;Identity=83.7306\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3705\t3857\t0.0\t-\t.\tID=mp66-5_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 539 691;Identity=84.5762\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3861\t3992\t0.0\t-\t.\tID=mp66-6_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 403 534;Identity=67.1422\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3985\t4106\t0.0\t-\t.\tID=mp66-7_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 290 411;Identity=76.612\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4102\t4248\t0.0\t-\t.\tID=mp66-8_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 139 285;Identity=75.3027\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4236\t4364\t0.0\t-\t.\tID=mp66-9_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 152;Identity=66.6657\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4412\t4889\t0.0\t-\t.\tID=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 1082;TargetLength=10\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4412\t4501\t5e-108\t-\t.\tID=mp27-1_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 993 1082;Identity=91.3066\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4483\t4652\t9e-146\t-\t.\tID=mp27-2_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 821 990;Identity=86.5572\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4672\t4775\t9e-146\t-\t.\tID=mp27-3_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 684 787;Identity=88.8013\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4782\t4889\t9e-141\t-\t.\tID=mp27-4_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 661;Identity=53.7067\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4917\t5195\t0.0\t+\t.\tID=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7303;TargetLength=12\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4917\t5007\t2e-144\t+\t.\tID=mp141-1_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7120;Identity=94.1756\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4976\t5195\t2e-144\t+\t.\tID=mp141-2_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7087 7303;Identity=82.2343\n",
    )
    with open(inFileName, "w") as gffFile:
        # The two header lines contain no placeholder and are written verbatim.
        gffFile.write("##gff-version 3\n")
        gffFile.write("##sequence-region dmel_chr4 1 18\n")
        for template in featureTemplates:
            gffFile.write(template % self._projectName)
392 | |
393 def _writeClassifFile(self, inputFileName): | |
394 f = open(inputFileName, "w") | |
395 f.write("PotentialHostGene-chim_fTest05105818-B-G11-Map20\t1240\t+\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 95.65%); other=(TE_BLRtx: PROTOP:classII:TIR: 12.03%, PROTOP_A:classII:TIR: 49.14%; TermRepeats: termTIR: 49; SSRCoverage=0.25<0.75)\n") | |
396 f.write("DTX-comp-chim_fTest05105818-B-G7-Map3_reversed\t1944\t-\tPotentialChimeric\tII\tTIR\tcomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 12.77%, PROTOP_A:classII:TIR: 25.16%, PROTOP_A:classII:TIR: 100.00%); struct=(TElength: <700bps; TermRepeats: termTIR: 844); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 29.48%; SSRCoverage=0.24<0.75)\n") | |
397 f.write("DTX-incomp_fTest05105818-B-G9-Map3_reversed\t1590\t-\tok\tII\tTIR\tincomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 10.92%, PROTOP:classII:TIR: 11.03%, PROTOP_A:classII:TIR: 55.20%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60%; SSRCoverage=0.21<0.75)\n") | |
398 f.write("DTX-incomp_fTest05105818-B-P0.0-Map3\t1042\t.\tok\tII\tTIR\tincomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 17.39%, PROTOP_A:classII:TIR: 22.17%); struct=(TElength: >700bps; TermRepeats: termTIR: 50); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 47.22%; SSRCoverage=0.25<0.75)\n") | |
399 f.write("DTX-comp_fTest05105818-B-P1.0-Map9_reversed\t1137\t-\tok\tII\tTIR\tcomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 6.70%, PROTOP_A:classII:TIR: 66.43%, PROTOP_B:classII:TIR: 6.42%); struct=(TElength: >700bps; TermRepeats: termTIR: 52); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 51.19%; SSRCoverage=0.22<0.75)\n") | |
400 f.write("RLX-incomp_fTest05105818-B-R12-Map3_reversed\t2284\t-\tok\tI\tLTR\tincomplete\tCI=28; coding=(TE_BLRtx: ROOA_I:classI:LTR_retrotransposon: 27.57%, ROOA_LTR:classI:LTR_retrotransposon: 94.56%; TE_BLRx: BEL11_AGp:classI:LTR_retrotransposon: 19.47%, BEL2-I_Dmoj_1p:classI:LTR_retrotransposon: 11.49%); struct=(TElength: >700bps); other=(SSRCoverage=0.07<0.75)\n") | |
401 f.write("DTX-incomp_fTest05105818-B-R19-Map4\t705\t+\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1-2_DM:classII:TIR: 42.70%; TE_BLRx: TC1-2_DMp:classII:TIR: 41.18%); struct=(TElength: >700bps); other=(SSRCoverage=0.14<0.75)\n") | |
402 f.write("DHX-incomp_fTest05105818-B-R1-Map4\t2367\t.\tok\tII\tHelitron\tincomplete\tCI=20; coding=(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00%, DNAREP1_DYak:classII:Helitron: 9.08%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52%; SSRCoverage=0.18<0.75)\n") | |
403 f.write("noCat_fTest05105818-B-R2-Map6\t4638\t.\tok\tnoCat\tnoCat\tNA\tCI=NA; coding=(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%); struct=(SSRCoverage=0.05<0.75)\n") | |
404 f.write("PotentialHostGene-chim_fTest05105818-B-R4-Map5_reversed\t1067\t-\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%); other=(TE_BLRtx: PROTOP:classII:TIR: 13.06%, PROTOP_A:classII:TIR: 37.47%; SSRCoverage=0.27<0.75)\n") | |
405 f.write("DTX-incomp_fTest05105818-B-R9-Map3_reversed\t714\t-\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1_DM:classII:TIR: 40.88%; TE_BLRx: Tc1-1_TCa_1p:classII:TIR: 30.18%, Tc1-3_FR_1p:classII:TIR: 9.97%); struct=(TElength: >700bps); other=(SSRCoverage=0.08<0.75)\n") | |
406 f.close() | |
407 | |
408 def _writePathFile_withClassif(self,inFileName): | |
409 f = open(inFileName,'w') | |
410 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n') | |
411 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n') | |
412 f.write('3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n') | |
413 f.write('3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n') | |
414 f.write('4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n') | |
415 f.write('5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n') | |
416 f.write('6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n') | |
417 f.write('7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n') | |
418 f.close() | |
419 | |
420 def _writeExpPathGFFFile_without_seq_withClassif(self, inFileName): | |
421 f = open(inFileName, 'w') | |
422 f.write("##gff-version 3\n") | |
423 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n") | |
424 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n") | |
425 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n") | |
426 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n") | |
427 | |
428 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n") | |
429 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n") | |
430 | |
431 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n") | |
432 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n") | |
433 | |
434 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n") | |
435 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n") | |
436 | |
437 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;TargetDescription=CI:NA coding:(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%) struct:(SSRCoverage:0.05<0.75);Identity=34.89\n") | |
438 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp7-1_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Parent=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;Identity=34.89\n") | |
439 | |
440 f.close() | |
441 | |
442 def _writePathFile_withClassif_withIdenticalMatches(self,inFileName): | |
443 f = open(inFileName,'w') | |
444 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n') | |
445 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n') | |
446 f.write('3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n') | |
447 f.write('3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n') | |
448 f.write('4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n') | |
449 f.write('5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n') | |
450 f.write('6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n') | |
451 | |
452 f.write('7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n') | |
453 f.write('8\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t100\t190\t3e-26\t101\t39.89\n') | |
454 f.write('9\tlm_SuperContig_30_v2\t288031\t288080\tRLX-incomp_fTest05105818-B-R12-Map3_reversed\t1100\t1290\t3e-26\t101\t40.89\n') | |
455 f.write('10\tlm_SuperContig_30_v2\t288031\t288080\tPotentialHostGene-chim_fTest05105818-B-G11-Map20\t990\t1890\t3e-26\t101\t38.09\n') | |
456 | |
457 f.write('11\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-G1-Map3\t990\t1890\t3e-26\t301\t38.09\n') | |
458 | |
459 f.write('12\tlm_SuperContig_30_v2\t388031\t388080\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n') | |
460 f.write('12\tlm_SuperContig_30_v2\t388081\t388380\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n') | |
461 | |
462 f.write('13\tlm_SuperContig_30_v2\t388031\t388080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t119\t309\t3e-21\t101\t30.89\n') | |
463 f.write('13\tlm_SuperContig_30_v2\t388081\t388380\tDTX-incomp_fTest05105818-B-P0.0-Map3\t250\t450\t3e-22\t101\t35.89\n') | |
464 f.close() | |
465 | |
466 def _writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self, inFileName): | |
467 f = open(inFileName, 'w') | |
468 f.write("##gff-version 3\n") | |
469 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n") | |
470 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n") | |
471 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n") | |
472 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n") | |
473 | |
474 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n") | |
475 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n") | |
476 | |
477 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n") | |
478 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n") | |
479 | |
480 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n") | |
481 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n") | |
482 | |
483 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890;OtherTargets=RLX-incomp_fTest05105818-B-R12-Map3_reversed 1100 1290, DTX-incomp_fTest05105818-B-P0.0-Map3 100 190, noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090\n") | |
484 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp10-1_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Parent=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890\n") | |
485 | |
486 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n") | |
487 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp11-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n") | |
488 | |
489 #TODO: | |
490 #Should this case really occur : If merging multiple match-parts, the current behaviour needs to be fixed to get correct subject start/end coordinates | |
491 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t388031\t388380\t0.0\t+\t.\tID=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;OtherTargets=DTX-incomp_fTest05105818-B-P0.0-Map3 119 309\n") | |
492 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388031\t388080\t3e-21\t+\t.\tID=mp12-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209\n") | |
493 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388081\t388380\t3e-22\t+\t.\tID=mp12-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350\n") | |
494 f.close() | |
495 | |
496 def _writeTablesFile_withTESeqTables(self, tableType): | |
497 tableFile = open( self._tablesFileName, "w" ) | |
498 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\t%s_refTEs_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName) | |
499 tableFile.write(string) | |
500 string = "%s_REPET_TEs\t%s\t%s_chr_bankBLRtx_%s\t%s_bankBLRtx_nt_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName) | |
501 tableFile.write(string) | |
502 tableFile.close() | |
503 | |
504 def _writeTablesFile(self, tableType): | |
505 tableFile = open( self._tablesFileName, "w" ) | |
506 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\n" % (self._projectName, tableType, self._projectName, tableType) | |
507 tableFile.write(string) | |
508 tableFile.close() | |
509 | |
510 def _writePathFile(self,inFileName): | |
511 f = open(inFileName,'w') | |
512 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\tDTX-incomp_DmelChr4-L-B1-Map3\t228\t85\t1e-40\t84\t30.56\n') | |
513 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\tDTX-incomp_DmelChr4-L-B1-Map3\t522\t229\t1e-40\t106\t23.99\n') | |
514 f.write('3\tlm_SuperContig_29_v2\t78031\t78080\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n') | |
515 f.write('3\tlm_SuperContig_29_v2\t78081\t78588\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n') | |
516 f.close() | |
517 | |
518 def _writePathFile2(self,inFileName): | |
519 f = open(inFileName,'w') | |
520 f.write('1\tchr1\t1\t100\tPROTOP_B:classII:TIR\t228\t85\t1e-20\t84\t30.56\n') | |
521 f.write('2\tchr1\t100\t1000\tDMRT1C:classI:?\t228\t85\t1e-30\t84\t30.56\n') | |
522 f.close() | |
523 | |
524 def _writePathFile_refTEs_annotation(self,inFileName): | |
525 f = open(inFileName,'w') | |
526 f.write('6\tdmel_chr4\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n') | |
527 f.write('21\tdmel_chr4\t4630\t4704\tDTX-incomp_DmelChr4-B-P0.0-Map3\t232\t312\t0\t65\t84.4417\n') | |
528 f.write('21\tdmel_chr4\t4837\t4889\tDTX-incomp_DmelChr4-B-P0.0-Map3\t456\t512\t0\t46\t84.4417\n') | |
529 f.write('35\tdmel_chr1\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n') | |
530 f.close() | |
531 | |
532 def _writePathFile_bankBLRtx_annotation(self,inFileName): | |
533 f = open(inFileName, 'w') | |
534 f.write('27\tdmel_chr4\t4412\t4501\tPROTOP_B:classII:TIR\t1082\t993\t5e-108\t702\t91.3066\n') | |
535 f.write('27\tdmel_chr4\t4483\t4652\tPROTOP_B:classII:TIR\t990\t821\t9e-146\t707\t86.5572\n') | |
536 f.write('27\tdmel_chr4\t4672\t4775\tPROTOP_B:classII:TIR\t787\t684\t9e-146\t707\t88.8013\n') | |
537 f.write('27\tdmel_chr4\t4782\t4889\tPROTOP_B:classII:TIR\t661\t553\t9e-141\t356\t53.7067\n') | |
538 f.write('66\tdmel_chr4\t3143\t3361\tDMRT1C:classI:?\t2206\t1988\t0\t1878\t89.7202\n') | |
539 f.write('66\tdmel_chr4\t3345\t3410\tDMRT1C:classI:?\t1719\t1654\t0\t313\t54.55\n') | |
540 f.write('66\tdmel_chr4\t3448\t3572\tDMRT1C:classI:?\t1619\t1495\t0\t1252\t77.3433\n') | |
541 f.write('66\tdmel_chr4\t3564\t3693\tDMRT1C:classI:?\t1509\t1380\t0\t1565\t83.7306\n') | |
542 f.write('66\tdmel_chr4\t3705\t3857\tDMRT1C:classI:?\t691\t539\t0\t1252\t84.5762\n') | |
543 f.write('66\tdmel_chr4\t3861\t3992\tDMRT1C:classI:?\t534\t403\t0\t1565\t67.1422\n') | |
544 f.write('66\tdmel_chr4\t3985\t4106\tDMRT1C:classI:?\t411\t290\t0\t1252\t76.612\n') | |
545 f.write('66\tdmel_chr4\t4102\t4248\tDMRT1C:classI:?\t285\t139\t0\t1565\t75.3027\n') | |
546 f.write('66\tdmel_chr4\t4236\t4364\tDMRT1C:classI:?\t152\t24\t0\t1565\t66.6657\n') | |
547 f.write('141\tdmel_chr4\t4917\t5007\tBATUMI_I:classI:LTR_retrotransposon\t7030\t7120\t2e-144\t984\t94.1756\n') | |
548 f.write('141\tdmel_chr4\t4976\t5195\tBATUMI_I:classI:LTR_retrotransposon\t7087\t7303\t2e-144\t2098\t82.2343\n') | |
549 f.close() | |
550 | |
551 def _writePathFileReverse(self,inFileName): | |
552 f = open(inFileName,'w') | |
553 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t228\t85\t1e-40\t84\t30.56\n') | |
554 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t522\t229\t1e-40\t106\t23.99\n') | |
555 f.write('3\tlm_SuperContig_29_v2\t78080\t78031\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n') | |
556 f.write('3\tlm_SuperContig_29_v2\t78588\t78081\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n') | |
557 f.close() | |
558 | |
559 def _writeSetFile(self,inFileName): | |
560 f = open(inFileName,'w') | |
561 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n') | |
562 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n') | |
563 f.write('3\tset3\tlm_SuperContig_29_v2\t78031\t78080\n') | |
564 f.write('3\tset3\tlm_SuperContig_29_v2\t78081\t78588\n') | |
565 f.close() | |
566 | |
567 def _writeSetFileReverse(self,inFileName): | |
568 f = open(inFileName,'w') | |
569 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n') | |
570 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n') | |
571 f.write('3\tset3\tlm_SuperContig_29_v2\t78080\t78031\n') | |
572 f.write('3\tset3\tlm_SuperContig_29_v2\t78588\t78081\n') | |
573 f.close() | |
574 | |
575 def _writeExpEmptyPathGFFFile(self, inFileName): | |
576 f = open(inFileName, 'w') | |
577 f.write("##gff-version 3\n") | |
578 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n") | |
579 f.close() | |
580 | |
581 def _writeExpPathGFFFile(self, inFileName): | |
582 f = open(inFileName, 'w') | |
583 f.write("##gff-version 3\n") | |
584 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
585 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n") | |
586 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n") | |
587 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n") | |
588 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n") | |
589 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n") | |
590 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
591 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
592 f.write("##FASTA\n") | |
593 self._writeSeq1(f) | |
594 f.close() | |
595 | |
596 def _writeExpEmptyPathGFFFileWithSeq(self, inFileName): | |
597 f = open(inFileName, 'w') | |
598 f.write("##gff-version 3\n") | |
599 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n") | |
600 f.write("##FASTA\n") | |
601 self._writeSeq2(f) | |
602 f.close() | |
603 | |
604 def _writeExpPathGFFFile_without_seq(self, inFileName): | |
605 f = open(inFileName, 'w') | |
606 f.write("##gff-version 3\n") | |
607 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
608 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n") | |
609 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n") | |
610 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n") | |
611 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n") | |
612 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n") | |
613 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
614 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
615 f.close() | |
616 | |
617 def _writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self, inFileName): | |
618 f = open(inFileName, 'w') | |
619 f.write("##gff-version 3\n") | |
620 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
621 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n") | |
622 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n") | |
623 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n") | |
624 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
625 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n") | |
626 f.close() | |
627 | |
628 def _writeExpPathGFFFileReversed(self, inFileName): | |
629 f = open(inFileName, 'w') | |
630 f.write("##gff-version 3\n") | |
631 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
632 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n") | |
633 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n") | |
634 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n") | |
635 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n") | |
636 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209\n") | |
637 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n") | |
638 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n") | |
639 f.write("##FASTA\n") | |
640 self._writeSeq1(f) | |
641 f.close() | |
642 | |
643 def _writeExpSetGFFFile(self, inFileName): | |
644 f = open(inFileName, 'w') | |
645 f.write("##gff-version 3\n") | |
646 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
647 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n") | |
648 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n") | |
649 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n") | |
650 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n") | |
651 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n") | |
652 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n") | |
653 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n") | |
654 f.write("##FASTA\n") | |
655 self._writeSeq1(f) | |
656 f.close() | |
657 | |
658 def _writeExpSetGFFFileReversed(self, inFileName): | |
659 f = open(inFileName, 'w') | |
660 f.write("##gff-version 3\n") | |
661 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n") | |
662 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n") | |
663 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n") | |
664 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n") | |
665 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n") | |
666 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n") | |
667 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n") | |
668 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n") | |
669 f.write("##FASTA\n") | |
670 self._writeSeq1(f) | |
671 f.close() | |
672 | |
673 def _writeFastaFile(self, inFileName): | |
674 f = open(inFileName,'w') | |
675 self._writeSeq2(f) | |
676 self._writeSeq1(f) | |
677 f.close() | |
678 | |
679 def _writeFastaFileExtended(self, inFileName): | |
680 f = open(inFileName,'w') | |
681 self._writeSeq2(f) | |
682 self._writeSeq1(f) | |
683 f.write(">chr1\n") | |
684 f.write("CTAAGCTGCGCTATGTAG\n") | |
685 f.close() | |
686 | |
687 def _writeSeq1(self, f): | |
688 f.write('>lm_SuperContig_29_v2\n') | |
689 f.write('CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAA\n') | |
690 f.write('GGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA\n') | |
691 | |
692 def _writeSeq2(self, f): | |
693 f.write('>lm_SuperContig_30_v2\n') | |
694 f.write('TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\n') | |
695 f.write('CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\n') | |
696 | |
697 def _writeFastaFile_DmelChr4(self, inFileName): | |
698 f = open(inFileName,'w') | |
699 f.write(">dmel_chr4\n") | |
700 f.write("CTAAGCTGCGCTATGTAG\n") | |
701 f.write(">dmel_chr1\n") | |
702 f.write("CGTAACGCTAGCGCTTATAGTGAGC\n") | |
703 f.close() | |
704 | |
705 | |
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()