comparison commons/tools/tests/Test_F_GFF3Maker.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 import unittest
2 import os
3 from commons.core.utils.FileUtils import FileUtils
4 from commons.core.sql.DbMySql import DbMySql
5 from commons.tools.GFF3Maker import GFF3Maker
6
7 class Test_F_GFF3Maker(unittest.TestCase):
8
def setUp(self):
    """Create the database handle, load the genome table and set the shared file names."""
    project = "projectName"
    self._projectName = project
    self._iDb = DbMySql()
    self._tablesFileName = "annotation_tables.txt"
    self._fastaFileName = "{0}_chr.fa".format(project)
    self._fastaTableName = "{0}_chr_seq".format(project)
    # The FASTA file has to be on disk before createTable() is given its name.
    self._writeFastaFile(self._fastaFileName)
    self._iDb.createTable(self._fastaTableName, "seq", self._fastaFileName, True)
    self._inputFileName = "{0}_chr_allTEs_nr_noSSR_join.pathOrSet".format(project)
    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    self._obsGFFEmptyFileName = "lm_SuperContig_30_v2.gff3"
21
def tearDown(self):
    """Drop the test tables, close the connection and delete the files of a test.

    Cleanup is tolerant of a test that stopped early: the input table is
    only dropped when a test actually assigned ``_inputTableName``, and a
    file that was never produced is skipped instead of aborting the removal
    of the remaining ones.
    """
    self._iDb.dropTable(self._fastaTableName)
    # _inputTableName is assigned inside each test, not in setUp().
    inputTableName = getattr(self, "_inputTableName", None)
    if inputTableName is not None:
        self._iDb.dropTable(inputTableName)
    self._iDb.close()
    producedFileNames = (
        self._obsGFFFileName,
        self._expGFFFileName,
        self._fastaFileName,
        self._tablesFileName,
        self._inputFileName,
    )
    for fileName in producedFileNames:
        if os.path.exists(fileName):
            os.remove(fileName)
31
def test_run_as_script_path_with_seq_withAllFiles(self):
    """Run the GFF3Maker.py script with ``-w -a -p`` on a path table and compare both GFF3 files.

    The two extra GFF3 files of this test are removed in a ``finally``
    clause so that a failing assertion does not leave them behind.
    """
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile(self._expGFFFileName)
    expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"
    self._writeExpEmptyPathGFFFileWithSeq(expGFFEmptyFileName)

    cmd = "GFF3Maker.py -t %s -f %s -w -a -p" % (self._tablesFileName, self._fastaTableName)
    try:
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))
    finally:
        # tearDown() does not know about these two files.
        for fileName in (expGFFEmptyFileName, self._obsGFFEmptyFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
49
def test_run_as_script_path_without_seq_withAllFiles(self):
    """Run the GFF3Maker.py script with ``-a -p`` on a path table and compare both GFF3 files.

    The two extra GFF3 files of this test are removed in a ``finally``
    clause so that a failing assertion does not leave them behind.
    """
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)
    expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"
    self._writeExpEmptyPathGFFFile(expGFFEmptyFileName)

    cmd = "GFF3Maker.py -t %s -f %s -a -p" % (self._tablesFileName, self._fastaTableName)
    try:
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))
    finally:
        # tearDown() does not know about these two files.
        for fileName in (expGFFEmptyFileName, self._obsGFFEmptyFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
67
def test_run_without_seq(self):
    """Run GFF3Maker through its API with setAreMatchPartCompulsory(True) and check the output."""
    pathTableName = "{0}_chr_allTEs_nr_noSSR_join_path".format(self._projectName)
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.setAreMatchPartCompulsory(True)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    # No GFF3 file is expected for lm_SuperContig_30_v2 in this scenario.
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
83
def test_run_without_seq_and_match_part_not_compulsory(self):
    """Run GFF3Maker through its API without setAreMatchPartCompulsory() and check the output."""
    pathTableName = "{0}_chr_allTEs_nr_noSSR_join_path".format(self._projectName)
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    # No GFF3 file is expected for lm_SuperContig_30_v2 in this scenario.
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
98
def test_run_as_script_pathReversed(self):
    """Run the GFF3Maker.py script with ``-w -p`` on the reversed path input and check the output."""
    pathTableName = "{0}_chr_allTEs_nr_noSSR_join_path".format(self._projectName)
    self._writeTablesFile("path")
    self._writePathFileReverse(self._inputFileName)
    self._inputTableName = pathTableName
    self._iDb.createTable(pathTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFileReversed(self._expGFFFileName)

    commandLine = "GFF3Maker.py -t {0} -f {1} -w -p".format(self._tablesFileName, self._fastaTableName)
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
111
def test_run_as_script_set(self):
    """Run the GFF3Maker.py script with ``-w -p`` on a set table and check the output."""
    setTableName = "{0}_chr_allTEs_nr_noSSR_join_set".format(self._projectName)
    self._writeTablesFile("set")
    self._writeSetFile(self._inputFileName)
    self._inputTableName = setTableName
    self._iDb.createTable(setTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFile(self._expGFFFileName)

    # The trailing blank of the command line is kept as in the original test.
    commandLine = "GFF3Maker.py -t {0} -f {1} -w -p ".format(self._tablesFileName, self._fastaTableName)
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
124
def test_run_as_script_setReversed(self):
    """Run the GFF3Maker.py script with ``-w -p`` on the reversed set input and check the output."""
    setTableName = "{0}_chr_allTEs_nr_noSSR_join_set".format(self._projectName)
    self._writeTablesFile("set")
    self._writeSetFileReverse(self._inputFileName)
    self._inputTableName = setTableName
    self._iDb.createTable(setTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFileReversed(self._expGFFFileName)

    # The trailing blank of the command line is kept as in the original test.
    commandLine = "GFF3Maker.py -t {0} -f {1} -w -p ".format(self._tablesFileName, self._fastaTableName)
    os.system(commandLine)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
137
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8(self):
    """Run the GFF3Maker.py script with a classif table (``-i``) and compare the lm_SuperContig_30_v2 GFF3.

    The classif file, the GFF3 file of lm_SuperContig_29_v2 and the classif
    table are released in a ``finally`` clause so that a failing assertion
    does not leave them behind.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", classifFileName, True)

    # This test compares the second contig; tearDown() removes these two files.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    try:
        self._writeExpPathGFFFile_without_seq_withClassif(self._expGFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -i %s -p" % (self._tablesFileName, self._fastaTableName, inputClassifTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # The first contig's GFF3 is no longer tracked by _obsGFFFileName.
        for fileName in (classifFileName, "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
160
def test_run_as_script_path_without_seq_withAllFilesWithTargetLength(self):
    """Run the GFF3Maker.py script with TE sequence tables and compare the two GFF3 files.

    The genome table is re-created from an extended FASTA file, and two
    path tables plus two TE sequence tables are loaded. The files and
    tables specific to this test are released in a ``finally`` clause so
    that a failing assertion does not leave them behind.
    """
    fastaFileName = "genome.fa"
    self._writeFastaFileExtended(fastaFileName)
    self._iDb.createTable(self._fastaTableName, "seq", fastaFileName, True)
    os.remove(fastaFileName)

    self._writeTablesFile_withTESeqTables("path")

    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    inFileName2 = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile2(inFileName2)
    bankPathTableName = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTableName, "path", inFileName2, True)
    os.remove(inFileName2)

    refTEsFastaFileName = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFastaFileName)
    refTESeqTableName = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTESeqTableName, "seq", refTEsFastaFileName, True)
    os.remove(refTEsFastaFileName)

    bankBLRtxFastaFileName = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankBLRtxFastaFileName)
    bankBLRtxTableName = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankBLRtxTableName, "seq", bankBLRtxFastaFileName, True)
    os.remove(bankBLRtxFastaFileName)

    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    expSeq2GFFFileName = "expChr1.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    obsSeq2GFFFileName = "chr1.gff3"
    try:
        self._writeExpPathGFFFile_without_seq_withTargetLength_seq1(self._expGFFFileName)
        self._writeExpPathGFFFile_without_seq_withTargetLength_seq2(expSeq2GFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -p" % (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expSeq2GFFFileName, obsSeq2GFFFileName))
    finally:
        # tearDown() only knows about the first sequence's files and tables.
        for fileName in (expSeq2GFFFileName, obsSeq2GFFFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(refTESeqTableName)
        self._iDb.dropTable(bankBLRtxTableName)
        self._iDb.dropTable(bankPathTableName)
209
def test_run_as_script_split_file_by_annotation_method(self):
    """Run the GFF3Maker.py script with ``-s -p`` and compare the three resulting GFF3 files.

    The genome table is re-created from the dummy DmelChr4 FASTA file, and
    two path tables plus two TE sequence tables are loaded. The files and
    tables specific to this test are released in a ``finally`` clause so
    that a failing assertion does not leave them behind.
    """
    fastaFileName = "dummyDmelChr4.fa"
    self._writeFastaFile_DmelChr4(fastaFileName)
    self._iDb.createTable(self._fastaTableName, "seq", fastaFileName, True)
    os.remove(fastaFileName)

    self._writeTablesFile_withTESeqTables("path")

    self._writePathFile_refTEs_annotation(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    inFileName2 = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile_bankBLRtx_annotation(inFileName2)
    bankPathTableName = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTableName, "path", inFileName2, True)
    os.remove(inFileName2)

    refTEsFastaFileName = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFastaFileName)
    refTESeqTableName = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTESeqTableName, "seq", refTEsFastaFileName, True)
    os.remove(refTEsFastaFileName)

    bankBLRtxFastaFileName = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankBLRtxFastaFileName)
    bankBLRtxTableName = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankBLRtxTableName, "seq", bankBLRtxFastaFileName, True)
    os.remove(bankBLRtxFastaFileName)

    self._expGFFFileName = "expdmel_chr4_Annot1.gff3"
    expGFFFileName2 = "expdmel_chr1_Annot1.gff3"
    expGFFFileName3 = "expdmel_chr4_Annot2.gff3"
    self._obsGFFFileName = "dmel_chr4_Annot1.gff3"
    obsGFFFileName2 = "dmel_chr1_Annot1.gff3"
    obsGFFFileName3 = "dmel_chr4_Annot2.gff3"
    try:
        self._writeExpPathGFFFile_split_file1(self._expGFFFileName)
        self._writeExpPathGFFFile_split_file2(expGFFFileName2)
        self._writeExpPathGFFFile_split_file3(expGFFFileName3)

        cmd = "GFF3Maker.py -t %s -f %s -s -p" % (self._tablesFileName, self._fastaTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFFileName2, obsGFFFileName2))
        self.assertTrue(FileUtils.are2FilesIdentical(expGFFFileName3, obsGFFFileName3))
    finally:
        # tearDown() only knows about the first pair of GFF3 files.
        for fileName in (expGFFFileName2, expGFFFileName3, obsGFFFileName2, obsGFFFileName3):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(refTESeqTableName)
        self._iDb.dropTable(bankBLRtxTableName)
        self._iDb.dropTable(bankPathTableName)
264
def test_run_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    """Run GFF3Maker through its API with a classif table and setDoMergeIdenticalMatches(True).

    The classif file, the GFF3 file of lm_SuperContig_29_v2 and the classif
    table are released in a ``finally`` clause so that a failing assertion
    does not leave them behind.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", classifFileName, True)

    # This test compares the second contig; tearDown() removes these two files.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    try:
        self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

        iGFF3Maker = GFF3Maker()
        iGFF3Maker.setTablesFileName(self._tablesFileName)
        iGFF3Maker.setInFastaName(self._fastaTableName)
        iGFF3Maker.setClassifTable(inputClassifTableName)
        iGFF3Maker.setDoMergeIdenticalMatches(True)
        iGFF3Maker.setAreMatchPartCompulsory(True)
        iGFF3Maker.run()

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # The first contig's GFF3 is no longer tracked by _obsGFFFileName.
        for fileName in (classifFileName, "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
292
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    """Run the GFF3Maker.py script with ``-i <classif table> -m -p`` and compare the lm_SuperContig_30_v2 GFF3.

    The classif file, the GFF3 file of lm_SuperContig_29_v2 and the classif
    table are released in a ``finally`` clause so that a failing assertion
    does not leave them behind.
    """
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    inputClassifTableName = "%s_classif" % self._projectName
    self._iDb.createTable(inputClassifTableName, "classif", classifFileName, True)

    # This test compares the second contig; tearDown() removes these two files.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    try:
        self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

        cmd = "GFF3Maker.py -t %s -f %s -i %s -m -p" % (self._tablesFileName, self._fastaTableName, inputClassifTableName)
        os.system(cmd)

        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))
    finally:
        # The first contig's GFF3 is no longer tracked by _obsGFFFileName.
        for fileName in (classifFileName, "lm_SuperContig_29_v2.gff3"):
            if os.path.exists(fileName):
                os.remove(fileName)
        self._iDb.dropTable(inputClassifTableName)
315
316 def _writeRefTEsSeqFile(self, inFileName):
317 with open(inFileName, "w") as f:
318 f.write(">DTX-incomp_DmelChr4-L-B1-Map3\n")
319 f.write("ATCGATCGTT\n")
320 f.write(">DTX-incomp_DmelChr4-B-P0.0-Map3\n")
321 f.write("GCTAGCTA\n")
322
323 def _writeBankBLRtxSeqFile(self, inFileName):
324 with open(inFileName, "w") as f:
325 f.write(">PROTOP_B:classII:TIR\n")
326 f.write("ATCGATCGTT\n")
327 f.write(">DMRT1C:classI:?\n")
328 f.write("GCTAGCTA\n")
329 f.write(">BATUMI_I:classI:LTR_retrotransposon\n")
330 f.write("GCTAATGGCATA\n")
331
332 def _writeExpPathGFFFile_without_seq_withTargetLength_seq1(self, inFileName):
333 with open(inFileName, "w") as f:
334 f.write("##gff-version 3\n")
335 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
336 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;TargetLength=10;Identity=30.56\n")
337 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
338 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;TargetLength=10;Identity=23.99\n")
339 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
340 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;TargetLength=8\n")
341 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
342 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
343
344 def _writeExpPathGFFFile_without_seq_withTargetLength_seq2(self, inFileName):
345 with open(inFileName, "w") as f:
346 f.write("##gff-version 3\n")
347 f.write("##sequence-region chr1 1 18\n")
348 f.write("chr1\tprojectName_REPET_TEs\tmatch\t1\t100\t0.0\t-\t.\tID=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;TargetLength=10;Identity=30.56\n")
349 f.write("chr1\tprojectName_REPET_TEs\tmatch_part\t1\t100\t1e-20\t-\t.\tID=mp1-1_chr1_PROTOP_B:classII:TIR;Parent=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;Identity=30.56\n")
350 f.write("chr1\tprojectName_REPET_TEs\tmatch\t100\t1000\t0.0\t-\t.\tID=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;TargetLength=8;Identity=30.56\n")
351 f.write("chr1\tprojectName_REPET_TEs\tmatch_part\t100\t1000\t1e-30\t-\t.\tID=mp2-1_chr1_DMRT1C:classI:?;Parent=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;Identity=30.56\n")
352
353 def _writeExpPathGFFFile_split_file1(self, inFileName):
354 with open(inFileName, 'w') as f:
355 f.write("##gff-version 3\n")
356 f.write("##sequence-region dmel_chr4 1 18\n")
357 f.write("dmel_chr4\t%s_REPET_TEs\tmatch\t4630\t4889\t0.0\t+\t.\tID=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 512;TargetLength=8\n" % self._projectName)
358 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4630\t4704\t0.0\t+\t.\tID=mp21-1_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 312;Identity=84.4417\n" % self._projectName)
359 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4837\t4889\t0.0\t+\t.\tID=mp21-2_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 456 512;Identity=84.4417\n" % self._projectName)
360 f.write("dmel_chr4\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n" % self._projectName)
361 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp6-1_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n" % self._projectName)
362
363 def _writeExpPathGFFFile_split_file2(self, inFileName):
364 with open(inFileName, 'w') as f:
365 f.write("##gff-version 3\n")
366 f.write("##sequence-region dmel_chr1 1 25\n")
367 f.write("dmel_chr1\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n" % self._projectName)
368 f.write("dmel_chr1\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp35-1_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n" % self._projectName)
369
370 def _writeExpPathGFFFile_split_file3(self, inFileName):
371 with open(inFileName, 'w') as f:
372 f.write("##gff-version 3\n")
373 f.write("##sequence-region dmel_chr4 1 18\n")
374 f.write("dmel_chr4\t%s_REPET_TEs\tmatch\t3143\t4364\t0.0\t-\t.\tID=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 2206;TargetLength=8\n" % self._projectName)
375 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3143\t3361\t0.0\t-\t.\tID=mp66-1_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1988 2206;Identity=89.7202\n" % self._projectName)
376 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3345\t3410\t0.0\t-\t.\tID=mp66-2_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1654 1719;Identity=54.55\n" % self._projectName)
377 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3448\t3572\t0.0\t-\t.\tID=mp66-3_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1495 1619;Identity=77.3433\n" % self._projectName)
378 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3564\t3693\t0.0\t-\t.\tID=mp66-4_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1380 1509;Identity=83.7306\n" % self._projectName)
379 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3705\t3857\t0.0\t-\t.\tID=mp66-5_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 539 691;Identity=84.5762\n" % self._projectName)
380 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3861\t3992\t0.0\t-\t.\tID=mp66-6_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 403 534;Identity=67.1422\n" % self._projectName)
381 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t3985\t4106\t0.0\t-\t.\tID=mp66-7_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 290 411;Identity=76.612\n" % self._projectName)
382 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4102\t4248\t0.0\t-\t.\tID=mp66-8_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 139 285;Identity=75.3027\n" % self._projectName)
383 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4236\t4364\t0.0\t-\t.\tID=mp66-9_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 152;Identity=66.6657\n" % self._projectName)
384 f.write("dmel_chr4\t%s_REPET_TEs\tmatch\t4412\t4889\t0.0\t-\t.\tID=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 1082;TargetLength=10\n" % self._projectName)
385 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4412\t4501\t5e-108\t-\t.\tID=mp27-1_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 993 1082;Identity=91.3066\n" % self._projectName)
386 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4483\t4652\t9e-146\t-\t.\tID=mp27-2_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 821 990;Identity=86.5572\n" % self._projectName)
387 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4672\t4775\t9e-146\t-\t.\tID=mp27-3_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 684 787;Identity=88.8013\n" % self._projectName)
388 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4782\t4889\t9e-141\t-\t.\tID=mp27-4_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 661;Identity=53.7067\n" % self._projectName)
389 f.write("dmel_chr4\t%s_REPET_TEs\tmatch\t4917\t5195\t0.0\t+\t.\tID=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7303;TargetLength=12\n" % self._projectName)
390 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4917\t5007\t2e-144\t+\t.\tID=mp141-1_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7120;Identity=94.1756\n" % self._projectName)
391 f.write("dmel_chr4\t%s_REPET_TEs\tmatch_part\t4976\t5195\t2e-144\t+\t.\tID=mp141-2_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7087 7303;Identity=82.2343\n" % self._projectName)
392
393 def _writeClassifFile(self, inputFileName):
394 f = open(inputFileName, "w")
395 f.write("PotentialHostGene-chim_fTest05105818-B-G11-Map20\t1240\t+\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 95.65%); other=(TE_BLRtx: PROTOP:classII:TIR: 12.03%, PROTOP_A:classII:TIR: 49.14%; TermRepeats: termTIR: 49; SSRCoverage=0.25<0.75)\n")
396 f.write("DTX-comp-chim_fTest05105818-B-G7-Map3_reversed\t1944\t-\tPotentialChimeric\tII\tTIR\tcomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 12.77%, PROTOP_A:classII:TIR: 25.16%, PROTOP_A:classII:TIR: 100.00%); struct=(TElength: <700bps; TermRepeats: termTIR: 844); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 29.48%; SSRCoverage=0.24<0.75)\n")
397 f.write("DTX-incomp_fTest05105818-B-G9-Map3_reversed\t1590\t-\tok\tII\tTIR\tincomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 10.92%, PROTOP:classII:TIR: 11.03%, PROTOP_A:classII:TIR: 55.20%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60%; SSRCoverage=0.21<0.75)\n")
398 f.write("DTX-incomp_fTest05105818-B-P0.0-Map3\t1042\t.\tok\tII\tTIR\tincomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 17.39%, PROTOP_A:classII:TIR: 22.17%); struct=(TElength: >700bps; TermRepeats: termTIR: 50); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 47.22%; SSRCoverage=0.25<0.75)\n")
399 f.write("DTX-comp_fTest05105818-B-P1.0-Map9_reversed\t1137\t-\tok\tII\tTIR\tcomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 6.70%, PROTOP_A:classII:TIR: 66.43%, PROTOP_B:classII:TIR: 6.42%); struct=(TElength: >700bps; TermRepeats: termTIR: 52); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 51.19%; SSRCoverage=0.22<0.75)\n")
400 f.write("RLX-incomp_fTest05105818-B-R12-Map3_reversed\t2284\t-\tok\tI\tLTR\tincomplete\tCI=28; coding=(TE_BLRtx: ROOA_I:classI:LTR_retrotransposon: 27.57%, ROOA_LTR:classI:LTR_retrotransposon: 94.56%; TE_BLRx: BEL11_AGp:classI:LTR_retrotransposon: 19.47%, BEL2-I_Dmoj_1p:classI:LTR_retrotransposon: 11.49%); struct=(TElength: >700bps); other=(SSRCoverage=0.07<0.75)\n")
401 f.write("DTX-incomp_fTest05105818-B-R19-Map4\t705\t+\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1-2_DM:classII:TIR: 42.70%; TE_BLRx: TC1-2_DMp:classII:TIR: 41.18%); struct=(TElength: >700bps); other=(SSRCoverage=0.14<0.75)\n")
402 f.write("DHX-incomp_fTest05105818-B-R1-Map4\t2367\t.\tok\tII\tHelitron\tincomplete\tCI=20; coding=(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00%, DNAREP1_DYak:classII:Helitron: 9.08%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52%; SSRCoverage=0.18<0.75)\n")
403 f.write("noCat_fTest05105818-B-R2-Map6\t4638\t.\tok\tnoCat\tnoCat\tNA\tCI=NA; coding=(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%); struct=(SSRCoverage=0.05<0.75)\n")
404 f.write("PotentialHostGene-chim_fTest05105818-B-R4-Map5_reversed\t1067\t-\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%); other=(TE_BLRtx: PROTOP:classII:TIR: 13.06%, PROTOP_A:classII:TIR: 37.47%; SSRCoverage=0.27<0.75)\n")
405 f.write("DTX-incomp_fTest05105818-B-R9-Map3_reversed\t714\t-\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1_DM:classII:TIR: 40.88%; TE_BLRx: Tc1-1_TCa_1p:classII:TIR: 30.18%, Tc1-3_FR_1p:classII:TIR: 9.97%); struct=(TElength: >700bps); other=(SSRCoverage=0.08<0.75)\n")
406 f.close()
407
408 def _writePathFile_withClassif(self,inFileName):
409 f = open(inFileName,'w')
410 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n')
411 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n')
412 f.write('3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n')
413 f.write('3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n')
414 f.write('4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n')
415 f.write('5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n')
416 f.write('6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n')
417 f.write('7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n')
418 f.close()
419
420 def _writeExpPathGFFFile_without_seq_withClassif(self, inFileName):
421 f = open(inFileName, 'w')
422 f.write("##gff-version 3\n")
423 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
424 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n")
425 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n")
426 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n")
427
428 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
429 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
430
431 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n")
432 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n")
433
434 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n")
435 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n")
436
437 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;TargetDescription=CI:NA coding:(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%) struct:(SSRCoverage:0.05<0.75);Identity=34.89\n")
438 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp7-1_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Parent=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;Identity=34.89\n")
439
440 f.close()
441
442 def _writePathFile_withClassif_withIdenticalMatches(self,inFileName):
443 f = open(inFileName,'w')
444 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n')
445 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n')
446 f.write('3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n')
447 f.write('3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n')
448 f.write('4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n')
449 f.write('5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n')
450 f.write('6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n')
451
452 f.write('7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n')
453 f.write('8\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t100\t190\t3e-26\t101\t39.89\n')
454 f.write('9\tlm_SuperContig_30_v2\t288031\t288080\tRLX-incomp_fTest05105818-B-R12-Map3_reversed\t1100\t1290\t3e-26\t101\t40.89\n')
455 f.write('10\tlm_SuperContig_30_v2\t288031\t288080\tPotentialHostGene-chim_fTest05105818-B-G11-Map20\t990\t1890\t3e-26\t101\t38.09\n')
456
457 f.write('11\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-G1-Map3\t990\t1890\t3e-26\t301\t38.09\n')
458
459 f.write('12\tlm_SuperContig_30_v2\t388031\t388080\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n')
460 f.write('12\tlm_SuperContig_30_v2\t388081\t388380\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n')
461
462 f.write('13\tlm_SuperContig_30_v2\t388031\t388080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t119\t309\t3e-21\t101\t30.89\n')
463 f.write('13\tlm_SuperContig_30_v2\t388081\t388380\tDTX-incomp_fTest05105818-B-P0.0-Map3\t250\t450\t3e-22\t101\t35.89\n')
464 f.close()
465
466 def _writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self, inFileName):
467 f = open(inFileName, 'w')
468 f.write("##gff-version 3\n")
469 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
470 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n")
471 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n")
472 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n")
473
474 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
475 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
476
477 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n")
478 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n")
479
480 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n")
481 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n")
482
483 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890;OtherTargets=RLX-incomp_fTest05105818-B-R12-Map3_reversed 1100 1290, DTX-incomp_fTest05105818-B-P0.0-Map3 100 190, noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090\n")
484 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp10-1_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Parent=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890\n")
485
486 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n")
487 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp11-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n")
488
489 #TODO:
490 #Should this case really occur : If merging multiple match-parts, the current behaviour needs to be fixed to get correct subject start/end coordinates
491 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t388031\t388380\t0.0\t+\t.\tID=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;OtherTargets=DTX-incomp_fTest05105818-B-P0.0-Map3 119 309\n")
492 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388031\t388080\t3e-21\t+\t.\tID=mp12-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209\n")
493 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388081\t388380\t3e-22\t+\t.\tID=mp12-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350\n")
494 f.close()
495
496 def _writeTablesFile_withTESeqTables(self, tableType):
497 tableFile = open( self._tablesFileName, "w" )
498 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\t%s_refTEs_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName)
499 tableFile.write(string)
500 string = "%s_REPET_TEs\t%s\t%s_chr_bankBLRtx_%s\t%s_bankBLRtx_nt_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName)
501 tableFile.write(string)
502 tableFile.close()
503
504 def _writeTablesFile(self, tableType):
505 tableFile = open( self._tablesFileName, "w" )
506 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\n" % (self._projectName, tableType, self._projectName, tableType)
507 tableFile.write(string)
508 tableFile.close()
509
510 def _writePathFile(self,inFileName):
511 f = open(inFileName,'w')
512 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\tDTX-incomp_DmelChr4-L-B1-Map3\t228\t85\t1e-40\t84\t30.56\n')
513 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\tDTX-incomp_DmelChr4-L-B1-Map3\t522\t229\t1e-40\t106\t23.99\n')
514 f.write('3\tlm_SuperContig_29_v2\t78031\t78080\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n')
515 f.write('3\tlm_SuperContig_29_v2\t78081\t78588\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n')
516 f.close()
517
518 def _writePathFile2(self,inFileName):
519 f = open(inFileName,'w')
520 f.write('1\tchr1\t1\t100\tPROTOP_B:classII:TIR\t228\t85\t1e-20\t84\t30.56\n')
521 f.write('2\tchr1\t100\t1000\tDMRT1C:classI:?\t228\t85\t1e-30\t84\t30.56\n')
522 f.close()
523
524 def _writePathFile_refTEs_annotation(self,inFileName):
525 f = open(inFileName,'w')
526 f.write('6\tdmel_chr4\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n')
527 f.write('21\tdmel_chr4\t4630\t4704\tDTX-incomp_DmelChr4-B-P0.0-Map3\t232\t312\t0\t65\t84.4417\n')
528 f.write('21\tdmel_chr4\t4837\t4889\tDTX-incomp_DmelChr4-B-P0.0-Map3\t456\t512\t0\t46\t84.4417\n')
529 f.write('35\tdmel_chr1\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n')
530 f.close()
531
532 def _writePathFile_bankBLRtx_annotation(self,inFileName):
533 f = open(inFileName, 'w')
534 f.write('27\tdmel_chr4\t4412\t4501\tPROTOP_B:classII:TIR\t1082\t993\t5e-108\t702\t91.3066\n')
535 f.write('27\tdmel_chr4\t4483\t4652\tPROTOP_B:classII:TIR\t990\t821\t9e-146\t707\t86.5572\n')
536 f.write('27\tdmel_chr4\t4672\t4775\tPROTOP_B:classII:TIR\t787\t684\t9e-146\t707\t88.8013\n')
537 f.write('27\tdmel_chr4\t4782\t4889\tPROTOP_B:classII:TIR\t661\t553\t9e-141\t356\t53.7067\n')
538 f.write('66\tdmel_chr4\t3143\t3361\tDMRT1C:classI:?\t2206\t1988\t0\t1878\t89.7202\n')
539 f.write('66\tdmel_chr4\t3345\t3410\tDMRT1C:classI:?\t1719\t1654\t0\t313\t54.55\n')
540 f.write('66\tdmel_chr4\t3448\t3572\tDMRT1C:classI:?\t1619\t1495\t0\t1252\t77.3433\n')
541 f.write('66\tdmel_chr4\t3564\t3693\tDMRT1C:classI:?\t1509\t1380\t0\t1565\t83.7306\n')
542 f.write('66\tdmel_chr4\t3705\t3857\tDMRT1C:classI:?\t691\t539\t0\t1252\t84.5762\n')
543 f.write('66\tdmel_chr4\t3861\t3992\tDMRT1C:classI:?\t534\t403\t0\t1565\t67.1422\n')
544 f.write('66\tdmel_chr4\t3985\t4106\tDMRT1C:classI:?\t411\t290\t0\t1252\t76.612\n')
545 f.write('66\tdmel_chr4\t4102\t4248\tDMRT1C:classI:?\t285\t139\t0\t1565\t75.3027\n')
546 f.write('66\tdmel_chr4\t4236\t4364\tDMRT1C:classI:?\t152\t24\t0\t1565\t66.6657\n')
547 f.write('141\tdmel_chr4\t4917\t5007\tBATUMI_I:classI:LTR_retrotransposon\t7030\t7120\t2e-144\t984\t94.1756\n')
548 f.write('141\tdmel_chr4\t4976\t5195\tBATUMI_I:classI:LTR_retrotransposon\t7087\t7303\t2e-144\t2098\t82.2343\n')
549 f.close()
550
551 def _writePathFileReverse(self,inFileName):
552 f = open(inFileName,'w')
553 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t228\t85\t1e-40\t84\t30.56\n')
554 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t522\t229\t1e-40\t106\t23.99\n')
555 f.write('3\tlm_SuperContig_29_v2\t78080\t78031\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n')
556 f.write('3\tlm_SuperContig_29_v2\t78588\t78081\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n')
557 f.close()
558
559 def _writeSetFile(self,inFileName):
560 f = open(inFileName,'w')
561 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n')
562 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n')
563 f.write('3\tset3\tlm_SuperContig_29_v2\t78031\t78080\n')
564 f.write('3\tset3\tlm_SuperContig_29_v2\t78081\t78588\n')
565 f.close()
566
567 def _writeSetFileReverse(self,inFileName):
568 f = open(inFileName,'w')
569 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n')
570 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n')
571 f.write('3\tset3\tlm_SuperContig_29_v2\t78080\t78031\n')
572 f.write('3\tset3\tlm_SuperContig_29_v2\t78588\t78081\n')
573 f.close()
574
575 def _writeExpEmptyPathGFFFile(self, inFileName):
576 f = open(inFileName, 'w')
577 f.write("##gff-version 3\n")
578 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
579 f.close()
580
581 def _writeExpPathGFFFile(self, inFileName):
582 f = open(inFileName, 'w')
583 f.write("##gff-version 3\n")
584 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
585 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
586 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
587 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
588 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
589 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
590 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
591 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
592 f.write("##FASTA\n")
593 self._writeSeq1(f)
594 f.close()
595
596 def _writeExpEmptyPathGFFFileWithSeq(self, inFileName):
597 f = open(inFileName, 'w')
598 f.write("##gff-version 3\n")
599 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
600 f.write("##FASTA\n")
601 self._writeSeq2(f)
602 f.close()
603
604 def _writeExpPathGFFFile_without_seq(self, inFileName):
605 f = open(inFileName, 'w')
606 f.write("##gff-version 3\n")
607 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
608 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
609 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
610 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
611 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
612 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
613 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
614 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
615 f.close()
616
617 def _writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self, inFileName):
618 f = open(inFileName, 'w')
619 f.write("##gff-version 3\n")
620 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
621 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
622 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
623 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
624 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
625 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
626 f.close()
627
628 def _writeExpPathGFFFileReversed(self, inFileName):
629 f = open(inFileName, 'w')
630 f.write("##gff-version 3\n")
631 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
632 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n")
633 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n")
634 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n")
635 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n")
636 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209\n")
637 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n")
638 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n")
639 f.write("##FASTA\n")
640 self._writeSeq1(f)
641 f.close()
642
643 def _writeExpSetGFFFile(self, inFileName):
644 f = open(inFileName, 'w')
645 f.write("##gff-version 3\n")
646 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
647 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
648 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
649 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
650 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
651 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n")
652 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n")
653 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n")
654 f.write("##FASTA\n")
655 self._writeSeq1(f)
656 f.close()
657
658 def _writeExpSetGFFFileReversed(self, inFileName):
659 f = open(inFileName, 'w')
660 f.write("##gff-version 3\n")
661 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
662 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
663 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
664 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
665 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
666 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n")
667 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n")
668 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n")
669 f.write("##FASTA\n")
670 self._writeSeq1(f)
671 f.close()
672
673 def _writeFastaFile(self, inFileName):
674 f = open(inFileName,'w')
675 self._writeSeq2(f)
676 self._writeSeq1(f)
677 f.close()
678
679 def _writeFastaFileExtended(self, inFileName):
680 f = open(inFileName,'w')
681 self._writeSeq2(f)
682 self._writeSeq1(f)
683 f.write(">chr1\n")
684 f.write("CTAAGCTGCGCTATGTAG\n")
685 f.close()
686
687 def _writeSeq1(self, f):
688 f.write('>lm_SuperContig_29_v2\n')
689 f.write('CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAA\n')
690 f.write('GGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA\n')
691
692 def _writeSeq2(self, f):
693 f.write('>lm_SuperContig_30_v2\n')
694 f.write('TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\n')
695 f.write('CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\n')
696
697 def _writeFastaFile_DmelChr4(self, inFileName):
698 f = open(inFileName,'w')
699 f.write(">dmel_chr4\n")
700 f.write("CTAAGCTGCGCTATGTAG\n")
701 f.write(">dmel_chr1\n")
702 f.write("CGTAACGCTAGCGCTTATAGTGAGC\n")
703 f.close()
704
705
# Script entry point: hand control to the unittest runner only when this
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    unittest.main()