0
|
1 #!/usr/bin/env python3
|
|
2 '''
|
|
3 Basic Tarean and RepeatExplorer tests
|
|
4 '''
|
|
5 import subprocess
|
|
6 import tempfile
|
|
7 import unittest
|
|
8 import os
|
|
9 import shutil
|
|
10
|
|
def check_for_missing_files(directory, file_list):
    ''' Return the entries of file_list that do not exist in directory.

    Each entry is joined to directory with os.path.join and tested with
    os.path.exists, so existing directories count as present as well.
    The result keeps the order of file_list and is an empty list when
    nothing is missing.
    '''
    return [f for f in file_list
            if not os.path.exists(os.path.join(directory, f))]
|
|
21
|
|
22
|
|
class TestBasic(unittest.TestCase):
    ''' basic repex-tarean testcase

    Every test launches EXECUTABLE as a child process. Apart from test_help,
    each test delegates to tarean_run, which checks the exit status and the
    presence of the expected output files.
    '''
    EXECUTABLE = "./seqclust"

    # file lists to check; tarean_run looks these paths up inside the
    # temporary directory it passes to the executable
    FILE_LIST_BASIC = [
        "./seqclust/clustering/clusters/dir_CL0001/hitsort_part.csv",
        "./seqclust/clustering/clusters/dir_CL0001/reads.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/reads_selection.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/dna_database_annotation.csv",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.GL",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_tmb.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_directed.RData",
        "./logfile.txt", "./style1.css", "./documentation.html",
        "./tarean_report.html", "./cluster_report.html",
        "./summary_histogram.png", "./index.html", "./sequences.db",
        "./hitsort.db", "./TAREAN_consensus_rank_1.fasta",
        "./TAREAN_consensus_rank_2.fasta", "./TAREAN_consensus_rank_3.fasta",
        "./TAREAN_consensus_rank_4.fasta", "./seqclust/clustering/hitsort",
        "./seqclust/clustering/hitsort.cls"
    ]
    FILE_LIST_ASSEMBLY = [
        "./seqclust/small_clusters_assembly/small_clusters.aln",
        "./seqclust/small_clusters_assembly/small_clusters.ace",
        "./seqclust/small_clusters_assembly/small_clusters.fasta"
    ]
    FILE_LIST_FILTERING = ["./seqclust/prerun/filter_sequences.fasta"]
    FILE_LIST_COMPARATIVE = ["COMPARATIVE_ANALYSIS_COUNTS.csv"]
    FILE_LIST_CUSTOM_DATABASE = [
        "./seqclust/custom_databases/extra_database",
        "./seqclust/clustering/clusters/dir_CL0001/custom_db_extra_database_annotation.csv"
    ]

    def setUp(self):
        ''' Nothing to prepare; each run creates its own temporary files '''
        pass

    # helper function
    def tarean_run(self, cmd_options, file_list):
        ''' Run EXECUTABLE with cmd_options and verify the results.

        The command line is EXECUTABLE, '-l' <temporary log file>,
        '-v' <temporary directory>, followed by cmd_options. The test fails
        if the exit status is non-zero or if any path from file_list is
        missing in the temporary directory.

        The temporary directory and the log file are removed only when both
        checks pass; after a failure they are left on disk (their paths are
        printed before the run starts).
        '''
        # output goes to tmp directory
        tmpdir = tempfile.mkdtemp()
        # Only the file name is needed here. Close the handle immediately so
        # the descriptor is not leaked while the child process is running.
        with tempfile.NamedTemporaryFile(delete=False) as log_handle:
            logfile = log_handle.name
        # Build the command once so the printed and the executed command
        # cannot differ.
        cmd = [self.EXECUTABLE, '-l', logfile, '-v', tmpdir] + list(cmd_options)
        print("\n------------------------------------------------------")
        print("Temp files:")
        print("   tmpdir  : ", tmpdir)
        print("   logfile : ", logfile)
        print("------------------------------------------------------")
        print(cmd)
        status = subprocess.run(cmd).returncode
        missing_files = check_for_missing_files(directory=tmpdir,
                                                file_list=file_list)
        if status:
            # print log file
            print("Non zero exit status!")
            with open(logfile) as f:
                print(f.read())

        self.assertEqual(status, 0)
        self.assertEqual(
            missing_files,
            [],
            msg="\n missing files: \n" + "\n".join(missing_files))
        # Reached only when the assertions above passed.
        shutil.rmtree(tmpdir)
        os.remove(logfile)

    def test_help(self):
        '''Test if help option works '''
        result = subprocess.run([self.EXECUTABLE, "-h"],
                                stdout=subprocess.PIPE)
        output = result.stdout.decode(errors="replace")
        self.assertRegex(output, "usage")
        # NOTE(review): presumably the help text is generated by argparse,
        # which renamed the "optional arguments:" heading to "options:" in
        # Python 3.10 -- accept either heading.
        self.assertRegex(output, "optional arguments|options:")
        self.assertEqual(result.returncode, 0)

    def test_basic_no_merging_tarean(self):
        ''' TAREAN run (-t) on the LAS_paired_10k test set, no -M option '''
        cmd_options = ['-t', '-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean(self):
        ''' TAREAN run (-t) on the LAS_paired_10k test set with -M 0.2 '''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean_dust_off(self):
        ''' TAREAN run (-t) with -M 0.2 and -opt ILLUMINA_DUST_OFF '''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000', "-opt", "ILLUMINA_DUST_OFF",
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging_tarean(self):
        '''Using more data with tarean'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging2_tarean(self):
        '''Using more data with tarean 300k reads'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_short_comparative_re(self):
        '''comparative analysis, two species, small run'''
        cmd_options = ['-P', '3', '-p', '-m', '0.01',
                       'test_data/sequences_comparative.fasta']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_COMPARATIVE)

    # REPEATEXPLORER - full runs
    def test_basic_no_merging_re(self):
        ''' RepeatExplorer run on the LAS_paired_10k test set, no -M option '''
        cmd_options = ['-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_no_merging_re_diamond(self):
        ''' RepeatExplorer run on the LAS_paired_10k test set with -D DIAMOND '''
        cmd_options = ['-p', '-s', '6000', '-D', 'DIAMOND', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_with_merging_re(self):
        ''' RepeatExplorer run on the LAS_paired_10k test set with -M 0.2 '''
        cmd_options = ['-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re(self):
        '''Using more data (LAS_paired_25k test set), RepeatExplorer run'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re_diamond(self):
        '''Using more data (LAS_paired_25k test set) and using diamond'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01', '-D', 'DIAMOND',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging2_re(self):
        '''Using more data (LAS_paired_300k test set), RepeatExplorer run'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_and_filtering_re(self):
        '''Using more data, test of automatic filtering'''
        cmd_options = ['-A', '-p', '-M', '0.2', '-m', '0.01',
                       'test_data/ceu_200k.fasta']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_FILTERING + self.FILE_LIST_ASSEMBLY)

    def test_custom_database_re(self):
        ''' RepeatExplorer run with an extra custom database passed via -d '''
        cmd_options = ['-p', '-d', 'test_data/extra_database', 'extra_database', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC + self.FILE_LIST_CUSTOM_DATABASE)

    def tearDown(self):
        ''' Nothing to release; tarean_run removes its own temporary files '''
        pass
|
|
193
|
|
194
|
|
# Names of TestBasic test methods, grouped into short and long runs of the
# TAREAN (tests passing -t) and RepeatExplorer pipelines.
SHORT_TASK_NAME_LIST_TAREAN = ['test_help', 'test_basic_no_merging_tarean',
                               'test_basic_with_merging_tarean',
                               'test_basic_with_merging_tarean_dust_off']
LONG_TASK_NAME_LIST_TAREAN = ['test_long_with_merging_tarean',
                              'test_long_with_merging2_tarean']
SHORT_TASK_NAME_LIST_RE = ['test_basic_no_merging_re',
                           'test_basic_with_merging_re',
                           'test_basic_no_merging_re_diamond']
LONG_TASK_NAME_LIST_RE = ['test_long_with_merging_re',
                          'test_long_with_merging2_re',
                          'test_long_with_merging_and_filtering_re',
                          'test_long_with_merging_re_diamond']

COMPARATIVE_LIST = ['test_short_comparative_re']
# Must name an existing TestBasic method: TestBasic(name) raises ValueError
# for an unknown method name.
CUSTOM_DATABASE_LIST = ['test_custom_database_re']

# Test suites:
SHORT_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                         for i in SHORT_TASK_NAME_LIST_TAREAN])
LONG_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                        for i in LONG_TASK_NAME_LIST_TAREAN])
COMPARATIVE_SUITE = unittest.TestSuite([TestBasic(i) for i in COMPARATIVE_LIST])
CUSTOM_DB_SUITE = unittest.TestSuite([TestBasic(i) for i in CUSTOM_DATABASE_LIST])

SHORT_RE_SUITE = unittest.TestSuite([TestBasic(i) for i in SHORT_TASK_NAME_LIST_RE])
LONG_RE_SUITE = unittest.TestSuite([TestBasic(i) for i in LONG_TASK_NAME_LIST_RE])

SHORT_SUITE = unittest.TestSuite([SHORT_RE_SUITE, SHORT_TAREAN_SUITE,
                                  COMPARATIVE_SUITE, CUSTOM_DB_SUITE])

LONG_LONG = unittest.TestSuite([LONG_RE_SUITE, LONG_TAREAN_SUITE])
# Alias following the *_SUITE naming of the other suites; LONG_LONG is kept
# so existing references continue to work.
LONG_SUITE = LONG_LONG

# for single test testing
if __name__ == '__main__':
    unittest.main(verbosity=2)
|