comparison test_repex_pipeline.py @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1d1b9e1b2e2f
1 #!/usr/bin/env python3
2 '''
3 Basic Tarean and RepeatExplorer tests
4 '''
5 import subprocess
6 import tempfile
7 import unittest
8 import os
9 import shutil
10
def check_for_missing_files(directory, file_list):
    '''Return the entries of file_list that do not exist under directory.

    Each entry is joined onto ``directory`` with ``os.path.join`` and tested
    with ``os.path.exists``, so an existing directory counts as present just
    like an existing file.

    Args:
        directory: base directory the relative paths are resolved against.
        file_list: iterable of paths relative to ``directory``.

    Returns:
        list of the missing entries, spelled exactly as in ``file_list`` and
        in the same order; empty when nothing is missing.
    '''
    return [
        f for f in file_list
        if not os.path.exists(os.path.join(directory, f))
    ]
21
22
class TestBasic(unittest.TestCase):
    '''Basic RepeatExplorer / TAREAN pipeline test case.

    Each pipeline test runs ``EXECUTABLE`` on a data set from ``test_data/``
    with a given set of command-line options and then checks that the
    process exited with status 0 and that every expected output file exists.
    Paths (``./seqclust``, ``test_data/...``) are relative, so the tests have
    to be started from the directory that contains them.
    '''
    # pipeline entry point, relative to the current working directory
    EXECUTABLE = "./seqclust"

    # file lists to check (paths are relative to the run's output directory)
    FILE_LIST_BASIC = [
        "./seqclust/clustering/clusters/dir_CL0001/hitsort_part.csv",
        "./seqclust/clustering/clusters/dir_CL0001/reads.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/reads_selection.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/dna_database_annotation.csv",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.GL",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_tmb.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_directed.RData",
        "./logfile.txt", "./style1.css", "./documentation.html",
        "./tarean_report.html", "./cluster_report.html",
        "./summary_histogram.png", "./index.html", "./sequences.db",
        "./hitsort.db", "./TAREAN_consensus_rank_1.fasta",
        "./TAREAN_consensus_rank_2.fasta", "./TAREAN_consensus_rank_3.fasta",
        "./TAREAN_consensus_rank_4.fasta", "./seqclust/clustering/hitsort",
        "./seqclust/clustering/hitsort.cls"
    ]
    # additionally expected from runs that are checked for assembly output
    FILE_LIST_ASSEMBLY = [
        "./seqclust/small_clusters_assembly/small_clusters.aln",
        "./seqclust/small_clusters_assembly/small_clusters.ace",
        "./seqclust/small_clusters_assembly/small_clusters.fasta"
    ]
    # additionally expected from the automatic filtering run ('-A')
    FILE_LIST_FILTERING = ["./seqclust/prerun/filter_sequences.fasta"]
    # additionally expected from the comparative run ('-P')
    FILE_LIST_COMPARATIVE = ["COMPARATIVE_ANALYSIS_COUNTS.csv"]
    # additionally expected from the custom database run ('-d')
    FILE_LIST_CUSTOM_DATABASE = [
        "./seqclust/custom_databases/extra_database",
        "./seqclust/clustering/clusters/dir_CL0001/custom_db_extra_database_annotation.csv"
    ]

    def setUp(self):
        '''No per-test fixture; each run creates its own temporary files.'''

    # helper function
    def tarean_run(self, cmd_options, file_list):
        '''Run the pipeline once and check exit status and output files.

        Args:
            cmd_options: command-line arguments appended after
                ``-l <logfile> -v <output directory>``.
            file_list: paths, relative to the output directory, that must
                exist once the run has finished.

        Fails the calling test when the exit status is non-zero (the log
        file is printed first) or when any expected file is missing.
        '''
        # output goes to tmp directory
        tmpdir = tempfile.mkdtemp()
        # Only the path is handed to the pipeline; close our own handle right
        # away so it is not leaked for the duration of the run.
        with tempfile.NamedTemporaryFile(delete=False) as logfile:
            logfile_name = logfile.name
        command = [self.EXECUTABLE, '-l', logfile_name, '-v', tmpdir] + list(cmd_options)
        print("\n------------------------------------------------------")
        print("Temp files:")
        print(" tmpdir : ", tmpdir)
        print(" logfile : ", logfile_name)
        print("------------------------------------------------------")
        print(command)
        status = subprocess.run(command).returncode
        missing_files = check_for_missing_files(directory=tmpdir,
                                                file_list=file_list)
        if status:
            # print log file
            print("Non zero exit status!")
            with open(logfile_name) as f:
                print(f.read())

        self.assertEqual(status, 0)
        self.assertEqual(
            len(missing_files),
            0,
            msg="\n missing files: \n" + "\n".join(missing_files))
        # Reached only when both assertions passed.
        # NOTE(review): on failure the temporary directory and log file stay
        # on disk - presumably so the paths printed above can be inspected;
        # confirm this is intended.
        shutil.rmtree(tmpdir)
        os.remove(logfile_name)

    def test_help(self):
        '''-h prints a usage message and exits with status 0'''
        result = subprocess.run([self.EXECUTABLE, "-h"],
                                stdout=subprocess.PIPE)
        output = result.stdout.decode(errors="replace")
        self.assertRegex(output, "usage")
        # argparse titles this section "optional arguments:" up to Python 3.9
        # and "options:" from Python 3.10 on; accept both.
        # NOTE(review): assumes the help text is generated by argparse - confirm.
        self.assertRegex(output, r"(optional arguments|options):")
        self.assertEqual(result.returncode, 0)

    def test_basic_no_merging_tarean(self):
        '''-t run on the 10k paired reads, sample size 6000, no -M'''
        cmd_options = ['-t', '-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean(self):
        '''-t run on the 10k paired reads, sample size 6000, with -M 0.2'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean_dust_off(self):
        '''-t run with -M 0.2 and the ILLUMINA_DUST_OFF option set'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000', "-opt", "ILLUMINA_DUST_OFF",
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging_tarean(self):
        '''Using more data with tarean (25k paired reads)'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging2_tarean(self):
        '''Using more data with tarean 300k reads'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_short_comparative_re(self):
        '''comparative analysis, two species, small run'''
        cmd_options = ['-P', '3', '-p', '-m', '0.01',
                       'test_data/sequences_comparative.fasta']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_COMPARATIVE)

    # REPEATEXPLORER - full runs
    def test_basic_no_merging_re(self):
        '''full run (no -t) on the 10k paired reads, sample size 6000, no -M'''
        cmd_options = ['-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_no_merging_re_diamond(self):
        '''full run (no -t) on the 10k paired reads using -D DIAMOND, no -M'''
        cmd_options = ['-p', '-s', '6000', '-D', 'DIAMOND', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_with_merging_re(self):
        '''full run (no -t) on the 10k paired reads, sample size 6000, with -M 0.2'''
        cmd_options = ['-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re(self):
        '''Using more data (25k paired reads), full run with -M 0.1'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re_diamond(self):
        '''Using more data (25k paired reads), full run with -M 0.1 and using diamond'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01', '-D', 'DIAMOND',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging2_re(self):
        '''Using more data (300k reads), full run with -M 0.1'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_and_filtering_re(self):
        '''Using more data (ceu 200k), full run, test of automatic filtering'''
        cmd_options = ['-A', '-p', '-M', '0.2', '-m', '0.01',
                       'test_data/ceu_200k.fasta']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_FILTERING + self.FILE_LIST_ASSEMBLY)

    def test_custom_database_re(self):
        '''full run (no -t) on the 10k paired reads with a custom database (-d)'''
        cmd_options = ['-p', '-d', 'test_data/extra_database', 'extra_database',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options,
                        file_list=self.FILE_LIST_BASIC + self.FILE_LIST_CUSTOM_DATABASE)

    def tearDown(self):
        '''Nothing to clean up here; tarean_run removes its own temporary files.'''
193
194
# Names of TestBasic test methods, grouped by the kind of run and by run time.
SHORT_TASK_NAME_LIST_TAREAN = ['test_help', 'test_basic_no_merging_tarean',
                               'test_basic_with_merging_tarean',
                               'test_basic_with_merging_tarean_dust_off']
LONG_TASK_NAME_LIST_TAREAN = ['test_long_with_merging_tarean',
                              'test_long_with_merging2_tarean']
SHORT_TASK_NAME_LIST_RE = ['test_basic_no_merging_re',
                           'test_basic_with_merging_re',
                           'test_basic_no_merging_re_diamond']
LONG_TASK_NAME_LIST_RE = ['test_long_with_merging_re',
                          'test_long_with_merging2_re',
                          'test_long_with_merging_and_filtering_re',
                          'test_long_with_merging_re_diamond']

COMPARATIVE_LIST = ['test_short_comparative_re']
# Must name an existing TestBasic method: TestBasic(name) raises ValueError
# for an unknown name. The list used to hold 'test_short_custom_database',
# which TestBasic does not define.
CUSTOM_DATABASE_LIST = ['test_custom_database_re']

# Test suites:
SHORT_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                         for i in SHORT_TASK_NAME_LIST_TAREAN])
LONG_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                        for i in LONG_TASK_NAME_LIST_TAREAN])
COMPARATIVE_SUITE = unittest.TestSuite([TestBasic(i) for i in COMPARATIVE_LIST])
# built from the name list, like every other suite
CUSTOM_DB_SUITE = unittest.TestSuite([TestBasic(i) for i in CUSTOM_DATABASE_LIST])

SHORT_RE_SUITE = unittest.TestSuite([TestBasic(i) for i in SHORT_TASK_NAME_LIST_RE])
LONG_RE_SUITE = unittest.TestSuite([TestBasic(i) for i in LONG_TASK_NAME_LIST_RE])

SHORT_SUITE = unittest.TestSuite([SHORT_RE_SUITE, SHORT_TAREAN_SUITE,
                                  COMPARATIVE_SUITE, CUSTOM_DB_SUITE])

LONG_SUITE = unittest.TestSuite([LONG_RE_SUITE, LONG_TAREAN_SUITE])
# original name of the long suite, kept so existing references keep working
LONG_LONG = LONG_SUITE

# for single test testing
if __name__ == '__main__':
    unittest.main(verbosity=2)