Mercurial > repos > petr-novak > repeatrxplorer
comparison test_repex_pipeline.py @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1d1b9e1b2e2f |
---|---|
1 #!/usr/bin/env python3 | |
2 ''' | |
3 Basic Tarean and RepeatExplorer tests | |
4 ''' | |
5 import subprocess | |
6 import tempfile | |
7 import unittest | |
8 import os | |
9 import shutil | |
10 | |
def check_for_missing_files(directory, file_list):
    '''Return the entries of ``file_list`` that do not exist in ``directory``.

    Each entry is joined onto ``directory`` with ``os.path.join`` and tested
    with ``os.path.exists``, so both files and directories count as present.

    Args:
        directory: base directory the relative paths are resolved against.
        file_list: iterable of paths relative to ``directory``.

    Returns:
        A list with the missing entries, exactly as given in ``file_list``
        and in the same order; an empty list when nothing is missing.
    '''
    return [f for f in file_list
            if not os.path.exists(os.path.join(directory, f))]
21 | |
22 | |
class TestBasic(unittest.TestCase):
    '''Basic RepeatExplorer / TAREAN pipeline test case.

    Every ``test_*`` method except ``test_help`` runs ``EXECUTABLE`` in a
    subprocess through ``tarean_run`` and then checks two things: the exit
    status is zero and every expected output file exists in the output
    directory.  Paths such as ``./seqclust`` and ``test_data/...`` are
    relative, so the tests have to be started from the directory that
    contains them.
    '''
    EXECUTABLE = "./seqclust"

    # File lists to check; all paths are relative to the output directory.
    FILE_LIST_BASIC = [
        "./seqclust/clustering/clusters/dir_CL0001/hitsort_part.csv",
        "./seqclust/clustering/clusters/dir_CL0001/reads.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/reads_selection.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/dna_database_annotation.csv",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.GL",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_tmb.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_directed.RData",
        "./logfile.txt", "./style1.css", "./documentation.html",
        "./tarean_report.html", "./cluster_report.html",
        "./summary_histogram.png", "./index.html", "./sequences.db",
        "./hitsort.db", "./TAREAN_consensus_rank_1.fasta",
        "./TAREAN_consensus_rank_2.fasta", "./TAREAN_consensus_rank_3.fasta",
        "./TAREAN_consensus_rank_4.fasta", "./seqclust/clustering/hitsort",
        "./seqclust/clustering/hitsort.cls"
    ]
    FILE_LIST_ASSEMBLY = [
        "./seqclust/small_clusters_assembly/small_clusters.aln",
        "./seqclust/small_clusters_assembly/small_clusters.ace",
        "./seqclust/small_clusters_assembly/small_clusters.fasta"
    ]
    FILE_LIST_FILTERING = ["./seqclust/prerun/filter_sequences.fasta"]
    FILE_LIST_COMPARATIVE = ["COMPARATIVE_ANALYSIS_COUNTS.csv"]
    FILE_LIST_CUSTOM_DATABASE = [
        "./seqclust/custom_databases/extra_database",
        "./seqclust/clustering/clusters/dir_CL0001/custom_db_extra_database_annotation.csv"
    ]

    def setUp(self):
        '''No shared fixture; each run creates its own temporary output.'''
        pass

    # helper function
    def tarean_run(self, cmd_options, file_list):
        '''Run the pipeline once and verify exit status and output files.

        The executable is called as
        ``EXECUTABLE -l <logfile> -v <tmpdir> <cmd_options...>`` with a fresh
        temporary log file and output directory.

        Args:
            cmd_options: sequence of extra command line arguments, ending
                with the input file.
            file_list: paths (relative to the output directory) that must
                exist after the run.

        The temporary directory and log file are removed only when both
        assertions pass.  On failure they are left in place; their paths are
        printed before the run, presumably so the output can be inspected.
        '''
        # output goes to tmp directory
        tmpdir = tempfile.mkdtemp()
        logfile = tempfile.NamedTemporaryFile(delete=False)
        # Only the path is needed: the subprocess writes the log itself, so
        # release our handle right away instead of leaking the descriptor.
        logfile.close()
        command = [self.EXECUTABLE, '-l', logfile.name, '-v', tmpdir
                   ] + list(cmd_options)
        print("\n------------------------------------------------------")
        print("Temp files:")
        print("   tmpdir  : ", tmpdir)
        print("   logfile : ", logfile.name)
        print("------------------------------------------------------")
        print(command)
        p = subprocess.Popen(args=command)
        p.wait()
        status = p.returncode
        missing_files = check_for_missing_files(directory=tmpdir,
                                                file_list=file_list)
        if status:
            # print log file to make the failure diagnosable from the output
            print("Non zero exit status!")
            with open(logfile.name) as f:
                print(f.read())

        self.assertEqual(status, 0)
        self.assertEqual(
            len(missing_files),
            0,
            msg="\n missing files: \n" + "\n".join(missing_files))
        shutil.rmtree(tmpdir)
        os.remove(logfile.name)

    def test_help(self):
        '''Test if help option works '''
        p = subprocess.Popen(args=[self.EXECUTABLE, "-h"],
                             stdout=subprocess.PIPE)
        # communicate() drains the pipe, closes it and waits for the process.
        stdout_data, _ = p.communicate()
        output = stdout_data.decode(errors="replace")
        status = p.returncode
        self.assertRegex(output, "usage")
        # argparse prints "optional arguments:" up to Python 3.9 and
        # "options:" from 3.10 on; accept both headings.
        # NOTE(review): assumes the executable builds its help with argparse.
        self.assertRegex(output, "optional arguments|options:")
        self.assertEqual(status, 0)

    def test_basic_no_merging_tarean(self):
        '''TAREAN run (-t) on the 10k read set, without the -M option'''
        cmd_options = ['-t', '-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean(self):
        '''TAREAN run (-t) on the 10k read set with -M 0.2'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean_dust_off(self):
        '''TAREAN run (-t) with -M 0.2 and -opt ILLUMINA_DUST_OFF'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       "-opt", "ILLUMINA_DUST_OFF",
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging_tarean(self):
        '''Using more data with tarean'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging2_tarean(self):
        '''Using more data with tarean 300k reads'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_short_comparative_re(self):
        '''comparative analysis, two species, small run'''
        cmd_options = ['-P', '3', '-p', '-m', '0.01',
                       'test_data/sequences_comparative.fasta']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_COMPARATIVE)

    # REPEATEXPLORER - full runs
    def test_basic_no_merging_re(self):
        '''Full run (no -t) on the 10k read set, without the -M option'''
        cmd_options = ['-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_no_merging_re_diamond(self):
        '''Full run (no -t) on the 10k read set with -D DIAMOND'''
        cmd_options = ['-p', '-s', '6000', '-D', 'DIAMOND',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_with_merging_re(self):
        '''Full run (no -t) on the 10k read set with -M 0.2'''
        cmd_options = ['-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re(self):
        '''Full run (no -t) using more data (25k read set)'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re_diamond(self):
        '''Full run (no -t) using more data (25k read set) and -D DIAMOND'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01', '-D', 'DIAMOND',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging2_re(self):
        '''Full run (no -t) using more data, 300k reads'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_and_filtering_re(self):
        '''Full run using more data, test of automatic filtering (-A)'''
        cmd_options = ['-A', '-p', '-M', '0.2', '-m', '0.01',
                       'test_data/ceu_200k.fasta']
        self.tarean_run(
            cmd_options,
            file_list=(self.FILE_LIST_BASIC + self.FILE_LIST_FILTERING +
                       self.FILE_LIST_ASSEMBLY))

    def test_custom_database_re(self):
        '''Full run with a custom database passed via -d'''
        cmd_options = ['-p', '-d', 'test_data/extra_database',
                       'extra_database', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_CUSTOM_DATABASE)

    def tearDown(self):
        '''Nothing to release; tarean_run cleans up after a passing run.'''
        pass
193 | |
194 | |
# Names of TestBasic methods, grouped by pipeline mode and expected run time.
SHORT_TASK_NAME_LIST_TAREAN = ['test_help', 'test_basic_no_merging_tarean',
                               'test_basic_with_merging_tarean',
                               'test_basic_with_merging_tarean_dust_off']
LONG_TASK_NAME_LIST_TAREAN = ['test_long_with_merging_tarean',
                              'test_long_with_merging2_tarean']
SHORT_TASK_NAME_LIST_RE = ['test_basic_no_merging_re',
                           'test_basic_with_merging_re',
                           'test_basic_no_merging_re_diamond']
LONG_TASK_NAME_LIST_RE = ['test_long_with_merging_re',
                          'test_long_with_merging2_re',
                          'test_long_with_merging_and_filtering_re',
                          'test_long_with_merging_re_diamond']

COMPARATIVE_LIST = ['test_short_comparative_re']
# Must name an existing TestBasic method: unittest raises ValueError when a
# TestCase is instantiated with an unknown method name.
CUSTOM_DATABASE_LIST = ['test_custom_database_re']

# Test suites:
SHORT_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                         for i in SHORT_TASK_NAME_LIST_TAREAN])
LONG_TAREAN_SUITE = unittest.TestSuite([TestBasic(i)
                                        for i in LONG_TASK_NAME_LIST_TAREAN])
COMPARATIVE_SUITE = unittest.TestSuite([TestBasic(i)
                                        for i in COMPARATIVE_LIST])
CUSTOM_DB_SUITE = unittest.TestSuite([TestBasic(i)
                                      for i in CUSTOM_DATABASE_LIST])

SHORT_RE_SUITE = unittest.TestSuite([TestBasic(i)
                                     for i in SHORT_TASK_NAME_LIST_RE])
LONG_RE_SUITE = unittest.TestSuite([TestBasic(i)
                                    for i in LONG_TASK_NAME_LIST_RE])

SHORT_SUITE = unittest.TestSuite([SHORT_RE_SUITE, SHORT_TAREAN_SUITE,
                                  COMPARATIVE_SUITE, CUSTOM_DB_SUITE])

LONG_LONG = unittest.TestSuite([LONG_RE_SUITE, LONG_TAREAN_SUITE])
# Alias named consistently with SHORT_SUITE; LONG_LONG is kept for
# existing callers.
LONG_SUITE = LONG_LONG

# for single test testing
if __name__ == '__main__':
    unittest.main(verbosity=2)