Mercurial > repos > petr-novak > repeatrxplorer
diff test_repex_pipeline.py @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3
'''
Basic Tarean and RepeatExplorer tests

Every test runs the program named by TestBasic.EXECUTABLE as a subprocess and
checks its exit status and the files it leaves in a temporary output
directory.  EXECUTABLE and the test_data/ inputs are relative paths, so the
tests have to be started from the directory that contains both.
'''
import os
import shutil
import subprocess
import tempfile
import unittest


def check_for_missing_files(directory, file_list):
    '''Return the entries of file_list that do not exist under directory.

    Each entry is joined onto directory with os.path.join before the check.
    The returned list holds the original (un-joined) entries in their
    original order; it is empty when nothing is missing.
    '''
    return [
        name for name in file_list
        if not os.path.exists(os.path.join(directory, name))
    ]


class TestBasic(unittest.TestCase):
    ''' basic repex-tarean testcase '''
    # Program under test; relative, so it is resolved against the current
    # working directory.
    EXECUTABLE = "./seqclust"

    # file lists to check (paths are relative to the run's output directory)
    FILE_LIST_BASIC = [
        "./seqclust/clustering/clusters/dir_CL0001/hitsort_part.csv",
        "./seqclust/clustering/clusters/dir_CL0001/reads.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/reads_selection.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/dna_database_annotation.csv",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.GL",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_tmb.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_directed.RData",
        "./logfile.txt", "./style1.css", "./documentation.html",
        "./tarean_report.html", "./cluster_report.html",
        "./summary_histogram.png", "./index.html", "./sequences.db",
        "./hitsort.db", "./TAREAN_consensus_rank_1.fasta",
        "./TAREAN_consensus_rank_2.fasta", "./TAREAN_consensus_rank_3.fasta",
        "./TAREAN_consensus_rank_4.fasta", "./seqclust/clustering/hitsort",
        "./seqclust/clustering/hitsort.cls"
    ]
    FILE_LIST_ASSEMBLY = [
        "./seqclust/small_clusters_assembly/small_clusters.aln",
        "./seqclust/small_clusters_assembly/small_clusters.ace",
        "./seqclust/small_clusters_assembly/small_clusters.fasta"
    ]
    FILE_LIST_FILTERING = ["./seqclust/prerun/filter_sequences.fasta"]
    FILE_LIST_COMPARATIVE = ["COMPARATIVE_ANALYSIS_COUNTS.csv"]
    FILE_LIST_CUSTOM_DATABASE = [
        "./seqclust/custom_databases/extra_database",
        "./seqclust/clustering/clusters/dir_CL0001/custom_db_extra_database_annotation.csv"
    ]

    def setUp(self):
        '''Skip the test when EXECUTABLE cannot be run from the current directory.

        Without this guard every test would stop with a bare
        FileNotFoundError from subprocess when the suite is started from the
        wrong directory.
        '''
        if shutil.which(self.EXECUTABLE) is None:
            self.skipTest(
                "{} not found or not executable; run the tests from the "
                "directory that contains it".format(self.EXECUTABLE))

    # helper function
    def tarean_run(self, cmd_options, file_list):
        '''Run EXECUTABLE with cmd_options and verify the result.

        The program is called as
        ``EXECUTABLE -l <logfile> -v <tmpdir> <cmd_options...>`` with a fresh
        temporary log file and temporary directory.  The test fails when the
        exit status is non-zero (the log file is printed first) or when any
        entry of file_list is missing under the temporary directory.

        The temporary directory and log file are removed only when both
        checks pass; after a failure they are left in place (their paths are
        printed before the run), presumably so they can be inspected.
        '''
        # output goes to tmp directory
        tmpdir = tempfile.mkdtemp()
        # Only the name is handed to the subprocess, so close our own handle
        # straight away instead of leaking the descriptor.
        with tempfile.NamedTemporaryFile(delete=False) as logfile:
            logfile_name = logfile.name
        command = [self.EXECUTABLE, '-l', logfile_name, '-v', tmpdir]
        command += list(cmd_options)
        print("\n------------------------------------------------------")
        print("Temp files:")
        print(" tmpdir : ", tmpdir)
        print(" logfile : ", logfile_name)
        print("------------------------------------------------------")
        print(command)
        status = subprocess.run(command).returncode
        missing_files = check_for_missing_files(directory=tmpdir,
                                                file_list=file_list)
        if status:
            # print log file
            print("Non zero exit status!")
            with open(logfile_name) as f:
                print(f.read())

        self.assertEqual(status, 0)
        self.assertEqual(
            missing_files,
            [],
            msg="\n missing files: \n" + "\n".join(missing_files))
        shutil.rmtree(tmpdir)
        os.remove(logfile_name)

    def test_help(self):
        '''Test if help option works '''
        proc = subprocess.run([self.EXECUTABLE, "-h"],
                              stdout=subprocess.PIPE)
        output = proc.stdout.decode(errors="replace")
        self.assertRegex(output, "usage")
        # argparse titles this section "optional arguments:" up to Python 3.9
        # and "options:" from 3.10 on; accept both.
        # NOTE(review): assumes the help text is generated by argparse - confirm.
        self.assertRegex(output, r"(optional arguments|options):")
        self.assertEqual(proc.returncode, 0)

    # TAREAN - runs with the '-t' option
    def test_basic_no_merging_tarean(self):
        '''TAREAN run (-t) on LAS_paired_10k.fas without -M'''
        cmd_options = ['-t', '-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean(self):
        '''TAREAN run (-t) on LAS_paired_10k.fas with -M 0.2'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean_dust_off(self):
        '''TAREAN run (-t) with -M 0.2 and -opt ILLUMINA_DUST_OFF'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       "-opt", "ILLUMINA_DUST_OFF",
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging_tarean(self):
        '''Using more data with tarean'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging2_tarean(self):
        '''Using more data with tarean 300k reads'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_short_comparative_re(self):
        '''comparative analysis, two species, small run'''
        cmd_options = ['-P', '3', '-p', '-m', '0.01',
                       'test_data/sequences_comparative.fasta']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_COMPARATIVE)

    # REPEATEXPLORER - full runs
    def test_basic_no_merging_re(self):
        '''RepeatExplorer run on LAS_paired_10k.fas without -M'''
        cmd_options = ['-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_no_merging_re_diamond(self):
        '''RepeatExplorer run on LAS_paired_10k.fas without -M, using -D DIAMOND'''
        cmd_options = ['-p', '-s', '6000', '-D', 'DIAMOND',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_with_merging_re(self):
        '''RepeatExplorer run on LAS_paired_10k.fas with -M 0.2'''
        cmd_options = ['-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re(self):
        '''Using more data with RepeatExplorer'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re_diamond(self):
        '''Using more data with RepeatExplorer and using diamond'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01', '-D', 'DIAMOND',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging2_re(self):
        '''Using more data with RepeatExplorer, 300k reads'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_and_filtering_re(self):
        '''Using more data with RepeatExplorer, test of automatic filtering'''
        cmd_options = ['-A', '-p', '-M', '0.2', '-m', '0.01',
                       'test_data/ceu_200k.fasta']
        self.tarean_run(
            cmd_options,
            file_list=(self.FILE_LIST_BASIC + self.FILE_LIST_FILTERING
                       + self.FILE_LIST_ASSEMBLY))

    def test_custom_database_re(self):
        '''RepeatExplorer run with a custom database (-d)'''
        cmd_options = ['-p', '-d', 'test_data/extra_database',
                       'extra_database', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_CUSTOM_DATABASE)


def _make_suite(test_names):
    '''Build a TestSuite holding one TestBasic instance per method name.'''
    return unittest.TestSuite([TestBasic(name) for name in test_names])


SHORT_TASK_NAME_LIST_TAREAN = ['test_help', 'test_basic_no_merging_tarean',
                               'test_basic_with_merging_tarean',
                               'test_basic_with_merging_tarean_dust_off']
LONG_TASK_NAME_LIST_TAREAN = ['test_long_with_merging_tarean',
                              'test_long_with_merging2_tarean']
SHORT_TASK_NAME_LIST_RE = ['test_basic_no_merging_re',
                           'test_basic_with_merging_re',
                           'test_basic_no_merging_re_diamond']
LONG_TASK_NAME_LIST_RE = ['test_long_with_merging_re',
                          'test_long_with_merging2_re',
                          'test_long_with_merging_and_filtering_re',
                          'test_long_with_merging_re_diamond']

COMPARATIVE_LIST = ['test_short_comparative_re']
# Must name an existing TestBasic method: TestBasic(name) raises ValueError
# for an unknown name.
CUSTOM_DATABASE_LIST = ['test_custom_database_re']

# Test suites:
SHORT_TAREAN_SUITE = _make_suite(SHORT_TASK_NAME_LIST_TAREAN)
LONG_TAREAN_SUITE = _make_suite(LONG_TASK_NAME_LIST_TAREAN)
COMPARATIVE_SUITE = _make_suite(COMPARATIVE_LIST)
CUSTOM_DB_SUITE = _make_suite(CUSTOM_DATABASE_LIST)

SHORT_RE_SUITE = _make_suite(SHORT_TASK_NAME_LIST_RE)
LONG_RE_SUITE = _make_suite(LONG_TASK_NAME_LIST_RE)

SHORT_SUITE = unittest.TestSuite([SHORT_RE_SUITE, SHORT_TAREAN_SUITE,
                                  COMPARATIVE_SUITE, CUSTOM_DB_SUITE])

LONG_LONG = unittest.TestSuite([LONG_RE_SUITE, LONG_TAREAN_SUITE])
# Alias that follows the SHORT_SUITE naming; LONG_LONG is kept for existing
# callers.
LONG_SUITE = LONG_LONG

# for single test testing
if __name__ == '__main__':
    unittest.main(verbosity=2)