Mercurial > repos > mvdbeek > dedup_hash
comparison test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author | mvdbeek |
---|---|
date | Wed, 23 Nov 2016 07:49:05 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f33e9e6a6c88 |
---|---|
1 import hashlib | |
2 import inspect | |
3 import os | |
4 import subprocess | |
5 import sys | |
6 import tempfile | |
7 | |
8 | |
# Locate this test file on disk, then put the sibling ``dedup_hash/`` directory
# at the front of sys.path so the import below resolves to the in-repo module.
_this_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(_this_file))
parent_dir = os.path.dirname(currentdir)
sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))
import dedup_hash  # must come after the sys.path change above
13 | |
14 | |
# Directory containing the FASTQ fixtures, next to the test directory.
TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')

# Input fixture sets (file names relative to TEST_DATA_DIR).
UNCOMPRESSED_IN = ['r1.fastq', 'r2.fastq']
COMPRESSED_IN = ['r1.fastq.gz', 'r2.fastq.gz']
SINGLE_IN = ['r1.fastq']

# Reference output names.
# NOTE(review): these two lists are not referenced by the code visible in this
# file; presumably kept for future comparisons -- confirm before removing.
UNCOMPRESSED_OUT = ['r1_dedup.fastq', 'r2_dedup.fastq']
SINGLE_OUT = ['r1_dedup.fastq']
21 | |
22 | |
23 | |
def run(input):
    """Run one end-to-end check for a set of test input files.

    ``input`` is a list of file names relative to TEST_DATA_DIR. The files are
    turned into keyword arguments, passed through the deduplication callable,
    and the result is compared against the reference output.
    """
    dedup_kwargs = prepare_args(input)
    run_dedup(dedup_kwargs)
    compare_output(dedup_kwargs)
28 | |
29 | |
def compare_output(args):
    """Check the first deduplicated output file against the reference result.

    Compares the MD5 digest of ``args['outfiles'][0]`` with the digest of
    ``r1_dedup.fastq`` in TEST_DATA_DIR. On a mismatch, ``diff -Nru`` is run on
    the two files so the differences are shown.

    Args:
        args: dict with an ``'outfiles'`` list, as built by ``prepare_args``.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits with a non-zero
            status (which it does when the files differ).

    NOTE(review): only the first output file is verified; for paired input the
    second output file is never compared -- confirm whether that is intended.
    """
    produced = args['outfiles'][0]
    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
    # Explicit comparison rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would silently turn this check into a no-op.
    if md5(produced) != md5(ref_out1):
        # Pass an argument list instead of splitting a formatted string, so
        # paths that contain spaces are handed to diff intact.
        subprocess.check_call(['diff', '-Nru', produced, ref_out1])
    print('all good')
38 | |
39 | |
def prepare_args(test_files):
    """Build the keyword arguments for one deduplication run.

    Args:
        test_files: file names relative to TEST_DATA_DIR.

    Returns:
        dict with ``'infiles'`` (full input paths), ``'outfiles'`` (one freshly
        created temporary file path per input file) and ``'write_gzip'``
        (always False).
    """
    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
    # Same number of output files as input files. Each temporary file is
    # created with delete=False so the path stays valid, and the handle is
    # closed right away instead of being left for the garbage collector.
    outfiles = []
    for _ in test_files:
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            outfiles.append(handle.name)
    return {'infiles': infiles,
            'outfiles': outfiles,
            'write_gzip': False}
47 | |
48 | |
def run_dedup(kwargs):
    """Obtain the callable from ``dedup_hash.get_unique_fastq_instance()`` and
    invoke it with ``kwargs`` expanded as keyword arguments."""
    dedup_hash.get_unique_fastq_instance()(**kwargs)
52 | |
def md5(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is read in binary mode in 4096-byte blocks, so it is never held
    in memory in full.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        while True:
            block = handle.read(4096)
            if not block:  # empty bytes signals end of file
                break
            digest.update(block)
    return digest.hexdigest()
59 | |
if __name__ == '__main__':
    # Exercise paired uncompressed, paired gzip-compressed and single-end input,
    # in that order.
    for test_inputs in (UNCOMPRESSED_IN, COMPRESSED_IN, SINGLE_IN):
        run(test_inputs)