Mercurial > repos > mvdbeek > dedup_hash
comparison test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
| author | mvdbeek |
|---|---|
| date | Wed, 23 Nov 2016 07:49:05 -0500 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f33e9e6a6c88 |
|---|---|
| 1 import hashlib | |
| 2 import inspect | |
| 3 import os | |
| 4 import subprocess | |
| 5 import sys | |
| 6 import tempfile | |
| 7 | |
| 8 | |
# Locate this test file on disk, then put the sibling ``dedup_hash/``
# directory of the repository at the front of ``sys.path`` so that the
# ``dedup_hash`` module can be imported without being installed.
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)
parent_dir = os.path.split(currentdir)[0]
sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))
import dedup_hash  # noqa: E402  (must follow the sys.path tweak above)
| 13 | |
| 14 | |
# Directory with the FASTQ fixtures, relative to the repository root.
TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')

# Paired-end inputs (plain and gzip-compressed) and the expected outputs.
UNCOMPRESSED_IN = ['r1.fastq', 'r2.fastq']
COMPRESSED_IN = ['r1.fastq.gz', 'r2.fastq.gz']
UNCOMPRESSED_OUT = ['r1_dedup.fastq', 'r2_dedup.fastq']

# Single-end case: just the first mate of the paired-end fixtures.
SINGLE_IN = UNCOMPRESSED_IN[:1]
SINGLE_OUT = UNCOMPRESSED_OUT[:1]
| 21 | |
| 22 | |
| 23 | |
def run(input):
    """Run one end-to-end deduplication check for the given fixture names.

    Builds the keyword arguments with ``prepare_args``, passes them to
    ``run_dedup`` and finally hands the same arguments to
    ``compare_output`` for verification.

    :param input: list of fixture file names located in ``TEST_DATA_DIR``.
    """
    dedup_kwargs = prepare_args(input)
    run_dedup(dedup_kwargs)
    compare_output(dedup_kwargs)
| 28 | |
| 29 | |
def compare_output(args):
    """Verify the first output file against the ``r1_dedup.fastq`` reference.

    Only ``args['outfiles'][0]`` is compared; any further output files are
    not checked here. The comparison is done on MD5 digests. When the
    digests differ, ``diff -Nru`` is run on the two files so the difference
    is shown; ``diff`` exits non-zero for differing files, which makes
    ``subprocess.check_call`` raise.

    :param args: dict as produced by ``prepare_args``; must contain the key
        ``'outfiles'`` with at least one path.
    :raises subprocess.CalledProcessError: if ``diff`` reports a difference.
    """
    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
    produced = args['outfiles'][0]
    # Explicit comparison instead of ``assert`` so the check still runs
    # when Python is started with -O.
    if md5(produced) != md5(ref_out1):
        # Pass argv as a list: splitting a formatted command string on
        # spaces would break for paths that contain a space.
        subprocess.check_call(['diff', '-Nru', produced, ref_out1])
    print('all good')
| 38 | |
| 39 | |
def prepare_args(test_files):
    """Build the keyword arguments for one deduplication run.

    :param test_files: fixture file names, resolved against ``TEST_DATA_DIR``.
    :returns: dict with ``'infiles'`` (fixture paths), ``'outfiles'`` (one
        freshly created temporary file path per input file) and
        ``'write_gzip'`` set to ``False``.
    """
    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
    # Same number of output files as input files. The temporary files are
    # created with delete=False so they survive being closed; closing the
    # handle right away avoids leaking an open file descriptor per output.
    outfiles = []
    for _ in test_files:
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            outfiles.append(handle.name)
    kwargs = {'infiles': infiles,
              'outfiles': outfiles,
              'write_gzip': False}
    return kwargs
| 47 | |
| 48 | |
def run_dedup(kwargs):
    """Invoke the deduplication callable with the prepared arguments.

    The callable is obtained from ``dedup_hash.get_unique_fastq_instance()``
    and is called with ``kwargs`` expanded as keyword arguments.

    :param kwargs: dict of keyword arguments, e.g. from ``prepare_args``.
    """
    dedup_hash.get_unique_fastq_instance()(**kwargs)
| 52 | |
def md5(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and read in 4096-byte pieces, so it
    is never loaded into memory as a whole.

    :param fname: path of the file to hash.
    :returns: the digest as a hex string.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        while True:
            piece = handle.read(4096)
            if not piece:
                # An empty read means end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
| 59 | |
if __name__ == '__main__':
    # Exercise the paired-end plain, paired-end gzip and single-end
    # fixtures, in that order.
    for fixture_names in (UNCOMPRESSED_IN, COMPRESSED_IN, SINGLE_IN):
        run(fixture_names)
