Mercurial > repos > mvdbeek > dedup_hash
view test/test_dedup_hash.py @ 0:f33e9e6a6c88 draft default tip
planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author | mvdbeek |
---|---|
date | Wed, 23 Nov 2016 07:49:05 -0500 |
parents | |
children |
line wrap: on
line source
"""Regression test script for ``dedup_hash``.

Runs the deduplication callable on the FASTQ fixtures found in ``test-data/``
and compares the first produced output file with the reference
``r1_dedup.fastq`` by MD5 digest.
"""
import hashlib
import inspect
import os
import subprocess
import sys
import tempfile

# Put the sibling ``dedup_hash/`` directory first on ``sys.path`` so that
# ``import dedup_hash`` below resolves to the code in this checkout.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(currentdir)
sys.path.insert(0, os.path.join(parent_dir, 'dedup_hash/'))
import dedup_hash  # noqa: E402 -- must come after the sys.path tweak above

TEST_DATA_DIR = os.path.join(parent_dir, 'test-data/')
UNCOMPRESSED_IN = ['r1.fastq', 'r2.fastq']
COMPRESSED_IN = ['r1.fastq.gz', 'r2.fastq.gz']
# NOTE(review): UNCOMPRESSED_OUT and SINGLE_OUT are not referenced anywhere in
# this file; compare_output() hard-codes 'r1_dedup.fastq' instead.
UNCOMPRESSED_OUT = ['r1_dedup.fastq', 'r2_dedup.fastq']
SINGLE_IN = ['r1.fastq']
SINGLE_OUT = ['r1_dedup.fastq']


def run(input):
    """Deduplicate the given fixture files and verify the result.

    :param input: list of file names relative to ``TEST_DATA_DIR``.
        (The name shadows the ``input`` builtin; it is kept so that existing
        keyword callers keep working.)
    :raises subprocess.CalledProcessError: if the output differs from the
        reference (raised by the ``diff`` call in :func:`compare_output`).
    """
    args = prepare_args(input)
    run_dedup(args)
    compare_output(args)
    # Only reached when the comparison passed: on failure the temporary
    # outputs are deliberately left on disk so they can be inspected.
    _remove_files(args['outfiles'])


def compare_output(args):
    """Compare the first output file against the ``r1_dedup.fastq`` reference.

    When the MD5 digests differ, ``diff -Nru`` is run on the two files so the
    mismatch is shown; ``check_call`` then raises because ``diff`` exits
    non-zero for differing files.

    :param args: dict as returned by :func:`prepare_args`; only
        ``args['outfiles'][0]`` is read.
    :raises subprocess.CalledProcessError: if ``diff`` reports a difference.
    """
    # NOTE(review): only the first output is checked, so for paired input the
    # second output file is never verified -- confirm whether that is intended.
    produced = args['outfiles'][0]
    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
    # Explicit comparison instead of ``assert``: asserts are stripped under
    # ``python -O``, which would make this check a silent no-op.
    if md5(produced) != md5(ref_out1):
        # Pass an argv list rather than splitting a formatted string, so that
        # paths containing spaces are handled correctly.
        subprocess.check_call(['diff', '-Nru', produced, ref_out1])
    print('all good')


def prepare_args(test_files):
    """Build the keyword arguments for the deduplication call.

    :param test_files: list of file names relative to ``TEST_DATA_DIR``.
    :returns: dict with ``infiles`` (absolute fixture paths), ``outfiles``
        (one freshly created temporary file path per input file) and
        ``write_gzip`` set to ``False``.
    """
    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
    # Same number of output files as input files
    outfiles = [_new_temp_path() for _ in test_files]
    kwargs = {'infiles': infiles, 'outfiles': outfiles, 'write_gzip': False}
    return kwargs


def run_dedup(kwargs):
    """Call the object returned by ``get_unique_fastq_instance()`` with *kwargs*.

    :param kwargs: dict as returned by :func:`prepare_args`.
    """
    fastq_pairs_instance = dedup_hash.get_unique_fastq_instance()
    fastq_pairs_instance(**kwargs)


def md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in 4096-byte chunks so it never has to fit in memory.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def _new_temp_path():
    """Create an empty temporary file, close it, and return its path."""
    # ``delete=False`` keeps the file on disk after the handle is closed so
    # it can be reopened by name; the ``with`` block closes the handle.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        return handle.name


def _remove_files(paths):
    """Best-effort removal of every path in *paths*; OSErrors are ignored."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


if __name__ == '__main__':
    run(UNCOMPRESSED_IN)
    run(COMPRESSED_IN)
    run(SINGLE_IN)