diff scripts/ReMatCh/modules/rematch_module.py @ 0:965517909457 draft

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:41:44 -0500
parents
children 0cbed1c0a762
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/ReMatCh/modules/rematch_module.py	Wed Jan 22 08:41:44 2020 -0500
@@ -0,0 +1,1042 @@
+import os.path
+import multiprocessing
+import utils
+import functools
+import sys
+import pickle
+
+
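+# Index a fasta file with 'samtools faidx'; when a region is given it is extracted
+# and, optionally, redirected ('>') into region_outfile_none (which needs shell=True)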
+def index_fasta_samtools(fasta, region_None, region_outfile_none, print_comand_True):
+    command = ['samtools', 'faidx', fasta, '', '', '']
+    shell_true = False
+    if region_None is not None:
+        command[3] = region_None
+    if region_outfile_none is not None:
+        command[4] = '>'
+        command[5] = region_outfile_none
+        shell_true = True
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, shell_true, None, print_comand_True)
+    return run_successfully, stdout
+
+
+# Indexing reference file using Bowtie2
+def indexSequenceBowtie2(referenceFile, threads):
+    if os.path.isfile(str(referenceFile + '.1.bt2')):
+        run_successfully = True
+    else:
+        command = ['bowtie2-build', '--threads', str(threads), referenceFile, referenceFile]
+        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
+    return run_successfully
+
+
+# Mapping with Bowtie2
+def mappingBowtie2(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc, bowtieOPT):
+    sam_file = os.path.join(outdir, str('alignment.sam'))
+
+    # Index reference file
+    run_successfully = indexSequenceBowtie2(referenceFile, threads)
+
+    if run_successfully:
+        command = ['bowtie2', '-k', str(numMapLoc), '-q', '', '--threads', str(threads), '-x', referenceFile, '', '--no-unal', '', '-S', sam_file]
+
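+        # The empty strings in the command list are placeholders filled below.
+        # utils.runCommandPopenCommunicate is assumed to re-join and split the
+        # command before execution, so '-U <file>' stored as a single element is
+        # still passed correctly to bowtie2 (an assumption about the helper).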
+        if len(fastq_files) == 1:
+            command[9] = '-U ' + fastq_files[0]
+        elif len(fastq_files) == 2:
+            command[9] = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]
+        else:
+            return False, None
+
+        if conserved_True:
+            command[4] = '--sensitive'
+        else:
+            command[4] = '--very-sensitive-local'
+
+        if bowtieOPT is not None:
+            command[11] = bowtieOPT
+
+        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
+
+    if not run_successfully:
+        sam_file = None
+
+    return run_successfully, sam_file
+
+
+def split_cigar(cigar):
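+    # Split a CIGAR string into [length, operation] pairs,
+    # e.g. split_cigar('5S20M1D10M') -> [[5, 'S'], [20, 'M'], [1, 'D'], [10, 'M']]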
+    cigars = ['M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X']
+
+    splited_cigars = []
+    numbers = ''
+    for char in cigar:
+        if char not in cigars:
+            numbers += char
+        else:
+            splited_cigars.append([int(numbers), char])
+            numbers = ''
+
+    return splited_cigars
+
+
+def recode_cigar_based_on_base_quality(cigar, bases_quality, softClip_baseQuality, mapping_position, direct_strand_true, softClip_cigarFlagRecode):
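+    # Recode the soft-clipped ('S') ends of a read: clipped bases whose quality
+    # reaches softClip_baseQuality are rewritten with the softClip_cigarFlagRecode
+    # operation, and the mapping position is shifted left when bases of the left
+    # clip are recoded.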
+    cigar = split_cigar(cigar)
+    soft_left = []
+    soft_right = []
+    cigar_flags_for_reads_length = ('M', 'I', 'S', '=', 'X')
+    read_length_without_right_s = (sum([cigar_part[0] for cigar_part in cigar
+                                        if cigar_part[1] in cigar_flags_for_reads_length])
+                                   - (cigar[-1][0] if cigar[-1][1] == 'S' else 0))
+    for x, base in enumerate(bases_quality):
+        if ord(base) - 33 >= softClip_baseQuality:
+            if x <= cigar[0][0] - 1:
+                if cigar[0][1] == 'S':
+                    soft_left.append(x)
+            elif x > read_length_without_right_s - 1:
+                if cigar[len(cigar) - 1][1] == 'S':
+                    soft_right.append(x)
+
+    left_changed = (False, 0)
+    if len(soft_left) > 0:
+        soft_left = min(soft_left) + 1
+        if soft_left == 1:
+            cigar = [[cigar[0][0], softClip_cigarFlagRecode]] + cigar[1:]
+            left_changed = (True, cigar[0][0])
+        elif cigar[0][0] - soft_left > 0:
+            cigar = [[soft_left, 'S']] + [[cigar[0][0] - soft_left, softClip_cigarFlagRecode]] + cigar[1:]
+            # shift by the number of recoded bases: cigar[1][0], since cigar[0][0]
+            # now holds the remaining soft clip length
+            left_changed = (True, cigar[1][0])
+
+    right_changed = (False, 0)
+    if len(soft_right) > 0:
+        soft_right = max(soft_right) + 1
+        cigar = cigar[:-1]
+        if soft_right - read_length_without_right_s > 0:
+            cigar.append([soft_right - read_length_without_right_s, softClip_cigarFlagRecode])
+            right_changed = (True, soft_right - read_length_without_right_s)
+        if len(bases_quality) - soft_right > 0:
+            cigar.append([len(bases_quality) - soft_right, 'S'])
+
+    if left_changed[0]:
+        # if direct_strand_true:
+        mapping_position = mapping_position - left_changed[1]
+    # if right_changed[0]:
+    #     if not direct_strand_true:
+    #         mapping_position = mapping_position + right_changed[1]
+
+    return ''.join([str(cigar_part[0]) + cigar_part[1] for cigar_part in cigar]), str(mapping_position)
+
+
+def verify_is_forward_read(sam_flag_bit):
+    # SAM flag bit 64 (0b1000000): the read is the first of the pair
+    forward_read = False
+    bit = format(sam_flag_bit, 'b').zfill(7)
+    if bit[-7] == '1':
+        forward_read = True
+    return forward_read
+
+
+def verify_mapped_direct_strand(sam_flag_bit):
+    # SAM flag bit 16 (0b10000) set -> read mapped to the reverse strand
+    direct_strand = False
+    bit = format(sam_flag_bit, 'b').zfill(5)
+    if bit[-5] == '0':
+        direct_strand = True
+    return direct_strand
+
+
+def verify_mapped_tip(reference_length, mapping_position, read_length, cigar):
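+    # A read maps on a 'tip' when its soft-clipped bases would extend past either
+    # end of the reference sequence; such reads keep their original CIGAR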
+    tip = False
+    if 'S' in cigar:
+        cigar = split_cigar(cigar)
+        if cigar[0][1] == 'S':
+            if mapping_position - cigar[0][0] < 0:
+                tip = True
+        if cigar[len(cigar) - 1][1] == 'S':
+            if mapping_position + cigar[len(cigar) - 1][0] > reference_length:
+                tip = True
+    return tip
+
+
+def change_sam_flag_bit_mapped_reverse_strand_2_direct_strand(sam_flag_bit):
+    bit = list(format(sam_flag_bit, 'b').zfill(5))
+    bit[-5] = '0'
+    return int(''.join(bit), 2)
+
+
+def change_sam_flag_bit_mate_reverse_strand_2_direct_strand(sam_flag_bit):
+    bit = list(format(sam_flag_bit, 'b').zfill(6))
+    bit[-6] = '0'
+    return int(''.join(bit), 2)
+
+
+def move_read_mapped_reverse_strand_2_direct_strand(seq, bases_quality, sam_flag_bit, cigar):
+    seq = utils.reverse_complement(seq)
+    bases_quality = ''.join(reversed(list(bases_quality)))
+    sam_flag_bit = change_sam_flag_bit_mapped_reverse_strand_2_direct_strand(sam_flag_bit)
+    cigar = ''.join([str(cigar_part[0]) + cigar_part[1] for cigar_part in reversed(split_cigar(cigar))])
+    return seq, bases_quality, str(sam_flag_bit), cigar
+
+
+@utils.trace_unhandled_exceptions
+def parallelized_recode_soft_clipping(line_collection, pickleFile, softClip_baseQuality, sequences_length, softClip_cigarFlagRecode):
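+    # Worker process: recode the soft clipping of a batch of SAM lines and pickle
+    # the result so the parent process can merge the batches back in order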
+    lines_sam = []
+    for line in line_collection:
+        line = line.splitlines()[0]
+        if len(line) > 0:
+            if line.startswith('@'):
+                lines_sam.append(line)
+            else:
+                line = line.split('\t')
+                if not verify_mapped_tip(sequences_length[line[2]], int(line[3]), len(line[9]), line[5]):
+                    line[5], line[3] = recode_cigar_based_on_base_quality(line[5], line[10], softClip_baseQuality, int(line[3]), verify_mapped_direct_strand(int(line[1])), softClip_cigarFlagRecode)
+                lines_sam.append('\t'.join(line))
+    with open(pickleFile, 'wb') as writer:
+        pickle.dump(lines_sam, writer)
+
+
+def recode_soft_clipping_from_sam(sam_file, outdir, threads, softClip_baseQuality, reference_dict, softClip_cigarFlagRecode):
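+    # Stream the SAM file in batches of 10000 lines through a multiprocessing pool
+    # and concatenate the pickled, recoded batches into a new SAM file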
+    pickle_files = []
+    sequences_length = {}
+    for x, seq_info in reference_dict.items():
+        sequences_length[seq_info['header']] = seq_info['length']
+
+    with open(sam_file, 'rtU') as reader:
+        pool = multiprocessing.Pool(processes=threads)
+        line_collection = []
+        x = 0
+        for x, line in enumerate(reader):
+            line_collection.append(line)
+            if x % 10000 == 0:
+                pickleFile = os.path.join(outdir, 'remove_soft_clipping.' + str(x) + '.pkl')
+                pickle_files.append(pickleFile)
+                pool.apply_async(parallelized_recode_soft_clipping, args=(line_collection, pickleFile, softClip_baseQuality, sequences_length, softClip_cigarFlagRecode,))
+                line_collection = []
+        if len(line_collection) > 0:
+            pickleFile = os.path.join(outdir, 'remove_soft_clipping.' + str(x) + '.pkl')
+            pickle_files.append(pickleFile)
+            pool.apply_async(parallelized_recode_soft_clipping, args=(line_collection, pickleFile, softClip_baseQuality, sequences_length, softClip_cigarFlagRecode,))
+            line_collection = []
+        pool.close()
+        pool.join()
+
+    os.remove(sam_file)
+
+    new_sam_file = os.path.join(outdir, 'alignment_with_soft_clipping_recoded.sam')
+    with open(new_sam_file, 'wt') as writer:
+        for pickleFile in pickle_files:
+            if os.path.isfile(pickleFile):
+                lines_sam = None
+                with open(pickleFile, 'rb') as reader:
+                    lines_sam = pickle.load(reader)
+                if lines_sam is not None:
+                    for line in lines_sam:
+                        writer.write(line + '\n')
+                os.remove(pickleFile)
+
+    return new_sam_file
+
+
+# Sort alignment file
+def sortAlignment(alignment_file, output_file, sortByName_True, threads):
+    outFormat_string = os.path.splitext(output_file)[1][1:].lower()
+    command = ['samtools', 'sort', '-o', output_file, '-O', outFormat_string, '', '-@', str(threads), alignment_file]
+    if sortByName_True:
+        command[6] = '-n'
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
+    if not run_successfully:
+        output_file = None
+    return run_successfully, output_file
+
+
+# Index alignment file
+def indexAlignment(alignment_file):
+    command = ['samtools', 'index', alignment_file]
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
+    return run_successfully
+
+
+def mapping_reads(fastq_files, reference_file, threads, outdir, conserved_True, numMapLoc, rematch_run, softClip_baseQuality, softClip_recodeRun, reference_dict, softClip_cigarFlagRecode, bowtieOPT):
+    # Create a symbolic link to the reference_file
+    reference_link = os.path.join(outdir, os.path.basename(reference_file))
+    os.symlink(reference_file, reference_link)
+
+    bam_file = None
+    # Mapping reads using Bowtie2
+    run_successfully, sam_file = mappingBowtie2(fastq_files, reference_link, threads, outdir, conserved_True, numMapLoc, bowtieOPT)
+
+    if run_successfully:
+        # Remove soft clipping
+        if rematch_run == softClip_recodeRun or softClip_recodeRun == 'both':
+            print 'Recoding soft clipped regions'
+            sam_file = recode_soft_clipping_from_sam(sam_file, outdir, threads, softClip_baseQuality, reference_dict, softClip_cigarFlagRecode)
+
+        # Convert sam to bam and sort bam
+        run_successfully, bam_file = sortAlignment(sam_file, str(os.path.splitext(sam_file)[0] + '.bam'), False, threads)
+
+        if run_successfully:
+            os.remove(sam_file)
+            # Index bam
+            run_successfully = indexAlignment(bam_file)
+
+    return run_successfully, bam_file, reference_link
+
+
+def create_vcf(bam_file, sequence_to_analyse, outdir, counter, reference_file):
+    gene_vcf = os.path.join(outdir, 'samtools_mpileup.sequence_' + str(counter) + '.vcf')
+
+    command = ['samtools', 'mpileup', '--count-orphans', '--no-BAQ', '--min-BQ', '0', '--min-MQ', str(7), '--fasta-ref', reference_file, '--region', sequence_to_analyse, '--output', gene_vcf, '--VCF', '--uncompressed', '--output-tags', 'INFO/AD,AD,DP', bam_file]
+
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
+    if not run_successfully:
+        gene_vcf = None
+    return run_successfully, gene_vcf
+
+
+# Read vcf file
+class Vcf():
+    def __init__(self, vcfFile):
+        self.vcf = open(vcfFile, 'rtU')
+        self.line_read = self.vcf.readline()
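+        # Skip the '#' header lines so the first readline() returns a data record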
+        while self.line_read.startswith('#'):
+            self.line_read = self.vcf.readline()
+        self.line = self.line_read
+
+    def readline(self):
+        self.line_stored = self.line
+        self.line = self.vcf.readline()
+        return self.line_stored
+
+    def close(self):
+        self.vcf.close()
+
+
+def get_variants(gene_vcf):
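+    # Parse the vcf into {position: {entry_index: {'REF', 'ALT', 'info', 'format'}}};
+    # a single position can hold several entries (e.g. a SNP plus INDEL records)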
+    variants = {}
+
+    vcf_file = Vcf(gene_vcf)
+    line = vcf_file.readline()
+    while len(line) > 0:
+        fields = line.splitlines()[0].split('\t')
+        if len(fields) > 0:
+            fields[1] = int(fields[1])
+
+            info_field = {}
+            for i in fields[7].split(';'):
+                i = i.split('=')
+                if len(i) > 1:
+                    info_field[i[0]] = i[1]
+                else:
+                    info_field[i[0]] = None
+
+            format_field = {}
+            format_field_name = fields[8].split(':')
+            format_data = fields[9].split(':')
+
+            for i in range(0, len(format_data)):
+                format_field[format_field_name[i]] = format_data[i].split(',')
+
+            fields_to_store = {'REF': fields[3], 'ALT': fields[4].split(','), 'info': info_field, 'format': format_field}
+            if fields[1] in variants:
+                variants[fields[1]][len(variants[fields[1]])] = fields_to_store
+            else:
+                variants[fields[1]] = {0: fields_to_store}
+
+        line = vcf_file.readline()
+    vcf_file.close()
+
+    return variants
+
+
+def indel_entry(variant_position):
+    entry_with_indel = []
+    entry_with_snp = None
+    for i in variant_position:
+        keys = variant_position[i]['info'].keys()
+        if 'INDEL' in keys:
+            entry_with_indel.append(i)
+        else:
+            entry_with_snp = i
+
+    return entry_with_indel, entry_with_snp
+
+
+def get_alt_noMatter(variant_position, indel_true):
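+    # Pick the dominant allele from the AD (allelic depth) counts, ignoring depth
+    # thresholds. For indels, the indel call is only kept when IDV (reads
+    # supporting the indel) reaches at least half of the total depth.
+    # Returns (alt, total depth, dominant allele depth, dominant allele index).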
+    dp = sum(map(int, variant_position['format']['AD']))
+    index_alleles_sorted_position = sorted(zip(map(int, variant_position['format']['AD']), range(0, len(variant_position['format']['AD']))), reverse=True)
+    index_dominant_allele = None
+    if not indel_true:
+        ad_idv = index_alleles_sorted_position[0][0]
+
+        if len([x for x in index_alleles_sorted_position if x[0] == ad_idv]) > 1:
+            index_alleles_sorted_position = sorted([x for x in index_alleles_sorted_position if x[0] == ad_idv])
+
+        index_dominant_allele = index_alleles_sorted_position[0][1]
+        if index_dominant_allele == 0:
+            alt = '.'
+        else:
+            alt = variant_position['ALT'][index_dominant_allele - 1]
+
+    else:
+        ad_idv = int(variant_position['info']['IDV'])
+
+        if float(ad_idv) / float(dp) >= 0.5:
+            if len([x for x in index_alleles_sorted_position if x[0] == index_alleles_sorted_position[0][0]]) > 1:
+                index_alleles_sorted_position = sorted([x for x in index_alleles_sorted_position if x[0] == index_alleles_sorted_position[0][0]])
+
+            index_dominant_allele = index_alleles_sorted_position[0][1]
+            if index_dominant_allele == 0:
+                alt = '.'
+            else:
+                alt = variant_position['ALT'][index_dominant_allele - 1]
+        else:
+            ad_idv = int(variant_position['format']['AD'][0])
+            alt = '.'
+
+    return alt, dp, ad_idv, index_dominant_allele
+
+
+def count_number_diferences(ref, alt):
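+    # Count one difference for a length change plus one per mismatching base
+    # ('N' in the alternative counts as unknown, not as a difference)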
+    number_diferences = 0
+
+    if len(ref) != len(alt):
+        number_diferences += 1
+
+    for i in range(0, min(len(ref), len(alt))):
+        if alt[i] != 'N' and ref[i] != alt[i]:
+            number_diferences += 1
+
+    return number_diferences
+
+
+def get_alt_correct(variant_position, alt_noMatter, dp, ad_idv, index_dominant_allele, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele):
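+    # Apply the depth rules: below minimum_depth_presence the position is absent;
+    # below minimum_depth_call (total or dominant allele depth) the call is masked
+    # with 'N'; a dominant allele frequency under
+    # minimum_depth_frequency_dominant_allele can also flag multiple alleles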
+    alt = None
+    low_coverage = False
+    multiple_alleles = False
+
+    if dp >= minimum_depth_presence:
+        if dp < minimum_depth_call:
+            alt = 'N' * len(variant_position['REF'])
+            low_coverage = True
+        else:
+            if ad_idv < minimum_depth_call:
+                alt = 'N' * len(variant_position['REF'])
+                low_coverage = True
+                if float(ad_idv) / float(dp) < minimum_depth_frequency_dominant_allele:
+                    multiple_alleles = True
+            else:
+                if float(ad_idv) / float(dp) < minimum_depth_frequency_dominant_allele:
+                    alt = 'N' * len(variant_position['REF'])
+                    if index_dominant_allele is not None:
+                        variants_coverage = [int(variant_position['format']['AD'][i]) for i in range(0, len(variant_position['ALT']) + 1) if i != index_dominant_allele]
+                        if sum(variants_coverage) > 0:
+                            if float(max(variants_coverage)) / float(sum(variants_coverage)) > 0.5:
+                                multiple_alleles = True
+                            elif float(max(variants_coverage)) / float(sum(variants_coverage)) == 0.5 and len(variants_coverage) > 2:
+                                multiple_alleles = True
+                    else:
+                        multiple_alleles = True
+                else:
+                    alt = alt_noMatter
+    else:
+        low_coverage = True
+
+    return alt, low_coverage, multiple_alleles
+
+
+def get_alt_alignment(ref, alt):
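+    # Build the allele used in the alignment consensus: absent calls become 'N's,
+    # deletions are padded with 'N' up to the REF length and insertions are
+    # truncated to it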
+    if alt is None:
+        alt = 'N' * len(ref)
+    else:
+        if len(ref) != len(alt):
+            if len(alt) < len(ref):
+                if alt == '.':
+                    alt = ref
+                alt += 'N' * (len(ref) - len(alt))
+            else:
+                if alt[:len(ref)] == ref:
+                    alt = '.'
+                else:
+                    alt = alt[:len(ref)]
+
+    return alt
+
+
+def get_indel_more_likely(variant_position, indels_entry):
+    # Return the indel entry with the highest IDV (reads supporting the indel)
+    indel_coverage = {}
+    for i in indels_entry:
+        indel_coverage[i] = int(variant_position[i]['info']['IDV'])
+    return max(indel_coverage, key=indel_coverage.get)
+
+
+def determine_variant(variant_position, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, indel_true):
+    alt_noMatter, dp, ad_idv, index_dominant_allele = get_alt_noMatter(variant_position, indel_true)
+
+    alt_correct, low_coverage, multiple_alleles = get_alt_correct(variant_position, alt_noMatter, dp, ad_idv, index_dominant_allele, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele)
+
+    alt_alignment = get_alt_alignment(variant_position['REF'], alt_correct)
+
+    return variant_position['REF'], alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment
+
+
+def confirm_nucleotides_indel(ref, alt, variants, position_start_indel, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, alignment_true):
+    alt = list(alt)
+
+    for i in range(0, len(alt) - 1):
+        if len(alt) < len(ref):
+            new_position = position_start_indel + len(alt) - i - 1
+            alt_position = len(alt) - i - 1
+        else:
+            if i + 1 > len(ref):
+                break
+            new_position = position_start_indel + 1 + i
+            alt_position = 1 + i
+
+        if alt[alt_position] != 'N':
+            if new_position not in variants:
+                if alignment_true:
+                    alt[alt_position] = 'N'
+                else:
+                    alt = alt[: alt_position]
+                    break
+
+            entry_with_indel, entry_with_snp = indel_entry(variants[new_position])
+            new_ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment = determine_variant(variants[new_position][entry_with_snp], minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, False)
+            if alt_noMatter != '.' and alt[alt_position] != alt_noMatter:
+                alt[alt_position] = alt_noMatter
+
+    return ''.join(alt)
+
+
+def snp_indel(variants, position, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele):
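+    # Combine the SNP and INDEL entries reported at the same position: the best
+    # supported indel wins unless it is reference-like ('.'), and its first base
+    # is corrected with the SNP call when the two disagree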
+    entry_with_indel, entry_with_snp = indel_entry(variants[position])
+
+    if len(entry_with_indel) == 0:
+        ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment = determine_variant(variants[position][entry_with_snp], minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, False)
+    else:
+        ref_snp, alt_correct_snp, low_coverage_snp, multiple_alleles_snp, alt_noMatter_snp, alt_alignment_snp = determine_variant(variants[position][entry_with_snp], minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, False)
+
+        indel_more_likely = entry_with_indel[0]
+        if len(entry_with_indel) > 1:
+            indel_more_likely = get_indel_more_likely(variants[position], entry_with_indel)
+
+        ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment = determine_variant(variants[position][indel_more_likely], minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, True)
+
+        if alt_noMatter == '.':
+            ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment = ref_snp, alt_correct_snp, low_coverage_snp, multiple_alleles_snp, alt_noMatter_snp, alt_alignment_snp
+        else:
+            if alt_correct is None and alt_correct_snp is not None:
+                alt_correct = alt_correct_snp
+            elif alt_correct is not None and alt_correct_snp is not None:
+                if alt_correct_snp != '.' and alt_correct[0] != alt_correct_snp:
+                    alt_correct = alt_correct_snp + alt_correct[1:] if len(alt_correct) > 1 else alt_correct_snp
+            if alt_noMatter_snp != '.' and alt_noMatter[0] != alt_noMatter_snp:
+                alt_noMatter = alt_noMatter_snp + alt_noMatter[1:] if len(alt_noMatter) > 1 else alt_noMatter_snp
+            if alt_alignment_snp != '.' and alt_alignment[0] != alt_alignment_snp:
+                alt_alignment = alt_alignment_snp + alt_alignment[1:] if len(alt_alignment) > 1 else alt_alignment_snp
+
+            # if alt_noMatter != '.':
+            #     alt_noMatter = confirm_nucleotides_indel(ref, alt_noMatter, variants, position, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, False)
+            # if alt_correct is not None and alt_correct != '.':
+            #     alt_correct = confirm_nucleotides_indel(ref, alt_correct, variants, position, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, False)
+            # if alt_alignment != '.':
+            #     alt_alignment = confirm_nucleotides_indel(ref, alt_alignment, variants, position, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, True)
+
+    return ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment
+
+
+def get_true_variants(variants, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, sequence):
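+    # Walk the sequence position by position and build three variant sets:
+    # 'correct' (depth-filtered calls, with absent stretches collapsed into
+    # deletions anchored on the previous base), 'noMatter' (dominant alleles,
+    # no depth filtering) and 'alignment' (per-position alleles for the
+    # consensus alignment)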
+    variants_correct = {}
+    variants_noMatter = {}
+    variants_alignment = {}
+
+    correct_absent_positions = {}
+    correct_last_absent_position = ''
+
+    noMatter_absent_positions = {}
+    noMatter_last_absent_position = ''
+
+    multiple_alleles_found = []
+
+    counter = 1
+    while counter <= len(sequence):
+        if counter in variants:
+            noMatter_last_absent_position = ''
+
+            ref, alt_correct, low_coverage, multiple_alleles, alt_noMatter, alt_alignment = snp_indel(variants, counter, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele)
+
+            if alt_alignment != '.':
+                variants_alignment[counter] = {'REF': ref, 'ALT': alt_alignment}
+
+            if alt_noMatter != '.':
+                variants_noMatter[counter] = {'REF': ref, 'ALT': alt_noMatter}
+
+            if alt_correct is None:
+                if counter - len(correct_last_absent_position) in correct_absent_positions:
+                    correct_absent_positions[counter - len(correct_last_absent_position)]['REF'] += ref
+                else:
+                    correct_absent_positions[counter] = {'REF': ref, 'ALT': ''}
+                correct_last_absent_position += ref
+            else:
+                if alt_correct != '.':
+                    if len(alt_correct) < len(ref):
+                        if len(alt_correct) == 1:
+                            correct_absent_positions[counter + 1] = {'REF': ref[1:], 'ALT': ''}
+                        else:
+                            correct_absent_positions[counter + 1] = {'REF': ref[1:], 'ALT': alt_correct[1:]}
+
+                        correct_last_absent_position = ref[1:]
+                    else:
+                        variants_correct[counter] = {'REF': ref, 'ALT': alt_correct}
+                        correct_last_absent_position = ''
+                else:
+                    correct_last_absent_position = ''
+
+            if multiple_alleles:
+                multiple_alleles_found.append(counter)
+
+            counter += len(ref)
+        else:
+            variants_alignment[counter] = {'REF': sequence[counter - 1], 'ALT': 'N'}
+
+            if counter - len(correct_last_absent_position) in correct_absent_positions:
+                correct_absent_positions[counter - len(correct_last_absent_position)]['REF'] += sequence[counter - 1]
+            else:
+                correct_absent_positions[counter] = {'REF': sequence[counter - 1], 'ALT': ''}
+            correct_last_absent_position += sequence[counter - 1]
+
+            if counter - len(noMatter_last_absent_position) in noMatter_absent_positions:
+                noMatter_absent_positions[counter - len(noMatter_last_absent_position)]['REF'] += sequence[counter - 1]
+            else:
+                noMatter_absent_positions[counter] = {'REF': sequence[counter - 1], 'ALT': ''}
+            noMatter_last_absent_position += sequence[counter - 1]
+
+            counter += 1
+
+    for position in correct_absent_positions:
+        if position == 1:
+            variants_correct[position] = {'REF': correct_absent_positions[position]['REF'], 'ALT': 'N'}
+        else:
+            if position - 1 not in variants_correct:
+                variants_correct[position - 1] = {'REF': sequence[position - 2] + correct_absent_positions[position]['REF'], 'ALT': sequence[position - 2] + correct_absent_positions[position]['ALT']}
+            else:
+                variants_correct[position - 1] = {
+                    'REF': variants_correct[position - 1]['REF'] + correct_absent_positions[position]['REF'][len(variants_correct[position - 1]['REF']) - 1:],
+                    'ALT': variants_correct[position - 1]['ALT'] + correct_absent_positions[position]['ALT'][len(variants_correct[position - 1]['ALT']) - 1 if len(variants_correct[position - 1]['ALT']) > 0 else 0:]}
+
+    for position in noMatter_absent_positions:
+        if position == 1:
+            variants_noMatter[position] = {'REF': noMatter_absent_positions[position]['REF'], 'ALT': 'N'}
+        else:
+            if position - 1 not in variants_noMatter:
+                variants_noMatter[position - 1] = {'REF': sequence[position - 2] + noMatter_absent_positions[position]['REF'], 'ALT': sequence[position - 2] + noMatter_absent_positions[position]['ALT']}
+            else:
+                variants_noMatter[position - 1] = {
+                    'REF': variants_noMatter[position - 1]['REF'] + noMatter_absent_positions[position]['REF'][len(variants_noMatter[position - 1]['REF']) - 1:],
+                    'ALT': variants_noMatter[position - 1]['ALT'] + noMatter_absent_positions[position]['ALT'][len(variants_noMatter[position - 1]['ALT']) - 1 if len(variants_noMatter[position - 1]['ALT']) > 0 else 0:]}
+
+    return variants_correct, variants_noMatter, variants_alignment, multiple_alleles_found
+
+
+def clean_variant_in_extra_seq_left(variant_dict, position, length_extra_seq, multiple_alleles_found, number_multi_alleles):
+    number_diferences = 0
+
+    if position + len(variant_dict[position]['REF']) - 1 > length_extra_seq:
+        if multiple_alleles_found is not None and position in multiple_alleles_found:
+            number_multi_alleles += 1
+
+        temp_variant = variant_dict[position]
+        del variant_dict[position]
+        variant_dict[length_extra_seq] = {}
+        variant_dict[length_extra_seq]['REF'] = temp_variant['REF'][length_extra_seq - position:]
+        variant_dict[length_extra_seq]['ALT'] = temp_variant['ALT'][length_extra_seq - position:] if len(temp_variant['ALT']) > length_extra_seq - position else temp_variant['REF'][length_extra_seq - position]
+        number_diferences = count_number_diferences(variant_dict[length_extra_seq]['REF'], variant_dict[length_extra_seq]['ALT'])
+    else:
+        del variant_dict[position]
+
+    return variant_dict, number_multi_alleles, number_diferences
+
+
+def clean_variant_in_extra_seq_rigth(variant_dict, position, sequence_length, length_extra_seq):
+    if position + len(variant_dict[position]['REF']) - 1 > sequence_length - length_extra_seq:
+        variant_dict[position]['REF'] = variant_dict[position]['REF'][: - (position - (sequence_length - length_extra_seq)) + 1]
+        variant_dict[position]['ALT'] = variant_dict[position]['ALT'][: - (position - (sequence_length - length_extra_seq)) + 1] if len(variant_dict[position]['ALT']) >= - (position - (sequence_length - length_extra_seq)) + 1 else variant_dict[position]['ALT']
+
+    number_diferences = count_number_diferences(variant_dict[position]['REF'], variant_dict[position]['ALT'])
+
+    return variant_dict, number_diferences
+
+
+def cleanning_variants_extra_seq(variants_correct, variants_noMatter, variants_alignment, multiple_alleles_found, length_extra_seq, sequence_length):
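+    # Remove or trim variants that fall inside the extra flanking sequence
+    # (length_extra_seq on each side), counting multi-allelic positions and
+    # differences only within the gene itself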
+    number_multi_alleles = 0
+    number_diferences = 0
+
+    counter = 1
+    while counter <= sequence_length:
+        if counter <= length_extra_seq:
+            if counter in variants_correct:
+                variants_correct, number_multi_alleles, number_diferences = clean_variant_in_extra_seq_left(variants_correct, counter, length_extra_seq, multiple_alleles_found, number_multi_alleles)
+            if counter in variants_noMatter:
+                variants_noMatter, ignore, ignore = clean_variant_in_extra_seq_left(variants_noMatter, counter, length_extra_seq, None, None)
+            if counter in variants_alignment:
+                variants_alignment, ignore, ignore = clean_variant_in_extra_seq_left(variants_alignment, counter, length_extra_seq, None, None)
+        elif counter > length_extra_seq and counter <= sequence_length - length_extra_seq:
+            if counter in variants_correct:
+                if counter in multiple_alleles_found:
+                    number_multi_alleles += 1
+                variants_correct, number_diferences_found = clean_variant_in_extra_seq_rigth(variants_correct, counter, sequence_length, length_extra_seq)
+                number_diferences += number_diferences_found
+            if counter in variants_noMatter:
+                variants_noMatter, ignore = clean_variant_in_extra_seq_rigth(variants_noMatter, counter, sequence_length, length_extra_seq)
+            if counter in variants_alignment:
+                variants_alignment, ignore = clean_variant_in_extra_seq_rigth(variants_alignment, counter, sequence_length, length_extra_seq)
+        else:
+            if counter in variants_correct:
+                del variants_correct[counter]
+            if counter in variants_noMatter:
+                del variants_noMatter[counter]
+            if counter in variants_alignment:
+                del variants_alignment[counter]
+
+        counter += 1
+
+    return variants_correct, variants_noMatter, variants_alignment, number_multi_alleles, number_diferences
+
+
+def get_coverage(gene_coverage):
+    coverage = {}
+
+    with open(gene_coverage, 'rtU') as reader:
+        for line in reader:
+            line = line.splitlines()[0]
+            if len(line) > 0:
+                line = line.split('\t')
+                coverage[int(line[1])] = int(line[2])
+
+    return coverage
+
+
+def get_coverage_report(coverage, sequence_length, minimum_depth_presence, minimum_depth_call, length_extra_seq):
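+    # Summarise the per-position depth inside the gene (flanks excluded):
+    # returns (number of absent positions, percentage of low-coverage positions,
+    # mean coverage)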
+    if len(coverage) == 0:
+        return sequence_length - 2 * length_extra_seq, 100.0, 0.0
+
+    count_absent = 0
+    count_lowCoverage = 0
+    sum_coverage = 0
+
+    counter = 1
+    while counter <= sequence_length:
+        if counter > length_extra_seq and counter <= sequence_length - length_extra_seq:
+            if coverage[counter] < minimum_depth_presence:
+                count_absent += 1
+            else:
+                if coverage[counter] < minimum_depth_call:
+                    count_lowCoverage += 1
+                sum_coverage += coverage[counter]
+        counter += 1
+
+    mean_coverage = 0
+    percentage_lowCoverage = 0
+    if sequence_length - 2 * length_extra_seq - count_absent > 0:
+        mean_coverage = float(sum_coverage) / float(sequence_length - 2 * length_extra_seq - count_absent)
+        percentage_lowCoverage = float(count_lowCoverage) / float(sequence_length - 2 * length_extra_seq - count_absent) * 100
+
+    return count_absent, percentage_lowCoverage, mean_coverage
+
+
+# Get genome coverage data
+def compute_genome_coverage_data(alignment_file, sequence_to_analyse, outdir, counter):
+    genome_coverage_data_file = os.path.join(outdir, 'samtools_depth.sequence_' + str(counter) + '.tab')
+    command = ['samtools', 'depth', '-a', '-q', '0', '-r', sequence_to_analyse, alignment_file, '>', genome_coverage_data_file]
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, True, None, False)
+    return run_successfully, genome_coverage_data_file
+
+
+def write_variants_vcf(variants, outdir, sequence_to_analyse, sufix):
+    vcf_file = os.path.join(outdir, str(sequence_to_analyse + '.' + sufix + '.vcf'))
+    with open(vcf_file, 'wt') as writer:
+        writer.write('##fileformat=VCFv4.2' + '\n')
+        writer.write('#' + '\t'.join(['SEQUENCE', 'POSITION', 'ID_unused', 'REFERENCE_sequence', 'ALTERNATIVE_sequence', 'QUALITY_unused', 'FILTER_unused', 'INFO_unused', 'FORMAT_unused']) + '\n')
+        for i in sorted(variants.keys()):
+            writer.write('\t'.join([sequence_to_analyse, str(i), '.', variants[i]['REF'], variants[i]['ALT'], '.', '.', '.', '.']) + '\n')
+
+    compressed_vcf_file = vcf_file + '.gz'
+    command = ['bcftools', 'convert', '-o', compressed_vcf_file, '-O', 'z', vcf_file]
+    run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
+    if run_successfully:
+        command = ['bcftools', 'index', compressed_vcf_file]
+        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
+
+    if not run_successfully:
+        compressed_vcf_file = None
+
+    return run_successfully, compressed_vcf_file
+
+
+def parse_fasta_inMemory(fasta_memory):
+    fasta_memory = fasta_memory.splitlines()
+    sequence_dict = {}
+    for line in fasta_memory:
+        if len(line) > 0:
+            if line.startswith('>'):
+                sequence_dict = {'header': line[1:], 'sequence': ''}
+            else:
+                sequence_dict['sequence'] += line
+
+    return sequence_dict
+
+
+def compute_consensus_sequence(reference_file, sequence_to_analyse, compressed_vcf_file, outdir, sufix):
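+    # Extract the gene from the reference, normalise the vcf against it and run
+    # 'bcftools consensus', parsing the consensus fasta from stdout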
+    sequence_dict = None
+
+    gene_fasta = os.path.join(outdir, str(sequence_to_analyse + '.fasta'))
+
+    run_successfully, stdout = index_fasta_samtools(reference_file, sequence_to_analyse, gene_fasta, False)
+    if run_successfully:
+        command = ['bcftools', 'norm', '-c', 's', '-f', gene_fasta, '-Ov',  compressed_vcf_file]
+        run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
+        if run_successfully:
+            command = ['bcftools', 'consensus', '-f', gene_fasta, compressed_vcf_file, '-H', '1']
+            run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
+            if run_successfully:
+                sequence_dict = parse_fasta_inMemory(stdout)
+
+    return run_successfully, sequence_dict
+
+
+def create_sample_consensus_sequence(outdir, sequence_to_analyse, reference_file, variants, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, sequence, length_extra_seq):
+    variants_correct, variants_noMatter, variants_alignment, multiple_alleles_found = get_true_variants(variants, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, sequence)
+
+    variants_correct, variants_noMatter, variants_alignment, number_multi_alleles, number_diferences = cleanning_variants_extra_seq(variants_correct, variants_noMatter, variants_alignment, multiple_alleles_found, length_extra_seq, len(sequence))
+
+    run_successfully = False
+    consensus = {'correct': {}, 'noMatter': {}, 'alignment': {}}
+    for variant_type, variants_found in [('correct', variants_correct), ('noMatter', variants_noMatter),
+                                         ('alignment', variants_alignment)]:
+        run_successfully, compressed_vcf_file = write_variants_vcf(variants_found, outdir, sequence_to_analyse, variant_type)
+        if run_successfully:
+            run_successfully, sequence_dict = compute_consensus_sequence(reference_file, sequence_to_analyse, compressed_vcf_file, outdir, variant_type)
+            if run_successfully:
+                consensus[variant_type] = {'header': sequence_dict['header'],
+                                           'sequence': sequence_dict['sequence'][length_extra_seq:len(sequence_dict['sequence']) - length_extra_seq]}
+
+    return run_successfully, number_multi_alleles, consensus, number_diferences
+
+
+@utils.trace_unhandled_exceptions
+def analyse_sequence_data(bam_file, sequence_information, outdir, counter, reference_file, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, threads):
+    count_absent = None
+    percentage_lowCoverage = None
+    meanCoverage = None
+    number_diferences = 0
+    number_multi_alleles = None
+    # Initialised here so the pickle dump below cannot raise a NameError when an
+    # earlier step fails
+    consensus_sequence = {'correct': {}, 'noMatter': {}, 'alignment': {}}
+
+    # Create vcf file (for multiple alleles check)
+    run_successfully, gene_vcf = create_vcf(bam_file, sequence_information['header'], outdir, counter, reference_file)
+    if run_successfully:
+        # Create coverage tab file
+        run_successfully, gene_coverage = compute_genome_coverage_data(bam_file, sequence_information['header'], outdir, counter)
+
+        if run_successfully:
+            variants = get_variants(gene_vcf)
+
+            coverage = get_coverage(gene_coverage)
+
+            run_successfully, number_multi_alleles, consensus_sequence, number_diferences = create_sample_consensus_sequence(outdir, sequence_information['header'], reference_file, variants, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, sequence_information['sequence'], length_extra_seq)
+
+            count_absent, percentage_lowCoverage, meanCoverage = get_coverage_report(coverage, sequence_information['length'], minimum_depth_presence, minimum_depth_call, length_extra_seq)
+
+    utils.saveVariableToPickle([run_successfully, counter, number_multi_alleles, count_absent, percentage_lowCoverage, meanCoverage, consensus_sequence, number_diferences], outdir, str('coverage_info.' + str(counter)))
+
+
+def clean_header(header):
+    problematic_characters = ["|", " ", ",", ".", "(", ")", "'", "/", ":"]
+    new_header = str(header)
+    if any(x in header for x in problematic_characters):
+        for x in problematic_characters:
+            new_header = new_header.replace(x, '_')
+    return header, new_header
+
+
+def get_sequence_information(fasta_file, length_extra_seq):
+    sequence_dict = {}
+    headers = {}
+    headers_changed = False
+
+    with open(fasta_file, 'rtU') as reader:
+        blank_line_found = False
+        sequence_counter = 0
+        temp_sequence_dict = {}
+        for line in reader:
+            line = line.splitlines()[0]
+            if len(line) > 0:
+                if not blank_line_found:
+                    if line.startswith('>'):
+                        if len(temp_sequence_dict) > 0:
+                            if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0:
+                                sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0]
+                            else:
+                                print '{header} sequence ignored because its length minus the flanking extra sequence is <= 0'.format(header=temp_sequence_dict.values()[0]['header'])
+                                del headers[temp_sequence_dict.values()[0]['header']]
+                            temp_sequence_dict = {}
+
+                        original_header, new_header = clean_header(line[1:])
+                        if new_header in headers:
+                            sys.exit('Found duplicated sequence headers: {original_header}'.format(original_header=original_header))
+
+                        sequence_counter += 1
+                        temp_sequence_dict[sequence_counter] = {'header': new_header, 'sequence': '', 'length': 0}
+                        headers[new_header] = str(original_header)
+                        if new_header != original_header:
+                            headers_changed = True
+                    else:
+                        temp_sequence_dict[sequence_counter]['sequence'] += line
+                        temp_sequence_dict[sequence_counter]['length'] += len(line)
+                else:
+                    sys.exit('A blank line was found inside the fasta file, before the following line: ' + line)
+            else:
+                blank_line_found = True
+
+        if len(temp_sequence_dict) > 0:
+            if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0:
+                sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0]
+            else:
+                print '{header} sequence ignored because its length minus the flanking extra sequence is <= 0'.format(header=temp_sequence_dict.values()[0]['header'])
+                del headers[temp_sequence_dict.values()[0]['header']]
+
+    return sequence_dict, headers, headers_changed
+
+
+def determine_threads_2_use(number_sequences, threads):
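+    # Number of threads each per-sequence worker may use
+    # (Python 2 integer division, so the result is always a whole number)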
+    if number_sequences >= threads:
+        return 1
+    else:
+        return threads / number_sequences
+
+
+def sequence_data(sample, reference_file, bam_file, outdir, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, notWriteConsensus):
+    sequence_data_outdir = os.path.join(outdir, 'sequence_data', '')
+    utils.removeDirectory(sequence_data_outdir)
+    os.mkdir(sequence_data_outdir)
+
+    sequences, headers, headers_changed = get_sequence_information(reference_file, length_extra_seq)
+
+    threads_2_use = determine_threads_2_use(len(sequences), threads)
+
+    pool = multiprocessing.Pool(processes=threads)
+    for sequence_counter in sequences:
+        sequence_dir = os.path.join(sequence_data_outdir, str(sequence_counter), '')
+        utils.removeDirectory(sequence_dir)
+        os.makedirs(sequence_dir)
+        pool.apply_async(analyse_sequence_data, args=(bam_file, sequences[sequence_counter], sequence_dir, sequence_counter, reference_file, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, threads_2_use,))
+    pool.close()
+    pool.join()
+
+    run_successfully, sample_data, consensus_files, consensus_sequences = gather_data_together(sample, sequence_data_outdir, sequences, outdir.rsplit('/', 2)[0], debug_mode_true, length_extra_seq, notWriteConsensus)
+
+    return run_successfully, sample_data, consensus_files, consensus_sequences
+
+
+def chunkstring(string, length):
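+    # Yield consecutive slices of at most 'length' characters,
+    # e.g. list(chunkstring('ACGTACGTAC', 4)) -> ['ACGT', 'ACGT', 'AC']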
+    return (string[0 + i:length + i] for i in range(0, len(string), length))
+
+
+def write_consensus(outdir, sample, consensus_sequence):
+    consensus_files = {}
+    for consensus_type in ['correct', 'noMatter', 'alignment']:
+        consensus_files[consensus_type] = os.path.join(outdir, str(sample + '.' + consensus_type + '.fasta'))
+        with open(consensus_files[consensus_type], 'at') as writer:
+            writer.write('>' + consensus_sequence[consensus_type]['header'] + '\n')
+            fasta_sequence_lines = chunkstring(consensus_sequence[consensus_type]['sequence'], 80)
+            for line in fasta_sequence_lines:
+                writer.write(line + '\n')
+    return consensus_files
+
+
+def gather_data_together(sample, data_directory, sequences_information, outdir, debug_mode_true, length_extra_seq, notWriteConsensus):
+    run_successfully = True
+    counter = 0
+    sample_data = {}
+
+    consensus_files = None
+    consensus_sequences_together = {'correct': {}, 'noMatter': {}, 'alignment': {}}
+
+    write_consensus_first_time = True
+
+    genes_directories = [d for d in os.listdir(data_directory) if not d.startswith('.') and os.path.isdir(os.path.join(data_directory, d, ''))]
+    for gene_dir in genes_directories:
+        gene_dir_path = os.path.join(data_directory, gene_dir, '')
+
+        files = [f for f in os.listdir(gene_dir_path) if not f.startswith('.') and os.path.isfile(os.path.join(gene_dir_path, f))]
+        for file_found in files:
+            if file_found.startswith('coverage_info.') and file_found.endswith('.pkl'):
+                file_path = os.path.join(gene_dir_path, file_found)
+
+                if run_successfully:
+                    run_successfully, sequence_counter, multiple_alleles_found, count_absent, percentage_lowCoverage, meanCoverage, consensus_sequence, number_diferences = utils.extractVariableFromPickle(file_path)
+
+                    if not notWriteConsensus:
+                        for consensus_type in consensus_sequence:
+                            consensus_sequences_together[consensus_type][sequence_counter] = {'header': consensus_sequence[consensus_type]['header'], 'sequence': consensus_sequence[consensus_type]['sequence']}
+
+                        if write_consensus_first_time:
+                            for consensus_type in ['correct', 'noMatter', 'alignment']:
+                                file_to_remove = os.path.join(outdir, str(sample + '.' + consensus_type + '.fasta'))
+                                if os.path.isfile(file_to_remove):
+                                    os.remove(file_to_remove)
+                            write_consensus_first_time = False
+                        consensus_files = write_consensus(outdir, sample, consensus_sequence)
+
+                    gene_identity = 0
+                    if sequences_information[sequence_counter]['length'] - 2 * length_extra_seq - count_absent > 0:
+                        gene_identity = 100 - (float(number_diferences) / (sequences_information[sequence_counter]['length'] - 2 * length_extra_seq - count_absent)) * 100
+
+                    sample_data[sequence_counter] = {'header': sequences_information[sequence_counter]['header'],
+                                                     'gene_coverage': 100 - (float(count_absent) / (sequences_information[sequence_counter]['length'] - 2 * length_extra_seq)) * 100,
+                                                     'gene_low_coverage': percentage_lowCoverage,
+                                                     'gene_number_positions_multiple_alleles': multiple_alleles_found,
+                                                     'gene_mean_read_coverage': meanCoverage,
+                                                     'gene_identity': gene_identity}
+                    counter += 1
+
+        if not debug_mode_true:
+            utils.removeDirectory(gene_dir_path)
+
+    if counter != len(sequences_information):
+        run_successfully = False
+
+    return run_successfully, sample_data, consensus_files, consensus_sequences_together
+
+
+rematch_timer = functools.partial(utils.timer, name='ReMatCh module')
+
+
+@rematch_timer
+def runRematchModule(sample, fastq_files, reference_file, threads, outdir, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage, conserved_True, debug_mode_true, numMapLoc, minimum_gene_identity, rematch_run, softClip_baseQuality, softClip_recodeRun, reference_dict, softClip_cigarFlagRecode, bowtieOPT, gene_list_reference, notWriteConsensus):
+    rematch_folder = os.path.join(outdir, 'rematch_module', '')
+    utils.removeDirectory(rematch_folder)
+    os.mkdir(rematch_folder)
+
+    # Map reads
+    run_successfully, bam_file, reference_file = mapping_reads(fastq_files, reference_file, threads, rematch_folder, conserved_True, numMapLoc, rematch_run, softClip_baseQuality, softClip_recodeRun, reference_dict, softClip_cigarFlagRecode, bowtieOPT)
+
+    if run_successfully:
+        # Index reference file
+        run_successfully, stdout = index_fasta_samtools(reference_file, None, None, True)
+        if run_successfully:
+            print 'Analysing alignment data'
+            run_successfully, sample_data, consensus_files, consensus_sequences = sequence_data(sample, reference_file, bam_file, rematch_folder, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, notWriteConsensus)
+
+            if run_successfully:
+                print 'Writing report file'
+                number_absent_genes = 0
+                number_genes_multiple_alleles = 0
+                mean_sample_coverage = 0
+                with open(os.path.join(outdir, 'rematchModule_report.txt'), 'wt') as writer:
+                    writer.write('\t'.join(['#gene', 'percentage_gene_coverage', 'gene_mean_read_coverage', 'percentage_gene_low_coverage', 'number_positions_multiple_alleles', 'percentage_gene_identity']) + '\n')
+                    for i in range(1, len(sample_data) + 1):
+                        writer.write('\t'.join([gene_list_reference[sample_data[i]['header']], str(round(sample_data[i]['gene_coverage'], 2)), str(round(sample_data[i]['gene_mean_read_coverage'], 2)), str(round(sample_data[i]['gene_low_coverage'], 2)), str(sample_data[i]['gene_number_positions_multiple_alleles']), str(round(sample_data[i]['gene_identity'], 2))]) + '\n')
+
+                        if sample_data[i]['gene_coverage'] < minimum_gene_coverage or sample_data[i]['gene_identity'] < minimum_gene_identity:
+                            number_absent_genes += 1
+                        else:
+                            mean_sample_coverage += sample_data[i]['gene_mean_read_coverage']
+                            if sample_data[i]['gene_number_positions_multiple_alleles'] > 0:
+                                number_genes_multiple_alleles += 1
+
+                    if len(sample_data) - number_absent_genes > 0:
+                        mean_sample_coverage = float(mean_sample_coverage) / float(len(sample_data) - number_absent_genes)
+                    else:
+                        mean_sample_coverage = 0
+
+                    writer.write('\n'.join(['#general', '>number_absent_genes', str(number_absent_genes), '>number_genes_multiple_alleles', str(number_genes_multiple_alleles), '>mean_sample_coverage', str(round(mean_sample_coverage, 2))]) + '\n')
+
+                    print '\n'.join([str('number_absent_genes: ' + str(number_absent_genes)), str('number_genes_multiple_alleles: ' + str(number_genes_multiple_alleles)), str('mean_sample_coverage: ' + str(round(mean_sample_coverage, 2)))])
+
+    if not debug_mode_true:
+        utils.removeDirectory(rematch_folder)
+
+    return (run_successfully,
+            sample_data if 'sample_data' in locals() else None,
+            {'number_absent_genes': number_absent_genes if 'number_absent_genes' in locals() else None,
+             'number_genes_multiple_alleles': number_genes_multiple_alleles if 'number_genes_multiple_alleles' in locals() else None,
+             'mean_sample_coverage': round(mean_sample_coverage, 2) if 'mean_sample_coverage' in locals() else None},
+            consensus_files if 'consensus_files' in locals() else None,
+            consensus_sequences if 'consensus_sequences' in locals() else None)