Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/modules/run_rematch.py @ 3:0506503db01d draft
planemo upload commit 0708bd0601ac65e8a1c7cdd2504cebdf45755828
| author | iss | 
|---|---|
| date | Thu, 19 Oct 2023 12:31:29 +0000 | 
| parents | c6bab5103a14 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
1 import functools | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
2 import os | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
3 import sys | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
4 import multiprocessing | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
5 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
6 try: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
7 import modules.utils as utils | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
8 except ImportError: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
9 from pathotyping.modules import utils as utils | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
10 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
11 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
12 # {'noMatter': '/home/ubuntu/NGStools/patho_typing/mpmachado_stuff.out_test/rematch/sample.noMatter.fasta', 'correct': '/home/ubuntu/NGStools/patho_typing/mpmachado_stuff.out_test/rematch/sample.correct.fasta', 'alignment': '/home/ubuntu/NGStools/patho_typing/mpmachado_stuff.out_test/rematch/sample.alignment.fasta'} | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
13 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
14 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def remove_alignment(alignment_file):
    """Delete the alignment file and the sibling files that share its base name.

    Every non-hidden regular file located in the directory of
    ``alignment_file`` whose name starts with the base name of
    ``alignment_file`` (extension stripped) is removed. Sub-directories and
    names starting with ``.`` are never touched.

    Parameters
    ----------
    alignment_file : str
        Path to the alignment file. A bare file name is resolved against the
        current working directory.

    Raises
    ------
    OSError
        If the directory cannot be listed or a matching file cannot be removed.
    """
    # os.path.dirname() returns '' for a bare file name and os.listdir('')
    # raises, so fall back to the current directory in that case.
    directory = os.path.dirname(alignment_file) or os.curdir

    prefix = os.path.splitext(os.path.basename(alignment_file))[0]
    if not prefix:
        # An empty prefix matches every file name; refuse to act rather than
        # wipe the whole directory (happens when the path ends with a separator).
        return

    files = [f for f in os.listdir(directory)
             if not f.startswith('.') and os.path.isfile(os.path.join(directory, f))]
    for file_found in files:
        if file_found.startswith(prefix):
            os.remove(os.path.join(directory, file_found))
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
22 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
23 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def remove_reference_stuff(outdir, reference_file):
    """Remove files in ``outdir`` whose names start with the reference base name.

    Only non-hidden regular files directly inside ``outdir`` are considered;
    the base name is the file name of ``reference_file`` with its last
    extension stripped.

    Parameters
    ----------
    outdir : str
        Directory that is scanned (not recursively).
    reference_file : str
        Path of the reference file; only its base name is used.
    """
    # Snapshot the regular, non-hidden entries before anything is deleted.
    regular_entries = []
    for entry in os.listdir(outdir):
        if entry.startswith('.'):
            continue
        if os.path.isfile(os.path.join(outdir, entry)):
            regular_entries.append(entry)

    for entry in regular_entries:
        reference_stem = os.path.splitext(os.path.basename(reference_file))[0]
        if not entry.startswith(reference_stem):
            continue
        os.remove(os.path.join(outdir, entry))
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
30 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
31 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def clean_rematch_folder(consensus_files, bam_file, reference_file, outdir, doNotRemoveConsensus, debug_mode_true):
    """Remove intermediate ReMatCh files unless running in debug mode.

    Parameters
    ----------
    consensus_files : dict
        Mapping whose values are paths of consensus files.
    bam_file : str or None
        Alignment file; when not None its related files are removed too.
    reference_file : str
        Reference file whose derived files in ``outdir`` are removed.
    outdir : str
        Directory scanned for reference-derived files.
    doNotRemoveConsensus : bool
        When true, the consensus files are kept.
    debug_mode_true : bool
        When true, nothing is removed at all.
    """
    if debug_mode_true:
        return

    if not doNotRemoveConsensus:
        for consensus_path in list(consensus_files.values()):
            if os.path.isfile(consensus_path):
                os.remove(consensus_path)

    if bam_file is None:
        return

    # NOTE(review): reference clean-up is tied to bam_file being provided -
    # confirm this nesting against the upstream file.
    remove_alignment(bam_file)
    remove_reference_stuff(outdir, reference_file)
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
41 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
42 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def sequence_data(sample, reference_file, bam_file, outdir, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, rematch):
    """Analyse every reference sequence in parallel and gather the results.

    A fresh ``sequence_data`` directory is created inside ``outdir`` with one
    sub-directory per sequence. ``rematch.analyse_sequence_data`` is scheduled
    once per sequence on a process pool of ``threads`` workers, and the
    per-sequence outputs are then combined by ``rematch.gather_data_together``.

    Parameters
    ----------
    sample : str
        Sample name forwarded to ``rematch.gather_data_together``.
    reference_file : str
        Reference sequences file.
    bam_file : str
        Alignment file handed to every worker.
    outdir : str
        Directory in which the ``sequence_data`` folder is (re)created.
    threads : int
        Number of worker processes.
    length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele
        Forwarded unchanged to the ``rematch`` functions.
    debug_mode_true : bool
        Forwarded to ``rematch.gather_data_together``.
    rematch : module
        Object providing ``analyse_sequence_data`` and ``gather_data_together``.

    Returns
    -------
    tuple
        ``(run_successfully, sample_data, consensus_files, consensus_sequences)``
        exactly as produced by ``rematch.gather_data_together``.
    """
    work_dir = os.path.join(outdir, 'sequence_data', '')
    utils.removeDirectory(work_dir)
    os.mkdir(work_dir)

    sequences, _headers = utils.get_sequence_information(reference_file, length_extra_seq)

    workers = multiprocessing.Pool(processes=threads)
    for sequence_counter in sequences:
        per_sequence_dir = os.path.join(work_dir, str(sequence_counter), '')
        utils.removeDirectory(per_sequence_dir)
        os.makedirs(per_sequence_dir)
        job_args = (
            bam_file,
            sequences[sequence_counter],
            per_sequence_dir,
            sequence_counter,
            reference_file,
            length_extra_seq,
            minimum_depth_presence,
            minimum_depth_call,
            minimum_depth_frequency_dominant_allele,
        )
        # The AsyncResult is intentionally not kept: outcomes are read back
        # from the per-sequence directories by gather_data_together().
        workers.apply_async(rematch.analyse_sequence_data, args=job_args)
    workers.close()
    workers.join()

    # Two path components are stripped from outdir (which ends with a
    # separator) before it is handed to gather_data_together().
    gather_outdir = outdir.rsplit('/', 2)[0]
    succeeded, per_gene_data, consensus_paths, consensus_seqs = rematch.gather_data_together(
        sample, work_dir, sequences, gather_outdir, debug_mode_true, length_extra_seq, False)

    return succeeded, per_gene_data, consensus_paths, consensus_seqs
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
62 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
63 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def determine_general_statistics(sample_data, minimum_gene_coverage, minimum_gene_identity, report_file='output_dir/rematch/rematchModule_report.txt'):
    """Write the per-gene report and compute the sample-level statistics.

    A gene counts as absent when its ``gene_coverage`` is below
    ``minimum_gene_coverage`` or its ``gene_identity`` is below
    ``minimum_gene_identity``. Only present genes contribute to the mean
    coverage and to the multiple-alleles count.

    Parameters
    ----------
    sample_data : dict
        Per-gene results, indexed by the integers ``1..len(sample_data)``.
        Each entry provides ``header``, ``gene_coverage``,
        ``gene_mean_read_coverage``, ``gene_low_coverage``,
        ``gene_number_positions_multiple_alleles`` and ``gene_identity``.
    minimum_gene_coverage : float
        Coverage threshold below which a gene is considered absent.
    minimum_gene_identity : float
        Identity threshold below which a gene is considered absent.
    report_file : str, optional
        Path of the tab-separated report to write. Defaults to the location
        that used to be hard-coded, so existing callers are unaffected.

    Returns
    -------
    tuple
        ``(number_absent_genes, number_genes_multiple_alleles,
        mean_sample_coverage)``; the mean is ``0`` when no gene is present.
    """
    print('Writing report file')
    number_absent_genes = 0
    number_genes_multiple_alleles = 0
    mean_sample_coverage = 0

    with open(report_file, 'wt') as writer:
        writer.write('\t'.join(['#gene', 'percentage_gene_coverage', 'gene_mean_read_coverage', 'percentage_gene_low_coverage', 'number_positions_multiple_alleles', 'percentage_gene_identity']) + '\n')
        for i in range(1, len(sample_data) + 1):
            gene = sample_data[i]
            writer.write('\t'.join([gene['header'],
                                    str(round(gene['gene_coverage'], 2)),
                                    str(round(gene['gene_mean_read_coverage'], 2)),
                                    str(round(gene['gene_low_coverage'], 2)),
                                    str(gene['gene_number_positions_multiple_alleles']),
                                    str(round(gene['gene_identity'], 2))]) + '\n')

            if gene['gene_coverage'] < minimum_gene_coverage or gene['gene_identity'] < minimum_gene_identity:
                number_absent_genes += 1
            else:
                # Accumulate the sum here; it is turned into a mean below.
                mean_sample_coverage += gene['gene_mean_read_coverage']
                if gene['gene_number_positions_multiple_alleles'] > 0:
                    number_genes_multiple_alleles += 1

        number_present_genes = len(sample_data) - number_absent_genes
        if number_present_genes > 0:
            mean_sample_coverage = float(mean_sample_coverage) / float(number_present_genes)
        else:
            # Avoid dividing by zero when every gene is absent (or none given).
            mean_sample_coverage = 0

        writer.write('\n'.join(['#general', '>number_absent_genes', str(number_absent_genes), '>number_genes_multiple_alleles', str(number_genes_multiple_alleles), '>mean_sample_coverage', str(round(mean_sample_coverage, 2))]) + '\n')

    print('\n'.join([str('number_absent_genes: ' + str(number_absent_genes)),
                     str('number_genes_multiple_alleles: ' + str(number_genes_multiple_alleles)),
                     str('mean_sample_coverage: ' + str(round(mean_sample_coverage, 2)))]) + '\n')

    return number_absent_genes, number_genes_multiple_alleles, mean_sample_coverage
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
94 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
# Decorator: utils.timer pre-configured with this module's display name.
module_timer = functools.partial(utils.timer, name='Module ReMatCh')


@module_timer
def run_rematch(rematch, outdir, reference_file, bam_file, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage, minimum_gene_identity, debug_mode_true, doNotRemoveConsensus):
    """Run the ReMatCh analysis for one sample and summarise the outcome.

    A fresh ``rematch`` directory is created under ``outdir``, the
    ``rematch_module`` module is imported from the ``modules`` folder next to
    the ``rematch`` path, the alignment is analysed with ``sequence_data`` and,
    on success, the general statistics are computed. Intermediate files are
    removed afterwards unless ``debug_mode_true`` is set.

    Parameters
    ----------
    rematch : str
        Path whose directory contains the ``modules`` folder holding
        ``rematch_module``.
    outdir : str
        Output directory; the working folder is ``<outdir>/rematch/``.
    reference_file, bam_file, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele
        Forwarded to ``sequence_data``.
    minimum_gene_coverage, minimum_gene_identity
        Forwarded to ``determine_general_statistics``.
    debug_mode_true : bool
        When true, the working folder and intermediate files are kept.
    doNotRemoveConsensus : bool
        Forwarded to ``clean_rematch_folder``.

    Returns
    -------
    tuple
        ``(run_successfully, general_statistics, sample_data)`` where
        ``general_statistics`` holds ``number_absent_genes``,
        ``number_genes_multiple_alleles`` and ``mean_sample_coverage``
        (all ``None`` when the run was not successful).
    """
    module_dir = os.path.join(outdir, 'rematch', '')
    utils.removeDirectory(module_dir)
    os.makedirs(module_dir)

    # Make the ReMatCh modules importable before importing from them.
    rematch_modules_dir = os.path.join(os.path.dirname(rematch), 'modules')
    sys.path.append(rematch_modules_dir)
    import rematch_module

    print('Analysing alignment data')
    run_successfully, sample_data, consensus_files, consensus_sequences = sequence_data(
        'sample', reference_file, bam_file, module_dir, threads, length_extra_seq,
        minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele,
        debug_mode_true, rematch_module)

    # The statistics only exist when the analysis succeeded.
    number_absent_genes = None
    number_genes_multiple_alleles = None
    mean_sample_coverage = None
    if run_successfully:
        number_absent_genes, number_genes_multiple_alleles, mean_sample_coverage = determine_general_statistics(
            sample_data=sample_data,
            minimum_gene_coverage=minimum_gene_coverage,
            minimum_gene_identity=minimum_gene_identity)

    if not debug_mode_true:
        utils.removeDirectory(module_dir)

    clean_rematch_folder(consensus_files, bam_file, reference_file, outdir, doNotRemoveConsensus, debug_mode_true)

    general_statistics = {
        'number_absent_genes': number_absent_genes,
        'number_genes_multiple_alleles': number_genes_multiple_alleles,
        'mean_sample_coverage': round(mean_sample_coverage, 2) if run_successfully else None,
    }
    return run_successfully, general_statistics, sample_data
