comparison scripts/S01_find_orf_on_multiple_alignment.py @ 1:c79bdda8abfb draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3a118aa934e6406cc8b0b24d006af6365c277519
author abims-sbr
date Thu, 09 Jun 2022 12:40:00 +0000
parents eb95bf7f90ae
children
comparison
equal deleted inserted replaced
0:eb95bf7f90ae 1:c79bdda8abfb
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf8 2 # coding: utf8
3 # Author: Eric Fontanillas 3 # Author: Eric Fontanillas
4 # Modification: 03/09/14 by Julie BAFFARD 4 # Modification: 03/09/14 by Julie BAFFARD
5 # Last modification : 25/07/18 by Victor Mataigne 5 # Last modification : 10/09/21 by Charlotte Berthelier
6 6
7 # Description: Predict potential ORF on the basis of 2 criteria + 1 optional criteria 7 """
8 # CRITERIA 1 - Longest part of the alignment of sequence without codon stop "*", tested in the 3 potential ORF 8 Description: Predict potential ORF on the basis of 2 criteria + 1 optional criteria
9 # CRITERIA 2 - This longest part should be > 150nc or 50aa 9
10 # CRITERIA 3 - [OPTIONNAL] A codon start "M" should be present in this longuest part, before the last 50 aa 10 - CRITERIA 1
11 # OUTPUTs "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA 11
12 # OUTPUTs "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA 12 Get the longest part of the sequence alignemen without codon stop "*",
13 13 and test in the 3 potential ORF and check with a Blast the best coding sequence
14 import string, os, time, re, zipfile, sys, argparse 14
15 - CRITERIA 2
16
17 This longest part should be > 150nc or 50aa
18
19 - CRITERIA 3 [OPTIONNAL]
20
21 A codon start "M" should be present in this longuest part, before the last 50 aa
22
23 OUTPUTs:
24 "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA
25 "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA
26 """
27
28 import os
29 import re
30 import argparse
31
32 from Bio.Blast import NCBIWWW
33 from Bio.Blast import NCBIXML
15 from dico import dico 34 from dico import dico
16 35
17 def code_universel(F1): 36
37 def code_universel(file1):
18 """ Creates bash for genetic code (key : codon ; value : amino-acid) """ 38 """ Creates bash for genetic code (key : codon ; value : amino-acid) """
19 bash_codeUniversel = {} 39 bash_code_universel = {}
20 40
21 with open(F1, "r") as file: 41 with open(file1, "r") as file:
22 for line in file.readlines(): 42 for line in file.readlines():
23 L1 = string.split(line, " ") 43 item = str.split(line, " ")
24 length1 = len(L1) 44 length1 = len(item)
25 if length1 == 3: 45 if length1 == 3:
26 key = L1[0] 46 key = item[0]
27 value = L1[2][:-1] 47 value = item[2][:-1]
28 bash_codeUniversel[key] = value 48 bash_code_universel[key] = value
29 else: 49 else:
30 key = L1[0] 50 key = item[0]
31 value = L1[2] 51 value = item[2]
32 bash_codeUniversel[key] = value 52 bash_code_universel[key] = value
33 53
34 return(bash_codeUniversel) 54 return bash_code_universel
55
35 56
36 def multiple3(seq): 57 def multiple3(seq):
37 """ Tests if the sequence is a multiple of 3, and if not removes extra-bases 58 """
38 !! Possible to lost a codon, when I test ORF (as I will decay the ORF) """ 59 Tests if the sequence is a multiple of 3, and if not removes extra-bases
39 60 Possible to lost a codon, when I test ORF (as I will decay the ORF)
40 m = len(seq)%3 61 """
41 if m != 0 : 62
42 return seq[:-m], m 63 multiple = len(seq) % 3
43 else : 64 if multiple != 0:
44 return seq, m 65 return seq[:-multiple], multiple
45 66 return seq, multiple
46 def detect_Methionine(seq_aa, Ortho, minimal_cds_length): 67
68
69 def detect_methionine(seq_aa, ortho, minimal_cds_length):
47 """ Detects if methionin in the aa sequence """ 70 """ Detects if methionin in the aa sequence """
48 71
49 ln = len(seq_aa) 72 size = len(seq_aa)
50 CUTOFF_Last_50aa = ln - minimal_cds_length 73 cutoff_last_50aa = size - minimal_cds_length
51 74
52 # Find all indices of occurances of "M" in a string of aa 75 # Find all indices of occurances of "M" in a string of aa
53 list_indices = [pos for pos, char in enumerate(seq_aa) if char == "M"] 76 list_indices = [pos for pos, char in enumerate(seq_aa) if char == "M"]
54 77
55 # If some "M" are present, find whether the first "M" found is not in the 50 last aa (indice < CUTOFF_Last_50aa) ==> in this case: maybenot a CDS 78 # If some "M" are present, find whether the first "M" found is not in the
79 # 50 last aa (indice < CUTOFF_Last_50aa) ==> in this case: maybenot a CDS
56 if list_indices != []: 80 if list_indices != []:
57 first_M = list_indices[0] 81 first_m = list_indices[0]
58 if first_M < CUTOFF_Last_50aa: 82 if first_m < cutoff_last_50aa:
59 Ortho = 1 # means orthologs found 83 ortho = 1 # means orthologs found
60 84
61 return(Ortho) 85 return ortho
62 86
63 def ReverseComplement2(seq): 87
88 def reverse_complement2(seq):
64 """ Reverse complement DNA sequence """ 89 """ Reverse complement DNA sequence """
65 seq1 = 'ATCGN-TAGCN-atcgn-tagcn-' 90 seq1 = 'ATCGN-TAGCN-atcgn-tagcn-'
66 seq_dict = { seq1[i]:seq1[i+6] for i in range(24) if i < 6 or 12<=i<=16 } 91 seq_dict = {seq1[i]: seq1[i + 6]
92 for i in range(24) if i < 6 or 12 <= i <= 16}
67 93
68 return "".join([seq_dict[base] for base in reversed(seq)]) 94 return "".join([seq_dict[base] for base in reversed(seq)])
69 95
70 def simply_get_ORF(seq_dna, gen_code): 96
71 seq_by_codons = [seq_dna.upper().replace('T', 'U')[i:i+3] for i in range(0, len(seq_dna), 3)] 97 def simply_get_orf(seq_dna, gen_code):
72 seq_by_aa = [gen_code[codon] if codon in gen_code.keys() else '?' for codon in seq_by_codons] 98 """ Generate the ORF sequence from DNA sequence """
99 seq_by_codons = [seq_dna.upper().replace('T', 'U')[i:i + 3]
100 for i in range(0, len(seq_dna), 3)]
101 seq_by_aa = [gen_code[codon] if codon in gen_code.keys()
102 else '?' for codon in seq_by_codons]
73 103
74 return ''.join(seq_by_aa) 104 return ''.join(seq_by_aa)
75 105
76 def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel, minimal_cds_length, min_spec): 106 def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel, minimal_cds_length, min_spec):
77 # Multiple sequence based : Based on the alignment of several sequences (orthogroup) 107 # Multiple sequence based : Based on the alignment of several sequences (orthogroup)
78 # Criteria 1 : Get the segment in the alignment with no codon stop 108 # Criteria 1 : Get the segment in the alignment with no codon stop
79 109
80 # 1 - Get the list of aligned aa seq for the 3 ORF: 110 # 1 - Get the list of aligned aa seq for the 3 ORF:
111 print("1 - Get the list of aligned aa seq for the 3 ORF")
81 bash_of_aligned_aa_seq_3ORF = {} 112 bash_of_aligned_aa_seq_3ORF = {}
82 bash_of_aligned_nuc_seq_3ORF = {} 113 bash_of_aligned_nuc_seq_3ORF = {}
83 BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = [] 114 BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = []
84 115
85 for fasta_name in bash_aligned_nc_seq.keys(): 116 for fasta_name in bash_aligned_nc_seq.keys():
86 # Get sequence, chek if multiple 3, then get 6 orfs 117 # Get sequence, chek if multiple 3, then get 6 orfs
87 sequence_nc = bash_aligned_nc_seq[fasta_name] 118 sequence_nc = bash_aligned_nc_seq[fasta_name]
88 new_sequence_nc, modulo = multiple3(sequence_nc) 119 new_sequence_nc, modulo = multiple3(sequence_nc)
89 new_sequence_rev = ReverseComplement2(new_sequence_nc) 120 new_sequence_rev = reverse_complement2(new_sequence_nc)
90 # For each seq of the multialignment => give the 6 ORFs (in nuc) 121 # For each seq of the multialignment => give the 6 ORFs (in nuc)
91 bash_of_aligned_nuc_seq_3ORF[fasta_name] = [new_sequence_nc, new_sequence_nc[1:-2], new_sequence_nc[2:-1], new_sequence_rev, new_sequence_rev[1:-2], new_sequence_rev[2:-1]] 122 bash_of_aligned_nuc_seq_3ORF[fasta_name] = [new_sequence_nc, new_sequence_nc[1:-2], new_sequence_nc[2:-1], new_sequence_rev, new_sequence_rev[1:-2], new_sequence_rev[2:-1]]
92 123
93 seq_prot_ORF1 = simply_get_ORF(new_sequence_nc, bash_codeUniversel) 124 seq_prot_ORF1 = simply_get_orf(new_sequence_nc, bash_codeUniversel)
94 seq_prot_ORF2 = simply_get_ORF(new_sequence_nc[1:-2], bash_codeUniversel) 125 seq_prot_ORF2 = simply_get_orf(new_sequence_nc[1:-2], bash_codeUniversel)
95 seq_prot_ORF3 = simply_get_ORF(new_sequence_nc[2:-1], bash_codeUniversel) 126 seq_prot_ORF3 = simply_get_orf(new_sequence_nc[2:-1], bash_codeUniversel)
96 seq_prot_ORF4 = simply_get_ORF(new_sequence_rev, bash_codeUniversel) 127 seq_prot_ORF4 = simply_get_orf(new_sequence_rev, bash_codeUniversel)
97 seq_prot_ORF5 = simply_get_ORF(new_sequence_rev[1:-2], bash_codeUniversel) 128 seq_prot_ORF5 = simply_get_orf(new_sequence_rev[1:-2], bash_codeUniversel)
98 seq_prot_ORF6 = simply_get_ORF(new_sequence_rev[2:-1], bash_codeUniversel) 129 seq_prot_ORF6 = simply_get_orf(new_sequence_rev[2:-1], bash_codeUniversel)
99 130
100 # For each seq of the multialignment => give the 6 ORFs (in aa) 131 # For each seq of the multialignment => give the 6 ORFs (in aa)
101 bash_of_aligned_aa_seq_3ORF[fasta_name] = [seq_prot_ORF1, seq_prot_ORF2, seq_prot_ORF3, seq_prot_ORF4, seq_prot_ORF5, seq_prot_ORF6] 132 bash_of_aligned_aa_seq_3ORF[fasta_name] = [seq_prot_ORF1, seq_prot_ORF2, seq_prot_ORF3, seq_prot_ORF4, seq_prot_ORF5, seq_prot_ORF6]
102 133
103 # 2 - Test for the best ORF (Get the longuest segment in the alignment with no codon stop ... for each ORF ... the longuest should give the ORF) 134 # 2 - Test for the best ORF (Get the longuest segment in the alignment with no codon stop ... for each ORF ... the longuest should give the ORF)
135 print("2 - Test for the best ORF")
104 BEST_MAX = 0 136 BEST_MAX = 0
105 137
106 for i in [0,1,2,3,4,5]: # Test the 6 ORFs 138 for i in [0,1,2,3,4,5]: # Test the 6 ORFs
107 ORF_Aligned_aa = [] 139 ORF_Aligned_aa = []
108 ORF_Aligned_nuc = [] 140 ORF_Aligned_nuc = []
168 200
169 201
170 # 3 - ONCE we have this better segment (BEST CODING SEGMENT) 202 # 3 - ONCE we have this better segment (BEST CODING SEGMENT)
171 # ==> GET THE STARTING and ENDING POSITIONS (in aa position and in nuc position) 203 # ==> GET THE STARTING and ENDING POSITIONS (in aa position and in nuc position)
172 # And get the INDEX of the best ORF [0, 1, or 2] 204 # And get the INDEX of the best ORF [0, 1, or 2]
205 print("3 - ONCE we have this better segment")
173 if BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION != []: 206 if BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION != []:
174 pos_MIN_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[0] 207 pos_MIN_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[0]
175 pos_MIN_aa = pos_MIN_aa - 1 208 pos_MIN_aa = pos_MIN_aa - 1
176 pos_MAX_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[-1] 209 pos_MAX_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[-1]
177 210
184 seq_coding = seq[pos_MIN_aa:pos_MAX_aa] 217 seq_coding = seq[pos_MIN_aa:pos_MAX_aa]
185 BESTORF_bash_of_aligned_aa_seq[fasta_name] = seq 218 BESTORF_bash_of_aligned_aa_seq[fasta_name] = seq
186 BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] = seq_coding 219 BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] = seq_coding
187 220
188 # 4 - Get the corresponding position (START/END of BEST CODING SEGMENT) for nucleotides alignment 221 # 4 - Get the corresponding position (START/END of BEST CODING SEGMENT) for nucleotides alignment
222 print("4 - Get the corresponding position")
189 pos_MIN_nuc = pos_MIN_aa * 3 223 pos_MIN_nuc = pos_MIN_aa * 3
190 pos_MAX_nuc = pos_MAX_aa * 3 224 pos_MAX_nuc = pos_MAX_aa * 3
191 225
192 BESTORF_bash_aligned_nc_seq = {} 226 BESTORF_bash_aligned_nc_seq = {}
193 BESTORF_bash_aligned_nc_seq_CODING = {} 227 BESTORF_bash_aligned_nc_seq_CODING = {}
194 for fasta_name in bash_aligned_nc_seq.keys(): 228 for fasta_name in bash_aligned_nc_seq.keys():
195 seq = bash_of_aligned_nuc_seq_3ORF[fasta_name][index_BEST_ORF] 229 seq = bash_of_aligned_nuc_seq_3ORF[fasta_name][index_BEST_ORF]
196 seq_coding = seq[pos_MIN_nuc:pos_MAX_nuc] 230 seq_coding = seq[pos_MIN_nuc:pos_MAX_nuc]
197 BESTORF_bash_aligned_nc_seq[fasta_name] = seq 231 BESTORF_bash_aligned_nc_seq[fasta_name] = seq
198 BESTORF_bash_aligned_nc_seq_CODING[fasta_name] = seq_coding 232 BESTORF_bash_aligned_nc_seq_CODING[fasta_name] = seq_coding
233 seq_cutted = re.sub(r'^.*?[a-zA-Z]', '', seq)
234 sequence_for_blast=(fasta_name+'\n'+seq_cutted+'\n')
235 good_ORF_found = False
236 try:
237 #result_handle = ""
238 #blast_records = ""
239 # logger.debug("sequence_for_blast = %s ", sequence_for_blast)
240 print('sequence_for_blast = %s ',sequence_for_blast, end=' ', flush=True)
241 result_handle = NCBIWWW.qblast("blastn", "/db/nt/current/fasta/nt.fsa", sequence_for_blast, expect=0.001, hitlist_size=1)
242 blast_records = NCBIXML.parse(result_handle)
243 except:
244 good_ORF_found = False
245 else:
246 for blast_record in blast_records:
247 for alignment in blast_record.alignments:
248 for hsp in alignment.hsps:
249 if hsp.expect < 0.001:
250 good_ORF_found = True
251 print("good_ORF_found = %s" %good_ORF_found)
199 252
200 else: # no CDS found 253 else: # no CDS found
201 BESTORF_bash_aligned_nc_seq = {} 254 BESTORF_bash_aligned_nc_seq = {}
202 BESTORF_bash_aligned_nc_seq_CODING = {} 255 BESTORF_bash_aligned_nc_seq_CODING = {}
203 BESTORF_bash_of_aligned_aa_seq = {} 256 BESTORF_bash_of_aligned_aa_seq = {}
204 BESTORF_bash_of_aligned_aa_seq_CODING ={} 257 BESTORF_bash_of_aligned_aa_seq_CODING ={}
205 258
206 # Check whether their is a "M" or not, and if at least 1 "M" is present, that it is not in the last 50 aa 259 # Check whether their is a "M" or not, and if at least 1 "M" is present, that it is not in the last 50 aa
207 260
208 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = {} 261 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = {}
209 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = {} 262 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = {}
210 263
211 Ortho = 0 264 Ortho = 0
212 for fasta_name in BESTORF_bash_of_aligned_aa_seq_CODING.keys(): 265 for fasta_name in BESTORF_bash_of_aligned_aa_seq_CODING.keys():
213 seq_aa = BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] 266 seq_aa = BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name]
214 Ortho = detect_Methionine(seq_aa, Ortho, minimal_cds_length) ### DEF6 ### 267 Ortho = detect_methionine(seq_aa, Ortho, minimal_cds_length) ### DEF6 ###
215 268
216 # CASE 1: A "M" is present and correctly localized (not in last 50 aa) 269 # CASE 1: A "M" is present and correctly localized (not in last 50 aa)
217 if Ortho == 1: 270 if Ortho == 1:
218 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING 271 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING
219 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING 272 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING
241 out1.close() 294 out1.close()
242 295
243 def main(): 296 def main():
244 parser = argparse.ArgumentParser() 297 parser = argparse.ArgumentParser()
245 parser.add_argument("codeUniversel", help="File describing the genetic code (code_universel_modified.txt") 298 parser.add_argument("codeUniversel", help="File describing the genetic code (code_universel_modified.txt")
246 parser.add_argument("min_cds_len", help="Minmal length of a CDS (in amino-acids)", type=int) 299 parser.add_argument("min_cds_len", help="Minmal length of a CDS (in amino-acids)", type=int)
247 parser.add_argument("min_spec", help="Minimal number of species per alignment") 300 parser.add_argument("min_spec", help="Minimal number of species per alignment")
248 parser.add_argument("list_files", help="File with all input files names") 301 parser.add_argument("list_files", help="File with all input files names")
249 args = parser.parse_args() 302 args = parser.parse_args()
250 303
251 minimal_cds_length = int(args.min_cds_len) # in aa number 304 minimal_cds_length = int(args.min_cds_len) # in aa number
252 bash_codeUniversel = code_universel(args.codeUniversel) 305 bash_codeUniversel = code_universel(args.codeUniversel)
253 minimum_species = int(args.min_spec) 306 minimum_species = int(args.min_spec)
254 307
255 # Inputs from file containing list of species 308 # Inputs from file containing list of species
256 list_files = [] 309 list_files = []
257 with open(args.list_files, 'r') as f: 310 with open(args.list_files, 'r') as f:
258 for line in f.readlines(): 311 for line in f.readlines():
259 list_files.append(line.strip('\n')) 312 list_files.append(line.strip('\n'))
276 for file in list_files: 329 for file in list_files:
277 #n0 += 1 330 #n0 += 1
278 331
279 count_file_processed = count_file_processed + 1 332 count_file_processed = count_file_processed + 1
280 nb_gp = file.split('_')[1] # Keep trace of the orthogroup number 333 nb_gp = file.split('_')[1] # Keep trace of the orthogroup number
281 fasta_file_path = "./%s" %file 334 fasta_file_path = "./%s" %file
282 bash_fasta = dico(fasta_file_path) 335 bash_fasta = dico(fasta_file_path)
283 BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel, minimal_cds_length, minimum_species) 336 BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel, minimal_cds_length, minimum_species)
284 337
285 name_elems[1] = nb_gp 338 name_elems[1] = nb_gp
286 339
287 # Update counts and write group in corresponding output directory 340 # Update counts and write group in corresponding output directory
288 if BESTORF_nuc != {}: 341 if BESTORF_nuc != {}:
289 count_file_with_CDS += 1 342 count_file_with_CDS += 1
290 if len(BESTORF_nuc.keys()) >= minimum_species : 343 if len(BESTORF_nuc.keys()) >= minimum_species :
291 count_file_with_cds_and_enought_species += 1 344 count_file_with_cds_and_enought_species += 1
292 write_output_file(BESTORF_nuc, name_elems, dirs[0]) # OUTPUT BESTORF_nuc 345 write_output_file(BESTORF_nuc, name_elems, dirs[0]) # OUTPUT BESTORF_nuc
293 write_output_file(BESTORF_aa, name_elems, dirs[1]) # The most interesting 346 write_output_file(BESTORF_aa, name_elems, dirs[1]) # The most interesting
303 if len(BESTORF_nuc_CDS_with_M.keys()) >= minimum_species : 356 if len(BESTORF_nuc_CDS_with_M.keys()) >= minimum_species :
304 count_file_with_cds_M_and_enought_species += 1 357 count_file_with_cds_M_and_enought_species += 1
305 write_output_file(BESTORF_nuc_CDS_with_M, name_elems, dirs[4]) 358 write_output_file(BESTORF_nuc_CDS_with_M, name_elems, dirs[4])
306 write_output_file(BESTORF_aa_CDS_with_M, name_elems, dirs[5]) 359 write_output_file(BESTORF_aa_CDS_with_M, name_elems, dirs[5])
307 360
308 print "*************** CDS detection ***************" 361 print("*************** CDS detection ***************")
309 print "\nFiles processed: %d" %count_file_processed 362 print("\nFiles processed: %d" %count_file_processed)
310 print "\tFiles with CDS: %d" %count_file_with_CDS 363 print("\tFiles with CDS: %d" %count_file_with_CDS)
311 print "\tFiles wth CDS and more than %s species: %d" %(minimum_species, count_file_with_cds_and_enought_species) 364 print("\tFiles wth CDS and more than %s species: %d" %(minimum_species, count_file_with_cds_and_enought_species))
312 print "\t\tFiles with CDS plus M (codon start): %d" %count_file_with_CDS_plus_M 365 print("\t\tFiles with CDS plus M (codon start): %d" %count_file_with_CDS_plus_M)
313 print "\t\tFiles with CDS plus M (codon start) and more than %s species: %d" %(minimum_species,count_file_with_cds_M_and_enought_species) 366 print("\t\tFiles with CDS plus M (codon start) and more than %s species: %d" %(minimum_species,count_file_with_cds_M_and_enought_species) )
314 print "\tFiles without CDS: %d \n" %count_file_without_CDS 367 print("\tFiles without CDS: %d \n" %count_file_without_CDS)
315 print "" 368 print("")
316 369
317 if __name__ == '__main__': 370 if __name__ == '__main__':
318 main() 371 main()