Mercurial > repos > abims-sbr > cds_search
diff scripts/S03_remove_site_with_not_enough_species_represented.py @ 1:c79bdda8abfb draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3a118aa934e6406cc8b0b24d006af6365c277519
author | abims-sbr |
---|---|
date | Thu, 09 Jun 2022 12:40:00 +0000 |
parents | eb95bf7f90ae |
children |
line wrap: on
line diff
```diff
--- a/scripts/S03_remove_site_with_not_enough_species_represented.py Fri Feb 01 10:26:37 2019 -0500
+++ b/scripts/S03_remove_site_with_not_enough_species_represented.py Thu Jun 09 12:40:00 2022 +0000
@@ -12,7 +12,7 @@
 def remove_position_with_too_much_missing_data(bash_aa, bash_nuc, MIN_SPECIES_NB):
 
     ## 1 ## Get alignment length
-    fasta_name0 = bash_aa.keys()[0]
+    fasta_name0 = list(bash_aa.keys())[0]
     ln_aa = len(bash_aa[fasta_name0])
     ln_nuc = len(bash_nuc[fasta_name0])
 
@@ -23,7 +23,7 @@
     i=0
     while i < ln_aa:
         site = []
-        for fasta_name in bash_aa.keys():
+        for fasta_name in list(bash_aa.keys()):
             pos = bash_aa[fasta_name][i]
 
             if pos != "-" and pos != "?" and pos != "X":
@@ -45,15 +45,15 @@
     ## 4 ## Create entries for "filtered_bash" for aa & nuc
     filtered_bash_aa = {}
     filtered_bash_nuc = {}
-    for fasta_name in bash_aa.keys():
+    for fasta_name in list(bash_aa.keys()):
         filtered_bash_aa[fasta_name] = ""
-    for fasta_name in bash_nuc.keys():
+    for fasta_name in list(bash_nuc.keys()):
         filtered_bash_nuc[fasta_name] = ""
 
     ## 5 ## Write "filtered_bash" for aa
     j=0
     while j < ln_aa:
-        for fasta_name in bash_aa.keys():
+        for fasta_name in list(bash_aa.keys()):
             seq=filtered_bash_aa[fasta_name]
             pos=bash_aa[fasta_name][j]
 
@@ -63,7 +63,7 @@
         j = j + 1
 
     ## 6 ## Remove empty sequence
-    for name in filtered_bash_aa.keys():
+    for name in list(filtered_bash_aa.keys()):
         seq = filtered_bash_aa[name]
         if seq == '':
             del filtered_bash_aa[name]
@@ -72,7 +72,7 @@
     ## 7 ## Write "filtered_bash" for nuc
     j=0
     while j < ln_nuc:
-        for fasta_name in bash_nuc.keys():
+        for fasta_name in list(bash_nuc.keys()):
             seq=filtered_bash_nuc[fasta_name]
             #print seq
             pos=bash_nuc[fasta_name][j]
@@ -83,7 +83,7 @@
         j = j + 1
 
     ## 8 ## Remove empty sequence
-    for name in filtered_bash_nuc.keys():
+    for name in list(filtered_bash_nuc.keys()):
         seq = filtered_bash_nuc[name]
         if seq == '':
             del filtered_bash_nuc[name]
@@ -147,7 +147,7 @@
     ## 4.1 ## REMOVE POSITIONS WITH TOO MUCH MISSING DATA (i.e. not enough taxa represented at each position in the alignment)
     filtered_bash_aa, filtered_bash_nuc = remove_position_with_too_much_missing_data(dico_aa, dico_nuc, MIN_SPECIES_NB) ### DEF 2 ###
 
-    k = filtered_bash_nuc.keys()
+    k = list(filtered_bash_nuc.keys())
     new_leng_nuc = 0
     if k != []:
         k0 = k[0]
@@ -158,14 +158,14 @@
     n0+=1
     #name_elems[1] = str(n0)
     name_elems[1] = file.split('_')[1]
-    name_elems[3] = str(len(filtered_bash_aa.keys()))
+    name_elems[3] = str(len(list(filtered_bash_aa.keys())))
     new_name = "_".join(name_elems)
 
     ## 4.5 ## Write filtered alignment in OUTPUTs
     ## aa
     if filtered_bash_aa != {} and new_leng_nuc >= MIN_LENGTH_FINAL_ALIGNMENT_NUC:
         OUTaa=open("%s/%s" %(path_OUT1, new_name), "w")
-        for fasta_name in filtered_bash_aa.keys():
+        for fasta_name in list(filtered_bash_aa.keys()):
             seq_aa = filtered_bash_aa[fasta_name]
             OUTaa.write("%s\n" %fasta_name)
             OUTaa.write("%s\n" %seq_aa)
@@ -174,7 +174,7 @@
     if filtered_bash_nuc != {} and new_leng_nuc >= MIN_LENGTH_FINAL_ALIGNMENT_NUC:
         good+=1
         OUTnuc=open("%s/%s" %(path_OUT2, new_name), "w")
-        for fasta_name in filtered_bash_nuc.keys():
+        for fasta_name in list(filtered_bash_nuc.keys()):
             seq_nuc = filtered_bash_nuc[fasta_name]
             OUTnuc.write("%s\n" %fasta_name)
             OUTnuc.write("%s\n" %seq_nuc)
@@ -184,8 +184,8 @@
 
 
 ## 5 ## Print
-print "*************** 2nd Filter : removal of the indel ***************"
-print "\nTotal number of locus recorded = %d" %n0
-print "\tTotal number of locus with no indels (SAVED) = %d" %good
-print "\tTotal number of locus, when removing indel, wich are empty (EXCLUDED) = %d" %bad
-print ""
\ No newline at end of file
+print("*************** 2nd Filter : removal of the indel ***************")
+print("\nTotal number of locus recorded = %d" %n0)
+print("\tTotal number of locus with no indels (SAVED) = %d" %good)
+print("\tTotal number of locus, when removing indel, wich are empty (EXCLUDED) = %d" %bad)
+print("")
```