cds_search: scripts/S02_remove_too_short_bit_or_whole

comparison scripts/S02_remove_too_short_bit_or_whole_sequence.py @ 1:c79bdda8abfb draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3a118aa934e6406cc8b0b24d006af6365c277519

author	abims-sbr
date	Thu, 09 Jun 2022 12:40:00 +0000
parents	eb95bf7f90ae
children

comparison

equal deleted inserted replaced

-:eb95bf7f90ae
+:c79bdda8abfb
 for fasta_name in dico_aa.keys():
 seq = dico_aa[fasta_name]
 seq_nuc = dico_nuc[fasta_name]
 if "?" in seq:
-seq = string.replace(seq, "?", "-")
+seq = str.replace(seq, "?", "-")
 if "?" in seq_nuc:
-seq_nuc = string.replace(seq_nuc, "?", "-")
+seq_nuc = str.replace(seq_nuc, "?", "-")
 ## 4.1 ## [FILTER 1] : Detect and replace short internal indel symbols (= "-" as for other longer gaps) by a "?"
 ## aa
 list_sublist_pos = detect_short_indel(seq, MAX_LENGTH_SMALL_INDEL)   ### DEF 9 ###
 for pos_short_indels in list_sublist_pos:
 for pos in pos_short_indels:
 seq_nuc = seq_nuc[:pos] + "?" + seq_nuc[pos+1:]
 ## 4.2 ## [FILTER 2] : Remove short bits of sequence (<"MIN_LENGTH_BIT_OF_SEQUENCE_aa")
 LIST_sublist_aa=[]
-S1 = string.split(seq, "-")
+S1 = str.split(seq, "-")
 for element in S1:
 if len(element) > MIN_LENGTH_BIT_OF_SEQUENCE_aa:
 LIST_sublist_aa.append(element)
 ## 4.3 ## [FILTER 3] : Remove the whole sequence if the total length of all subsequences is too short (< "MIN_LENGTH_ALL_aa")
 seq_gap = "-" * len(seq)    ## 4.4.1 ## generate a sequence with only gaps inside
 seq_gap_nuc = "-" * len(seq_nuc)
 for subsequence in LIST_sublist_aa:
 ## aa
-START = string.find(seq, subsequence)
+START = str.find(seq, subsequence)
 END = START + len(subsequence)
 seq_gap = seq_gap[:START] + seq[START:END] + seq_gap[END:]  ## 4.4.2 ## and then replace the correponding gaps by coding subsequence in the sequence
 ## nuc
 START_nuc = START*3
 END_nuc = END*3
 seq_gap_nuc = seq_gap_nuc[:START_nuc] + seq_nuc[START_nuc:END_nuc] + seq_gap_nuc[END_nuc:]
 ## 4.5 ## Save new sequence in bash if not empty
-seq_empty_test = string.replace(seq_gap, "-", "")
+seq_empty_test = str.replace(seq_gap, "-", "")
 if seq_empty_test != "":
 new_bash_aa[fasta_name] = seq_gap
-seq_empty_test = string.replace(seq_gap_nuc, "-", "")
+seq_empty_test = str.replace(seq_gap_nuc, "-", "")
 if seq_empty_test != "":
 new_bash_nuc[fasta_name] = seq_gap_nuc
 # 4.6 ## Correct the nb of sequence in the output name, if necessary
 n0 += 1
 else:
 e+=1
 ###Print
 if sys.argv[2] == "oui" :
-print "\nIn locus with CDS considering Methionine : \n"
+print("\nIn locus with CDS considering Methionine : \n")
 else :
-print "\nIn locus with CDS regardless of the Methionine : \n"
+print("\nIn locus with CDS regardless of the Methionine : \n")
-print "\nTotal number of locus recorded  = %d" %n0
+print("\nTotal number of locus recorded  = %d" %n0)

Mercurial > repos > abims-sbr > cds_search

comparison scripts/S02_remove_too_short_bit_or_whole_sequence.py @ 1:c79bdda8abfb draft default tip