Mercurial > repos > abims-sbr > cds_search
comparison scripts/S02_remove_too_short_bit_or_whole_sequence.py @ 1:c79bdda8abfb draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3a118aa934e6406cc8b0b24d006af6365c277519
author | abims-sbr |
---|---|
date | Thu, 09 Jun 2022 12:40:00 +0000 |
parents | eb95bf7f90ae |
children |
comparison
equal
deleted
inserted
replaced
0:eb95bf7f90ae | 1:c79bdda8abfb |
---|---|
87 for fasta_name in dico_aa.keys(): | 87 for fasta_name in dico_aa.keys(): |
88 seq = dico_aa[fasta_name] | 88 seq = dico_aa[fasta_name] |
89 seq_nuc = dico_nuc[fasta_name] | 89 seq_nuc = dico_nuc[fasta_name] |
90 | 90 |
91 if "?" in seq: | 91 if "?" in seq: |
92 seq = string.replace(seq, "?", "-") | 92 seq = str.replace(seq, "?", "-") |
93 if "?" in seq_nuc: | 93 if "?" in seq_nuc: |
94 seq_nuc = string.replace(seq_nuc, "?", "-") | 94 seq_nuc = str.replace(seq_nuc, "?", "-") |
95 | 95 |
96 ## 4.1 ## [FILTER 1] : Detect and Replace short internal indel symbole (= "-" as for other longer gaps) by a "?" | 96 ## 4.1 ## [FILTER 1] : Detect and Replace short internal indel symbole (= "-" as for other longer gaps) by a "?" |
97 ## aa | 97 ## aa |
98 list_sublist_pos = detect_short_indel(seq, MAX_LENGTH_SMALL_INDEL) ### DEF 9 ### | 98 list_sublist_pos = detect_short_indel(seq, MAX_LENGTH_SMALL_INDEL) ### DEF 9 ### |
99 for pos_short_indels in list_sublist_pos: | 99 for pos_short_indels in list_sublist_pos: |
105 for pos in pos_short_indels: | 105 for pos in pos_short_indels: |
106 seq_nuc = seq_nuc[:pos] + "?" + seq_nuc[pos+1:] | 106 seq_nuc = seq_nuc[:pos] + "?" + seq_nuc[pos+1:] |
107 | 107 |
108 ## 4.2 ## [FILTER 2] : Remove short bits of sequence (<"MIN_LENGTH_BIT_OF_SEQUENCE_aa") | 108 ## 4.2 ## [FILTER 2] : Remove short bits of sequence (<"MIN_LENGTH_BIT_OF_SEQUENCE_aa") |
109 LIST_sublist_aa=[] | 109 LIST_sublist_aa=[] |
110 S1 = string.split(seq, "-") | 110 S1 = str.split(seq, "-") |
111 for element in S1: | 111 for element in S1: |
112 if len(element) > MIN_LENGTH_BIT_OF_SEQUENCE_aa: | 112 if len(element) > MIN_LENGTH_BIT_OF_SEQUENCE_aa: |
113 LIST_sublist_aa.append(element) | 113 LIST_sublist_aa.append(element) |
114 | 114 |
115 ## 4.3 ## [FILTER 3] : Remove all the sequence if the total length of all subsequences < "MIN_LENGTH_ALL_aa") | 115 ## 4.3 ## [FILTER 3] : Remove all the sequence if the total length of all subsequences < "MIN_LENGTH_ALL_aa") |
124 seq_gap = "-" * len(seq) ## 4.4.1 ## generate a sequence with only gaps inside | 124 seq_gap = "-" * len(seq) ## 4.4.1 ## generate a sequence with only gaps inside |
125 seq_gap_nuc = "-" * len(seq_nuc) | 125 seq_gap_nuc = "-" * len(seq_nuc) |
126 | 126 |
127 for subsequence in LIST_sublist_aa: | 127 for subsequence in LIST_sublist_aa: |
128 ## aa | 128 ## aa |
129 START = string.find(seq, subsequence) | 129 START = str.find(seq, subsequence) |
130 END = START + len(subsequence) | 130 END = START + len(subsequence) |
131 seq_gap = seq_gap[:START] + seq[START:END] + seq_gap[END:] ## 4.4.2 ## and then replace the correponding gaps by coding subsequence in the sequence | 131 seq_gap = seq_gap[:START] + seq[START:END] + seq_gap[END:] ## 4.4.2 ## and then replace the correponding gaps by coding subsequence in the sequence |
132 ## nuc | 132 ## nuc |
133 START_nuc = START*3 | 133 START_nuc = START*3 |
134 END_nuc = END*3 | 134 END_nuc = END*3 |
135 seq_gap_nuc = seq_gap_nuc[:START_nuc] + seq_nuc[START_nuc:END_nuc] + seq_gap_nuc[END_nuc:] | 135 seq_gap_nuc = seq_gap_nuc[:START_nuc] + seq_nuc[START_nuc:END_nuc] + seq_gap_nuc[END_nuc:] |
136 | 136 |
137 ## 4.5 ## Save new sequence in bash if not empty | 137 ## 4.5 ## Save new sequence in bash if not empty |
138 seq_empty_test = string.replace(seq_gap, "-", "") | 138 seq_empty_test = str.replace(seq_gap, "-", "") |
139 if seq_empty_test != "": | 139 if seq_empty_test != "": |
140 new_bash_aa[fasta_name] = seq_gap | 140 new_bash_aa[fasta_name] = seq_gap |
141 | 141 |
142 seq_empty_test = string.replace(seq_gap_nuc, "-", "") | 142 seq_empty_test = str.replace(seq_gap_nuc, "-", "") |
143 if seq_empty_test != "": | 143 if seq_empty_test != "": |
144 new_bash_nuc[fasta_name] = seq_gap_nuc | 144 new_bash_nuc[fasta_name] = seq_gap_nuc |
145 | 145 |
146 # 4.6 ## Correct the nb of sequence in the output name, if necessary | 146 # 4.6 ## Correct the nb of sequence in the output name, if necessary |
147 n0 += 1 | 147 n0 += 1 |
178 else: | 178 else: |
179 e+=1 | 179 e+=1 |
180 | 180 |
181 ###Print | 181 ###Print |
182 if sys.argv[2] == "oui" : | 182 if sys.argv[2] == "oui" : |
183 print "\nIn locus with CDS considering Methionine : \n" | 183 print("\nIn locus with CDS considering Methionine : \n") |
184 else : | 184 else : |
185 print "\nIn locus with CDS regardless of the Methionine : \n" | 185 print("\nIn locus with CDS regardless of the Methionine : \n") |
186 | 186 |
187 print "\nTotal number of locus recorded = %d" %n0 | 187 print("\nTotal number of locus recorded = %d" %n0) |