Mercurial > repos > galaxy-australia > alphafold2
comparison validate_fasta.py @ 7:eb085b3dbaf8 draft
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8d9f0ae6af9e8d9313c6cdcc551b24c6c44ae341"
author | galaxy-australia |
---|---|
date | Tue, 19 Apr 2022 00:39:29 +0000 |
parents | 04e95886cf24 |
children | ca90d17ff51b |
comparison legend: equal | deleted | inserted | replaced
6:04e95886cf24 | 7:eb085b3dbaf8 |
---|---|
84 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 84 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', |
85 'Y', 'Z', '-' | 85 'Y', 'Z', '-' |
86 } | 86 } |
87 | 87 |
88 def validate(self): | 88 def validate(self): |
89 """performs fasta validation""" | 89 """Perform FASTA validation.""" |
90 self.validate_num_seqs() | 90 self.validate_num_seqs() |
91 self.validate_length() | 91 self.validate_length() |
92 self.validate_alphabet() | 92 self.validate_alphabet() |
93 | 93 |
94 # not checking for 'X' nucleotides at the moment. | 94 # not checking for 'X' nucleotides at the moment. |
96 # self.validate_x() | 96 # self.validate_x() |
97 | 97 |
98 def validate_num_seqs(self) -> None: | 98 def validate_num_seqs(self) -> None: |
99 """Assert that only one sequence has been provided.""" | 99 """Assert that only one sequence has been provided.""" |
100 if len(self.fasta_list) > 1: | 100 if len(self.fasta_list) > 1: |
101 raise Exception( | 101 raise ValueError( |
102 'Error encountered validating fasta:' | 102 'Error encountered validating FASTA:\n' |
103 f' More than 1 sequence detected ({len(self.fasta_list)}).' | 103 f' More than 1 sequence detected ({len(self.fasta_list)}).' |
104 ' Please use single fasta sequence as input.') | 104 ' Please use single FASTA sequence as input.') |
105 elif len(self.fasta_list) == 0: | 105 elif len(self.fasta_list) == 0: |
106 raise Exception( | 106 raise ValueError( |
107 'Error encountered validating fasta:' | 107 'Error encountered validating FASTA:\n' |
108 ' input file has no fasta sequences') | 108 ' input file has no FASTA sequences') |
109 | 109 |
110 def validate_length(self): | 110 def validate_length(self): |
111 """Confirm whether sequence length is valid.""" | 111 """Confirm whether sequence length is valid.""" |
112 fasta = self.fasta_list[0] | 112 fasta = self.fasta_list[0] |
113 if self.min_length: | 113 if self.min_length: |
114 if len(fasta.aa_seq) < self.min_length: | 114 if len(fasta.aa_seq) < self.min_length: |
115 raise Exception( | 115 raise ValueError( |
116 'Error encountered validating fasta: Sequence too short' | 116 'Error encountered validating FASTA:\n Sequence too short' |
117 f' ({len(fasta.aa_seq)}AA).' | 117 f' ({len(fasta.aa_seq)}AA).' |
118 f' Minimum length is {self.min_length}AA.') | 118 f' Minimum length is {self.min_length}AA.') |
119 if self.max_length: | 119 if self.max_length: |
120 if len(fasta.aa_seq) > self.max_length: | 120 if len(fasta.aa_seq) > self.max_length: |
121 raise Exception( | 121 raise ValueError( |
122 'Error encountered validating fasta:' | 122 'Error encountered validating FASTA:\n' |
123 f' Sequence too long ({len(fasta.aa_seq)}AA).' | 123 f' Sequence too long ({len(fasta.aa_seq)}AA).' |
124 f' Maximum length is {self.max_length}AA.') | 124 f' Maximum length is {self.max_length}AA.') |
125 | 125 |
126 def validate_alphabet(self): | 126 def validate_alphabet(self): |
127 """ | 127 """Confirm whether the sequence conforms to IUPAC codes. |
128 Confirm whether the sequence conforms to IUPAC codes. | 128 |
129 If not, report the offending character and its position. | 129 If not, report the offending character and its position. |
130 """ | 130 """ |
131 fasta = self.fasta_list[0] | 131 fasta = self.fasta_list[0] |
132 for i, char in enumerate(fasta.aa_seq.upper()): | 132 for i, char in enumerate(fasta.aa_seq.upper()): |
133 if char not in self.iupac_characters: | 133 if char not in self.iupac_characters: |
134 raise Exception( | 134 raise ValueError( |
135 'Error encountered validating fasta: Invalid amino acid' | 135 'Error encountered validating FASTA:\n Invalid amino acid' |
136 f' found at pos {i}: "{char}"') | 136 f' found at pos {i}: "{char}"') |
137 | 137 |
138 def validate_x(self): | 138 def validate_x(self): |
139 """Check for X bases.""" | 139 """Check for X bases.""" |
140 fasta = self.fasta_list[0] | 140 fasta = self.fasta_list[0] |
141 for i, char in enumerate(fasta.aa_seq.upper()): | 141 for i, char in enumerate(fasta.aa_seq.upper()): |
142 if char == 'X': | 142 if char == 'X': |
143 raise Exception( | 143 raise ValueError( |
144 'Error encountered validating fasta: Unsupported AA code' | 144 'Error encountered validating FASTA:\n Unsupported AA code' |
145 f' "X" found at pos {i}') | 145 f' "X" found at pos {i}') |
146 | 146 |
147 | 147 |
148 class FastaWriter: | 148 class FastaWriter: |
149 def __init__(self) -> None: | 149 def __init__(self) -> None: |
162 return formatted_seq | 162 return formatted_seq |
163 | 163 |
164 | 164 |
165 def main(): | 165 def main(): |
166 # load fasta file | 166 # load fasta file |
167 args = parse_args() | 167 try: |
168 fas = FastaLoader(args.input) | 168 args = parse_args() |
169 | 169 fas = FastaLoader(args.input) |
170 # validate | 170 |
171 fv = FastaValidator( | 171 # validate |
172 fas.fastas, | 172 fv = FastaValidator( |
173 min_length=args.min_length, | 173 fas.fastas, |
174 max_length=args.max_length, | 174 min_length=args.min_length, |
175 ) | 175 max_length=args.max_length, |
176 fv.validate() | 176 ) |
177 | 177 fv.validate() |
178 # write cleaned version | 178 |
179 fw = FastaWriter() | 179 # write cleaned version |
180 fw.write(fas.fastas[0]) | 180 fw = FastaWriter() |
181 fw.write(fas.fastas[0]) | |
182 | |
183 except ValueError as exc: | |
184 sys.stderr.write(f"{exc}\n\n") | |
185 raise exc | |
186 | |
187 except Exception as exc: | |
188 sys.stderr.write( | |
189 "Input error: FASTA input is invalid. Please check your input.\n\n" | |
190 ) | |
191 raise exc | |
181 | 192 |
182 | 193 |
183 def parse_args() -> argparse.Namespace: | 194 def parse_args() -> argparse.Namespace: |
184 parser = argparse.ArgumentParser() | 195 parser = argparse.ArgumentParser() |
185 parser.add_argument( | 196 parser.add_argument( |