Mercurial > repos > galaxy-australia > alphafold2
comparison scripts/validate_fasta.py @ 18:e4a053d67e24 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
| author | galaxy-australia |
|---|---|
| date | Fri, 01 Sep 2023 00:09:46 +0000 |
| parents | f9eb041c518c |
| children | 2f7702fd0a4c |
comparison
equal
deleted
inserted
replaced
| 17:5b85006245f3 | 18:e4a053d67e24 |
|---|---|
| 4 import re | 4 import re |
| 5 import sys | 5 import sys |
| 6 from typing import List | 6 from typing import List |
| 7 | 7 |
| 8 MULTIMER_MAX_SEQUENCE_COUNT = 10 | 8 MULTIMER_MAX_SEQUENCE_COUNT = 10 |
| | 9 STRIP_SEQUENCE_CHARS = ['\n', '\r', '\t', ' '] |
| 9 | 10 |
| 10 | 11 |
| 11 class Fasta: | 12 class Fasta: |
| 12 def __init__(self, header_str: str, seq_str: str): | 13 def __init__(self, header_str: str, seq_str: str): |
| 13 self.header = header_str | 14 self.header = header_str |
| 64 if sequence: | 65 if sequence: |
| 65 # create generic header if not exists | 66 # create generic header if not exists |
| 66 if not header: | 67 if not header: |
| 67 fasta_count = len(self.fastas) | 68 fasta_count = len(self.fastas) |
| 68 header = f'>sequence_{fasta_count}' | 69 header = f'>sequence_{fasta_count}' |
| | 70 |
| | 71 for char in STRIP_SEQUENCE_CHARS: |
| | 72 sequence = sequence.replace(char, '') |
| 69 | 73 |
| 70 # Create new Fasta | 74 # Create new Fasta |
| 71 self.fastas.append(Fasta(header, sequence)) | 75 self.fastas.append(Fasta(header, sequence)) |
| 72 | 76 |
| 73 | 77 |
| 107 raise ValueError( | 111 raise ValueError( |
| 108 'Error encountered validating FASTA:\n' | 112 'Error encountered validating FASTA:\n' |
| 109 'Multimer mode requires multiple input sequence.' | 113 'Multimer mode requires multiple input sequence.' |
| 110 f' Only {fasta_count} sequences were detected in' | 114 f' Only {fasta_count} sequences were detected in' |
| 111 ' the provided file.') | 115 ' the provided file.') |
| 112 self.fasta_list = self.fasta_list | |
| 113 | 116 |
| 114 elif fasta_count > MULTIMER_MAX_SEQUENCE_COUNT: | 117 elif fasta_count > MULTIMER_MAX_SEQUENCE_COUNT: |
| 115 sys.stderr.write( | 118 sys.stderr.write( |
| 116 f'WARNING: detected {fasta_count} sequences but the' | 119 f'WARNING: detected {fasta_count} sequences but the' |
| 117 f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}' | 120 f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}' |
