Mercurial > repos > galaxy-australia > alphafold2
annotate validate_fasta.py @ 12:7fbec959cf2b draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 6fdbb269efd97b6f5c6ab40db4ab0b23459f884b
author | galaxy-australia |
---|---|
date | Fri, 16 Sep 2022 06:14:06 +0000 |
parents | 3bd420ec162d |
children | d00e15139065 |
rev | line source |
---|---|
1
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
0
diff
changeset
|
1 """Validate input FASTA sequence.""" |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
2 |
1
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
0
diff
changeset
|
3 import re |
6
04e95886cf24
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 724a7a389c878dded1c0332f3b6e507e0c4cd52a-dirty"
galaxy-australia
parents:
1
diff
changeset
|
4 import sys |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
5 import argparse |
8
ca90d17ff51b
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 03537aada92b5fff565ff48dd47c81462c5df47e"
galaxy-australia
parents:
7
diff
changeset
|
6 from typing import List |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
7 |
9
3bd420ec162d
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7726c3cba165bdc8fc6366ec0ce6596e55657468
galaxy-australia
parents:
8
diff
changeset
|
8 MULTIMER_MAX_SEQUENCE_COUNT = 10 |
3bd420ec162d
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7726c3cba165bdc8fc6366ec0ce6596e55657468
galaxy-australia
parents:
8
diff
changeset
|
9 |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
10 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
class Fasta:
    """A single FASTA record: one header line and its sequence."""

    def __init__(self, header_str: str, seq_str: str):
        """Store the header and the sequence exactly as given.

        Args:
            header_str: header text for the record.
            seq_str: sequence text for the record.
        """
        self.header = header_str
        self.aa_seq = seq_str

    def __repr__(self) -> str:
        """Return a debug representation showing both stored fields."""
        return (
            f'{type(self).__name__}'
            f'(header_str={self.header!r}, seq_str={self.aa_seq!r})'
        )
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
15 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
16 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
class FastaLoader:
    """Read a FASTA (or bare sequence) file into a list of Fasta records.

    After construction:
        fastas: list of Fasta objects, in file order.
        content: the raw text read from the file.
        newline: line separator used to split ``content``.
        read_caret: marker that identifies a header line.
        lines: ``content`` split on ``newline`` (unmodified pieces).
    """

    def __init__(self, fasta_path: str):
        """Initialize from FASTA file."""
        self.fastas = []
        self.load(fasta_path)

    def load(self, fasta_path: str):
        """Load bare or FASTA formatted sequence.

        Surrounding whitespace is removed from every line and blank lines
        are ignored, so leading blank lines, trailing spaces or a trailing
        newline cannot leak into a sequence or hide the first header.

        Args:
            fasta_path: path of the text file to read.
        """
        with open(fasta_path, 'r') as f:
            self.content = f.read()

        if "__cn__" in self.content:
            # Pasted content with escaped characters
            self.newline = '__cn__'
            self.read_caret = '__gt__'
        else:
            # Uploaded file with normal content
            self.newline = '\n'
            self.read_caret = '>'

        self.lines = self.content.split(self.newline)

        # Work on a cleaned copy; self.lines keeps the raw split result.
        stripped = (line.strip() for line in self.lines)
        lines = [line for line in stripped if line]
        if not lines:
            # Nothing but whitespace: no sequences to register.
            return

        if not lines[0].startswith(self.read_caret):
            # Fasta is headless, load as single sequence
            self.update_fastas('', ''.join(lines))
            return

        header = None
        chunks = []
        for line in lines:
            if line.startswith(self.read_caret):
                if header:
                    # A new header closes the previous record.
                    self.update_fastas(header, ''.join(chunks))
                header = '>' + self.strip_header(line)
                chunks = []
            else:
                chunks.append(line)
        # Flush the final record.
        self.update_fastas(header, ''.join(chunks))

    def strip_header(self, line):
        """Strip characters escaped with underscores from pasted text.

        Removes every ``__xx__`` token (two arbitrary characters between
        double underscores) and then any '>' at either end of the line.
        """
        return re.sub(r'\_\_.{2}\_\_', '', line).strip('>')

    def update_fastas(self, header: str, sequence: str):
        """Append a Fasta record when ``sequence`` is non-empty.

        An empty ``header`` is replaced by ``>sequence_<n>``, where ``<n>``
        is the number of records stored so far.
        """
        if not sequence:
            return
        if not header:
            # create generic header if not exists
            fasta_count = len(self.fastas)
            header = f'>sequence_{fasta_count}'
        self.fastas.append(Fasta(header, sequence))
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
72 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
73 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
74 class FastaValidator: |
6
04e95886cf24
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 724a7a389c878dded1c0332f3b6e507e0c4cd52a-dirty"
galaxy-australia
parents:
1
diff
changeset
|
def __init__(
        self,
        min_length=None,
        max_length=None,
        multiple=False):
    """Configure the validator.

    Args:
        min_length: minimum accepted sequence length; a falsy value
            (None or 0) disables the lower bound check.
        max_length: maximum accepted sequence length; a falsy value
            (None or 0) disables the upper bound check.
        multiple: True when several sequences are expected.
    """
    self.multiple = multiple
    self.min_length = min_length
    self.max_length = max_length
    # Accepted sequence characters: upper-case letters except 'J' and 'O',
    # plus '-'.
    self.iupac_characters = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G',
        'H', 'I', 'K', 'L', 'M', 'N', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', '-',
    }
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
89 |
9
3bd420ec162d
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 7726c3cba165bdc8fc6366ec0ce6596e55657468
galaxy-australia
parents:
8
diff
changeset
|
def validate(self, fasta_list: 'List[Fasta]') -> 'List[Fasta]':
    """Perform FASTA validation.

    Stores ``fasta_list`` on the instance, then runs the sequence count,
    length and alphabet checks in that order.

    Args:
        fasta_list: records to validate.

    Returns:
        ``self.fasta_list`` as left by the checks, which may be shorter
        than the input because the count check can discard records.
    """
    self.fasta_list = fasta_list
    self.validate_num_seqs()
    self.validate_length()
    self.validate_alphabet()
    # not checking for 'X' nucleotides at the moment.
    # alphafold can throw an error if it doesn't like it.
    return self.fasta_list
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
100 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def validate_num_seqs(self) -> None:
    """Check the number of sequences against the selected mode.

    With ``self.multiple`` set, at least two sequences are required and
    anything beyond MULTIMER_MAX_SEQUENCE_COUNT is dropped with a warning
    on stderr. Otherwise at least one sequence is required and only the
    first one is kept, again with a warning on stderr.

    Raises:
        ValueError: if too few sequences are present for the mode.
    """
    fasta_count = len(self.fasta_list)

    if self.multiple:
        if fasta_count < 2:
            raise ValueError(
                'Error encountered validating FASTA:\n'
                'Multimer mode requires multiple input sequence.'
                f' Only {fasta_count} sequences were detected in'
                ' the provided file.')

        if fasta_count > MULTIMER_MAX_SEQUENCE_COUNT:
            sys.stderr.write(
                f'WARNING: detected {fasta_count} sequences but the'
                f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}'
                ' sequences. The last'
                f' {fasta_count - MULTIMER_MAX_SEQUENCE_COUNT} sequence(s)'
                ' have been discarded.\n')
            self.fasta_list = self.fasta_list[:MULTIMER_MAX_SEQUENCE_COUNT]
        return

    if fasta_count == 0:
        raise ValueError(
            'Error encountered validating FASTA:\n'
            ' no FASTA sequences detected in input file.')

    if fasta_count > 1:
        sys.stderr.write(
            'WARNING: More than 1 sequence detected.'
            ' Using first FASTA sequence as input.\n')
        self.fasta_list = self.fasta_list[:1]
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
133 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def validate_length(self):
    """Confirm whether sequence length is valid.

    Compares the length of the first record's sequence with
    ``self.min_length`` and ``self.max_length``; a falsy bound is skipped.

    Raises:
        ValueError: if the sequence is shorter than the minimum or longer
            than the maximum.
    """
    # NOTE(review): only the first record is checked, even when several
    # sequences are present - confirm this is intended for multimer input.
    fasta = self.fasta_list[0]
    seq_length = len(fasta.aa_seq)

    if self.min_length and seq_length < self.min_length:
        raise ValueError(
            'Error encountered validating FASTA:\n Sequence too short'
            f' ({seq_length}AA).'
            f' Minimum length is {self.min_length}AA.')

    if self.max_length and seq_length > self.max_length:
        raise ValueError(
            'Error encountered validating FASTA:\n'
            f' Sequence too long ({seq_length}AA).'
            f' Maximum length is {self.max_length}AA.')
1
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
0
diff
changeset
|
149 |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def validate_alphabet(self):
    """Check that the sequence only uses characters from the IUPAC set.

    Only the first entry of ``self.fasta_list`` is inspected. Its sequence
    is upper-cased and every character is tested for membership in
    ``self.iupac_characters``.

    Raises:
        ValueError: on the first character outside the allowed set; the
            message carries the zero-based index into the upper-cased
            sequence and the offending character.
    """
    sequence = self.fasta_list[0].aa_seq.upper()
    allowed = self.iupac_characters

    # Locate the first disallowed character (if any) without an explicit loop.
    offender = next(
        (
            (pos, residue)
            for pos, residue in enumerate(sequence)
            if residue not in allowed
        ),
        None,
    )
    if offender is None:
        return

    pos, residue = offender
    raise ValueError(
        'Error encountered validating FASTA:\n Invalid amino acid'
        f' found at pos {pos}: "{residue}"')
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
161 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def validate_x(self):
    """Reject a sequence that contains the residue code "X".

    Only the first entry of ``self.fasta_list`` is inspected, and the
    comparison is done on the upper-cased sequence so a lower-case "x"
    is caught as well.

    Raises:
        ValueError: when an "X" is present; the message reports the
            zero-based index of its first occurrence.
    """
    sequence = self.fasta_list[0].aa_seq.upper()
    pos = sequence.find('X')
    if pos == -1:
        return
    raise ValueError(
        'Error encountered validating FASTA:\n Unsupported AA code'
        f' "X" found at pos {pos}')
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
170 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
171 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
class FastaWriter:
    """Write FASTA records to standard output with fixed-width wrapping."""

    def __init__(self, line_wrap: int = 60) -> None:
        """Create a writer.

        Args:
            line_wrap: Maximum number of sequence characters per output
                line. Defaults to 60, the width previously hard-coded.

        Raises:
            ValueError: if ``line_wrap`` is smaller than 1 (a zero step
                would otherwise only fail later, inside ``range``).
        """
        if line_wrap < 1:
            raise ValueError('line_wrap must be a positive integer')
        self.line_wrap = line_wrap

    def write(self, fasta: "Fasta") -> None:
        """Write one record to ``sys.stdout``.

        The header is written on its own line, followed by the wrapped,
        upper-cased sequence produced by ``format_sequence``.

        Args:
            fasta: Object exposing ``header`` and ``aa_seq`` attributes.
        """
        header = fasta.header
        seq = self.format_sequence(fasta.aa_seq)
        sys.stdout.write(header + '\n')
        sys.stdout.write(seq)

    def format_sequence(self, aa_seq: str) -> str:
        """Return ``aa_seq`` upper-cased and wrapped at ``self.line_wrap``.

        Every output line, including the last, ends with a newline. An
        empty sequence yields an empty string.
        """
        width = self.line_wrap
        # Join the chunks in one pass instead of growing a string with +=.
        wrapped = ''.join(
            aa_seq[start:start + width] + '\n'
            for start in range(0, len(aa_seq), width)
        )
        return wrapped.upper()
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
187 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
188 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def main():
    """Validate the FASTA file given on the command line and print it.

    The parsed arguments supply the input path and the length/multimer
    constraints. The path is handed to ``FastaLoader``, the loaded records
    are passed through ``FastaValidator.validate`` and every record it
    returns is written to stdout with ``FastaWriter``.

    Any exception is re-raised after a message has been written to stderr:
    the exception text for ``ValueError``, a generic notice otherwise.
    """
    try:
        # Read the input file.
        cli = parse_args()
        loader = FastaLoader(cli.input)

        # Check the loaded records against the requested constraints.
        validator = FastaValidator(
            min_length=cli.min_length,
            max_length=cli.max_length,
            multiple=cli.multimer,
        )
        validated = validator.validate(loader.fastas)

        # Emit the validated records.
        writer = FastaWriter()
        for record in validated:
            writer.write(record)

    except ValueError as exc:
        # Validation failures carry a user-facing message of their own.
        sys.stderr.write(f"{exc}\n\n")
        raise exc

    except Exception as exc:
        # Anything else is reported generically before propagating.
        sys.stderr.write(
            "Input error: FASTA input is invalid. Please check your input.\n\n"
        )
        raise exc
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
217 |
1
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
0
diff
changeset
|
218 |
0
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the script.

    Returns:
        Namespace with ``input`` (str, positional), ``min_length`` and
        ``max_length`` (int or ``None`` when omitted) and ``multimer``
        (bool, ``True`` only when the flag is present).
    """
    # Each entry is (positional names/flags, keyword options) for one
    # add_argument call; registration order matches the usage string.
    argument_specs = (
        (
            ("input",),
            {"help": "input fasta file", "type": str},
        ),
        (
            ("--min_length",),
            {
                "dest": 'min_length',
                "help": "Minimum length of input protein sequence (AA)",
                "default": None,
                "type": int,
            },
        ),
        (
            ("--max_length",),
            {
                "dest": 'max_length',
                "help": "Maximum length of input protein sequence (AA)",
                "default": None,
                "type": int,
            },
        ),
        (
            ("--multimer",),
            {
                "action": 'store_true',
                "help": "Require multiple input sequences",
            },
        ),
    )

    parser = argparse.ArgumentParser()
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
246 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
247 |
7ae9d78b06f5
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
galaxy-australia
parents:
diff
changeset
|
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()