# HG changeset patch # User galaxy-australia # Date 1650328769 0 # Node ID eb085b3dbaf873cfae919e218b923ebebe8a4ae4 # Parent 04e95886cf24283e9269265a3ff5093605209205 "planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8d9f0ae6af9e8d9313c6cdcc551b24c6c44ae341" diff -r 04e95886cf24 -r eb085b3dbaf8 README.rst --- a/README.rst Mon Apr 04 01:46:22 2022 +0000 +++ b/README.rst Tue Apr 19 00:39:29 2022 +0000 @@ -157,6 +157,18 @@ "$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw,/data/alphafold_databases:/data:ro" +CUSTOM PARAMETERS +~~~~~~~~~~~~~~~~~ + +A few parameters can be customized with the use of environment variables set in the job destination: + +- ``ALPHAFOLD_DB``: path to the reference database root (default ``/data``) +- ``ALPHAFOLD_AA_LENGTH_MIN``: minimum accepted sequence length (default ``30``) +- ``ALPHAFOLD_AA_LENGTH_MAX``: maximum accepted sequence length (default ``2000``) + +For the last two, these could be set to ``0`` and ``50000`` respectively to remove the validation entirely. + + Closing ~~~~~~~ diff -r 04e95886cf24 -r eb085b3dbaf8 alphafold.xml --- a/alphafold.xml Mon Apr 04 01:46:22 2022 +0000 +++ b/alphafold.xml Tue Apr 19 00:39:29 2022 +0000 @@ -195,17 +195,6 @@ Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD `_ and `SeeSAR `_, but many `free and open-source options `_ are available such as `AutoDock `_ and `SwissDock `_. - *Expected run times* - - .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold_runtime_graph.png?raw=true - :height: 520 - :alt: Run time graph - - | - | In general, we observe a quadratic relationship between sequence length and time to fold. 
- | Once your job begins, a sequence of 50aa will take approximately 1hr to complete, while a sequence of 2000aa will take about 18hrs. - | - **Input** *Amino acid sequence* diff -r 04e95886cf24 -r eb085b3dbaf8 validate_fasta.py --- a/validate_fasta.py Mon Apr 04 01:46:22 2022 +0000 +++ b/validate_fasta.py Tue Apr 19 00:39:29 2022 +0000 @@ -86,7 +86,7 @@ } def validate(self): - """performs fasta validation""" + """Perform FASTA validation.""" self.validate_num_seqs() self.validate_length() self.validate_alphabet() @@ -98,41 +98,41 @@ def validate_num_seqs(self) -> None: """Assert that only one sequence has been provided.""" if len(self.fasta_list) > 1: - raise Exception( - 'Error encountered validating fasta:' + raise ValueError( + 'Error encountered validating FASTA:\n' f' More than 1 sequence detected ({len(self.fasta_list)}).' - ' Please use single fasta sequence as input.') + ' Please use single FASTA sequence as input.') elif len(self.fasta_list) == 0: - raise Exception( - 'Error encountered validating fasta:' - ' input file has no fasta sequences') + raise ValueError( + 'Error encountered validating FASTA:\n' + ' input file has no FASTA sequences') def validate_length(self): """Confirm whether sequence length is valid.""" fasta = self.fasta_list[0] if self.min_length: if len(fasta.aa_seq) < self.min_length: - raise Exception( - 'Error encountered validating fasta: Sequence too short' + raise ValueError( + 'Error encountered validating FASTA:\n Sequence too short' f' ({len(fasta.aa_seq)}AA).' f' Minimum length is {self.min_length}AA.') if self.max_length: if len(fasta.aa_seq) > self.max_length: - raise Exception( - 'Error encountered validating fasta:' + raise ValueError( + 'Error encountered validating FASTA:\n' f' Sequence too long ({len(fasta.aa_seq)}AA).' f' Maximum length is {self.max_length}AA.') def validate_alphabet(self): - """ - Confirm whether the sequence conforms to IUPAC codes. + """Confirm whether the sequence conforms to IUPAC codes. 
+ If not, report the offending character and its position. """ fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char not in self.iupac_characters: - raise Exception( - 'Error encountered validating fasta: Invalid amino acid' + raise ValueError( + 'Error encountered validating FASTA:\n Invalid amino acid' f' found at pos {i}: "{char}"') def validate_x(self): @@ -140,8 +140,8 @@ fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char == 'X': - raise Exception( - 'Error encountered validating fasta: Unsupported AA code' + raise ValueError( + 'Error encountered validating FASTA:\n Unsupported AA code' f' "X" found at pos {i}') @@ -164,20 +164,31 @@ def main(): # load fasta file - args = parse_args() - fas = FastaLoader(args.input) + try: + args = parse_args() + fas = FastaLoader(args.input) + + # validate + fv = FastaValidator( + fas.fastas, + min_length=args.min_length, + max_length=args.max_length, + ) + fv.validate() - # validate - fv = FastaValidator( - fas.fastas, - min_length=args.min_length, - max_length=args.max_length, - ) - fv.validate() + # write cleaned version + fw = FastaWriter() + fw.write(fas.fastas[0]) - # write cleaned version - fw = FastaWriter() - fw.write(fas.fastas[0]) + except ValueError as exc: + sys.stderr.write(f"{exc}\n\n") + raise exc + + except Exception as exc: + sys.stderr.write( + "Input error: FASTA input is invalid. Please check your input.\n\n" + ) + raise exc def parse_args() -> argparse.Namespace: