comparison scripts/download_uniprot.sh @ 0:7ae9d78b06f5 draft

"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit 7b79778448363aa8c9b14604337e81009e461bd2-dirty"
author galaxy-australia
date Fri, 28 Jan 2022 04:56:29 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7ae9d78b06f5
1 #!/bin/bash
2 #
3 # Copyright 2021 DeepMind Technologies Limited
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 # Downloads, unzips and merges the SwissProt and TrEMBL databases for
18 # AlphaFold-Multimer.
19 #
20 # Usage: bash download_uniprot.sh /path/to/download/directory
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# The download directory is the only argument. An empty string is rejected as
# well: the target directory is built as "<argument>/uniprot", so an empty
# argument would resolve to "/uniprot".
if [[ $# -eq 0 || -z "${1:-}" ]]; then
  echo "Error: download directory must be provided as an input argument." >&2
  exit 1
fi

# aria2c performs the downloads, so fail early if it is not on PATH.
if ! command -v aria2c &> /dev/null; then
  echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." >&2
  exit 1
fi
32
# All files are stored in a "uniprot" subdirectory of the directory given as
# the first argument.
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/uniprot"

# Source archives for the two databases.
SPROT_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz"
TREMBL_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"

# Archive file name: the last path component of each URL.
SPROT_BASENAME="${SPROT_SOURCE_URL##*/}"
TREMBL_BASENAME="${TREMBL_SOURCE_URL##*/}"

# Name after decompression: the archive name without its ".gz" suffix.
SPROT_UNZIPPED_BASENAME="${SPROT_BASENAME%.gz}"
TREMBL_UNZIPPED_BASENAME="${TREMBL_BASENAME%.gz}"
43
# Fetch both compressed FASTA files into ROOT_DIR.
mkdir --parents -- "${ROOT_DIR}"
aria2c "${TREMBL_SOURCE_URL}" --dir="${ROOT_DIR}"
aria2c "${SPROT_SOURCE_URL}" --dir="${ROOT_DIR}"

# Every path below is already qualified with ROOT_DIR, so the working
# directory is deliberately left unchanged. Changing into ROOT_DIR first would
# make a relative download directory resolve a second time inside it, and the
# downloaded files would not be found.
gunzip -- "${ROOT_DIR}/${TREMBL_BASENAME}"
gunzip -- "${ROOT_DIR}/${SPROT_BASENAME}"

# Concatenate TrEMBL and SwissProt, rename to uniprot and clean up.
# SwissProt is appended to the TrEMBL file in place, and the combined file is
# then renamed to uniprot.fasta.
cat -- "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}" >> "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}"
mv -- "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}" "${ROOT_DIR}/uniprot.fasta"
rm -- "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}"