Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/shared/diamond/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d9c5c5b87fec |
---|---|
1 #!/usr/bin/env nextflow | |
2 nextflow.enable.dsl=2 | |
3 | |
4 | |
5 /* | |
6 *Execution of: | |
7 * /netmnt/vast01/gpi/regr/GPIPE_REGR1/system/2024-03-27.prod.build25780/bin/diamond | |
8 * -asn-cache /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/sequence_cache | |
9 * -blastp-args '--sam-query-len --comp-based-stats 0 --evalue 0.0001 --very-sensitive --max-hsps 3' | |
10 * -diamond-executable /netmnt/vast01/gpi/regr/GPIPE_REGR1/system/2024-03-27.prod.build25780/third-party/diamond/diamond | |
11 * -lds2 /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/prot_gnomon_prepare.8202002/out/LDS2 | |
12 * -ofmt seq-align-set | |
13 * -output-dir /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/out | |
14 * -output-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/out/align.mft | |
15 * -output-prefix hits | |
16 * ## query is gnomon-made proteins 'gnl|GNOMON|23016146.p' | |
17 * ## query-fmt is <String, `fasta', `seq-ids'> | |
18 * -query-fmt seq-ids | |
19 * -query-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/inp/query_ids.mft | |
20 * ## subject is swiss-prot ids 'sp|A0A009IHW8.1|ABTIR_ACIB9' | |
21 * -subject-fmt seq-ids | |
22 * -subject-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/inp/subject_ids.mft | |
23 * -work-area /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/tmp | |
24 | |
25 */ | |
26 | |
27 include {to_map; shellSplit } from '../../utilities' | |
28 | |
29 | |
// Source of the gzipped SwissProt reference set downloaded by fetch_swiss_prot_asn.
swiss_prot_url='https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/reference_sets/swissprot.asnb.gz'

/*
 * Download the SwissProt reference set and decompress it.
 *
 * Takes no inputs.
 * Output:
 *   output/swissprot.asnb, emitted as `swiss_prot_asn`.
 */
process fetch_swiss_prot_asn {
    output:
        path "output/swissprot.asnb", emit: "swiss_prot_asn"
    script:
    // --fail     : exit non-zero on an HTTP error instead of saving the error page
    //              as swissprot.asnb.gz and failing later, confusingly, in gunzip.
    // --location : follow redirects, should the file be moved on the server.
    // --retry 3  : ride out transient network errors.
    """
    curl --fail --location --retry 3 -O '$swiss_prot_url'
    gunzip swissprot.asnb.gz
    mkdir -p output
    mv swissprot.asnb output/swissprot.asnb
    """
    stub:
    // Stub run: create an empty placeholder so downstream processes can be wired up.
    """
    mkdir -p output
    touch output/swissprot.asnb
    """
}
48 | |
/*
 * Produce a list of sequence identifiers from the SwissProt ASN file.
 *
 * Input:
 *   swiss_prot_asn - staged into the task directory; the commands never name it,
 *                    they index the directory itself (`-source .`).
 * Output:
 *   output/swiss_prot_ids - result of the `txt_id` query against the `lds` database.
 */
process get_swiss_prot_ids {
    input:
        path swiss_prot_asn
    output:
        path "output/swiss_prot_ids"
    script:
    // Step 1: index the task directory into the `lds` database.
    // Step 2: dump the selected txt_id values into the output file.
    """
    lds2_indexer -db lds -source .
    mkdir -p output
    sqlite3 ./lds "SELECT txt_id FROM seq_id WHERE orig=1 AND int_id IS NULL;" > output/swiss_prot_ids
    """
    stub:
    // Stub run: empty placeholder id list.
    """
    mkdir -p output
    touch output/swiss_prot_ids
    """
}
66 | |
/*
 * Run diamond_egap with Gnomon proteins as the query and SwissProt as the subject.
 *
 * Inputs:
 *   gnomon_prot_ids - query id file(s); the staged names are joined with newlines
 *                     and written to query.mft (presumably a list of files - confirm
 *                     against the caller).
 *   swiss_prot_ids  - handed to diamond_egap through -subject.
 *   gnomon_prot_asn - staged under indexed/ and loaded into the local ASN cache.
 *   swiss_prot_asn  - staged under indexed/ and loaded into the local ASN cache.
 *   params          - extra command-line arguments inserted verbatim into the
 *                     diamond_egap call.
 *                     NOTE(review): this name shadows Nextflow's implicit `params`
 *                     map inside the process; consider renaming it.
 * Output:
 *   every file diamond_egap leaves in output/.
 */
process run_diamond_egap {
    input:
        path gnomon_prot_ids
        path swiss_prot_ids
        path gnomon_prot_asn, stageAs: 'indexed/*'
        path swiss_prot_asn, stageAs: 'indexed/*'
        val params
    output:
        path "output/*"
    script:
    // print(params)
    // All directories are created with `mkdir -p`, matching the other processes
    // in this file, so an already-existing directory does not abort the task.
    """

    ###diamond_bin=`which diamond`
    #diamond_egap uses GP_HOME to build paths to both some gp apps, and third-party
    #GP_HOME needs to be the directory that contains third-party, and the directory that contains bin/<gp apps>
    diamond_bin=\${GP_HOME}/third-party/diamond/diamond

    mkdir -p ./asncache/

    prime_cache -cache ./asncache/ -ifmt asnb-seq-entry -i ${gnomon_prot_asn} -oseq-ids /dev/null -split-sequences
    prime_cache -cache ./asncache/ -ifmt asnb-seq-entry -i ${swiss_prot_asn} -oseq-ids /dev/null -split-sequences

    mkdir -p ./output
    mkdir -p ./work

    echo ${params}
    echo "${gnomon_prot_ids.join('\n')}" > query.mft
    diamond_egap ${params} -asn-cache ./asncache/ -nogenbank -query-manifest query.mft -subject ${swiss_prot_ids} \
        -output-dir ./output/ -work-area ./work/ -diamond-executable \${diamond_bin}
    rm -rf ./work
    """

    stub:
    // Stub run: empty placeholder alignment file.
    """
    mkdir -p output
    touch output/diamond_output.asn
    """
}
106 | |
107 |