Mercurial > repos > fubar > egapx_runner
diff nf/subworkflows/ncbi/shared/diamond/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env nextflow
nextflow.enable.dsl=2


/*
 * Execution of:
 *         /netmnt/vast01/gpi/regr/GPIPE_REGR1/system/2024-03-27.prod.build25780/bin/diamond
 *           -asn-cache /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/sequence_cache
 *           -blastp-args '--sam-query-len --comp-based-stats 0 --evalue 0.0001 --very-sensitive --max-hsps 3'
 *           -diamond-executable /netmnt/vast01/gpi/regr/GPIPE_REGR1/system/2024-03-27.prod.build25780/third-party/diamond/diamond
 *           -lds2 /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/prot_gnomon_prepare.8202002/out/LDS2
 *           -ofmt seq-align-set
 *           -output-dir /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/out
 *           -output-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/out/align.mft
 *           -output-prefix hits
 *           ## query is gnomon-made proteins 'gnl|GNOMON|23016146.p'
 *           ## query-fmt is <String, `fasta', `seq-ids'>
 *           -query-fmt seq-ids
 *           -query-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/inp/query_ids.mft
 *           ## subject is swiss-prot ids 'sp|A0A009IHW8.1|ABTIR_ACIB9'
 *           -subject-fmt seq-ids
 *           -subject-manifest /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/inp/subject_ids.mft
 *           -work-area /netmnt/vast01/gpi/regr/GPIPE_REGR1/data00/Gavia_stellata/GP37025.85624/846757/diamond.8202022/tmp
 */

include {to_map; shellSplit } from '../../utilities'


// Location of the gzipped SwissProt reference set downloaded by fetch_swiss_prot_asn.
swiss_prot_url='https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/reference_sets/swissprot.asnb.gz'

/*
 * Download the gzipped SwissProt ASN.1 binary from swiss_prot_url and unpack it.
 *
 * Takes no inputs.
 * Emits (as "swiss_prot_asn"): output/swissprot.asnb
 */
process fetch_swiss_prot_asn {
    // No input section: this process has no inputs.
    output:
        path "output/swissprot.asnb", emit: "swiss_prot_asn"
    script:
    """
    # --fail: exit non-zero on an HTTP error instead of saving the error page,
    # which would otherwise only surface later as a confusing gunzip failure.
    curl --fail --location -O '$swiss_prot_url'
    gunzip swissprot.asnb.gz
    mkdir -p output
    mv swissprot.asnb output/swissprot.asnb
    """
    stub:
    """
    mkdir -p output
    touch output/swissprot.asnb
    """
}

/*
 * Index the staged SwissProt ASN file with lds2_indexer and dump sequence ids
 * from the resulting sqlite database.
 *
 * Input:  swiss_prot_asn - file staged into the task directory; the indexer is
 *                          pointed at '.', so it indexes whatever is staged there.
 * Output: output/swiss_prot_ids - txt_id values selected from the seq_id table
 *                                 (rows with orig=1 and a NULL int_id).
 */
process get_swiss_prot_ids {
    input:
        path swiss_prot_asn
    output:
        path "output/swiss_prot_ids"
    script:
    """
    mkdir -p output
    lds2_indexer -db lds -source .
    sqlite3 ./lds "SELECT txt_id FROM seq_id WHERE orig=1 AND int_id IS NULL;" > output/swiss_prot_ids
    """
    stub:
    """
    mkdir -p output
    touch output/swiss_prot_ids
    """
}

/*
 * Run diamond_egap with Gnomon proteins as query against SwissProt as subject.
 *
 * Inputs:
 *   gnomon_prot_ids - query id file(s); their names are written, one per line, to query.mft.
 *                     NOTE(review): the .join() call assumes this is a collection of files - confirm against the caller.
 *   swiss_prot_ids  - passed to diamond_egap via -subject.
 *   gnomon_prot_asn - staged under indexed/, loaded into the local ASN cache.
 *   swiss_prot_asn  - staged under indexed/, loaded into the local ASN cache.
 *   params          - extra command-line arguments interpolated verbatim into the diamond_egap call.
 *                     NOTE(review): this name shadows Nextflow's implicit `params` map inside this process.
 * Output: every file diamond_egap leaves under output/.
 */
process run_diamond_egap {
    input:
        path gnomon_prot_ids
        path swiss_prot_ids
        path gnomon_prot_asn, stageAs: 'indexed/*'
        path swiss_prot_asn, stageAs: 'indexed/*'
        val params
    output:
        path "output/*"
    script:
    """
    #diamond_egap uses GP_HOME to build paths to both some gp apps, and third-party
    #GP_HOME needs to be the directory that contains third-party, and the directory that contains bin/<gp apps>
    diamond_bin=\${GP_HOME}/third-party/diamond/diamond

    # Both protein sets go into one local ASN cache, which diamond_egap reads via -asn-cache.
    mkdir -p ./asncache/
    prime_cache -cache ./asncache/ -ifmt asnb-seq-entry -i ${gnomon_prot_asn} -oseq-ids /dev/null -split-sequences
    prime_cache -cache ./asncache/ -ifmt asnb-seq-entry -i ${swiss_prot_asn} -oseq-ids /dev/null -split-sequences

    # -p for consistency with every other mkdir in this file.
    mkdir -p ./output
    mkdir -p ./work

    echo ${params}
    echo "${gnomon_prot_ids.join('\n')}" > query.mft
    diamond_egap ${params} -asn-cache ./asncache/ -nogenbank -query-manifest query.mft -subject ${swiss_prot_ids} \
        -output-dir ./output/ -work-area ./work/ -diamond-executable \${diamond_bin}
    # Scratch area is not a declared output; remove it.
    rm -rf ./work
    """

    stub:
    """
    mkdir -p output
    touch output/diamond_output.asn
    """
}