annotate nf/subworkflows/ncbi/gnomon/chainer_wnode/main.nf @ 5:6effccc966d0 draft

planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
author fubar
date Sun, 04 Aug 2024 01:59:37 +0000
parents d9c5c5b87fec
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
1 #!/usr/bin/env nextflow
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
2 nextflow.enable.dsl=2
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
3
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
4
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
5 include { merge_params } from '../../utilities'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
6 include { run_align_sort } from '../../default/align_sort_sa/main.nf'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
7
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
8 split_count=16
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
9
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
10
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
11 workflow chainer_wnode {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
12 take:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
13 alignments
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
14 hmm_params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
15 evidence_denylist
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
16 gap_fill_allowlist
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
17 scaffolds
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
18 trusted_genes
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
19 genome
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
20 proteins
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
21 parameters // Map : extra parameter and parameter update
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
22 main:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
23 String input_sorting = parameters.get('input_aligns_sort', '')
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
24 def sort_aligns = alignments
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
25 if (!input_sorting.contains("presorted")) {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
26 String align_sort_params = ""
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
27 if (input_sorting.contains("merge_only")) {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
28 align_sort_params = "-merge"
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
29 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
30 align_sort_params += " -ifmt seq-align -compression none -k subject,subject_start,-subject_end "
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
31 // print(align_sort_params)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
32 sort_aligns = run_align_sort([], [], alignments, align_sort_params).collect()
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
33 //sort_aligns = align_sort(alignments, align_sort_params)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
34 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
35 String submit_chainer_params = merge_params("-minimum-abut-margin 20 -separate-within-introns", parameters, 'submit_chainer')
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
36 String chainer_wnode_params = merge_params("", parameters, 'chainer_wnode')
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
37 String gpx_make_outputs_params = merge_params("-default-output-name chains -slices-for affinity -sort-by affinity", parameters, 'gpx_make_outputs')
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
38
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
39 def (jobs, lines_per_file) = generate_jobs(sort_aligns, submit_chainer_params)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
40 def collected = run_chainer(jobs.flatten(), sort_aligns, hmm_params, evidence_denylist, gap_fill_allowlist, scaffolds, trusted_genes, genome, proteins, lines_per_file, chainer_wnode_params) | collect
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
41
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
42 run_gpx_make_outputs(collected, gpx_make_outputs_params)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
43 emit:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
44 chains = run_gpx_make_outputs.out.chains
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
45 chains_slices = run_gpx_make_outputs.out.chains_slices
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
46 evidence = run_gpx_make_outputs.out.evidence
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
47 evidence_slices = run_gpx_make_outputs.out.evidence_slices
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
48 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
49
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
50
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
51 process generate_jobs {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
52 input:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
53 path sort_aligns
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
54 val params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
55 output:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
56 path "job.*"
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
57 env lines_per_file
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
58 script:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
59 njobs=split_count
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
60 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
61 #!/usr/bin/env bash
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
62 # generate_jobs $sort_aligns $params -output chains -output-slices chains_slices -output-evidence evidence -output-evidence-slices evidence_slices
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
63 submit_chainer $params -asn $sort_aligns -o jobs
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
64 total_lines=\$(wc -l <jobs)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
65 (( lines_per_file = (total_lines + ${njobs} - 1) / ${njobs} ))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
66 echo total_lines=\$total_lines, lines_per_file=\$lines_per_file
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
67 ####split -l\$lines_per_file jobs job. -da 3
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
68 # Use round robin to distribute jobs across nodes more evenly
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
69 if [ \$total_lines -lt $njobs ]; then
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
70 effective_njobs=\$total_lines
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
71 else
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
72 effective_njobs=$njobs
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
73 fi
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
74 split -nr/\$effective_njobs jobs job. -da 3
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
75 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
76 stub:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
77 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
78 for i in {1..$split_count}; do
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
79 echo "<job query =\\\"lcl|${sort_aligns}:\${i}-\${i}\\\"></job>" >> jobs
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
80 done
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
81 split -nr/$split_count jobs job. -da 3
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
82 lines_per_file=10
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
83 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
84 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
85
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
86
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
87 process run_chainer {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
88 input:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
89 path job
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
90 path alignments
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
91 path hmm_params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
92 path evidence_denylist
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
93 path gap_fill_allowlist
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
94 path scaffolds
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
95 path trusted_genes
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
96 path genome, stageAs: 'indexed/*'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
97 path proteins_asn, stageAs: 'indexed/*'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
98 val lines_per_file
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
99 val params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
100 output:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
101 path "output/*"
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
102 script:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
103 job_num = job.toString().tokenize('.').last().toInteger()
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
104 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
105 echo "${evidence_denylist.join('\n')}" > evidence_denylist.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
106 echo "${gap_fill_allowlist.join('\n')}" > gap_fill_allowlist.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
107 echo "${scaffolds.join('\n')}" > scaffolds.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
108 echo "${trusted_genes.join('\n')}" > trusted_genes.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
109 # HACK: derive start_job_id from job file extension
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
110 filename=\$(basename -- "$job")
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
111 extension="\${filename##*.}"
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
112 (( start_job_id = ((10#\$extension) * $lines_per_file) + 1 ))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
113
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
114 # make the local LDS of the genomic and protein (if present) sequences
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
115 lds2_indexer -source indexed -db LDS2
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
116
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
117 # When running multiple jobs on the cluster there is a chance that
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
118 # several jobs will run on the same node and thus generate files
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
119 # with the same filename. We need to avoid that to be able to stage
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
120 # the output files for gpx_make_outputs. We add the job file numeric
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
121 # extension as a prefix to the filename.
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
122 mkdir interim
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
123 chainer_wnode $params -start-job-id \$start_job_id -workers 32 -input-jobs ${job} -O interim -nogenbank -lds2 LDS2 -evidence-denylist-manifest evidence_denylist.mft -gap-fill-allowlist-manifest gap_fill_allowlist.mft -param ${hmm_params} -scaffolds-manifest scaffolds.mft -trusted-genes-manifest trusted_genes.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
124 mkdir output
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
125 for f in interim/*; do
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
126 if [ -f \$f ]; then
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
127 mv \$f output/\${extension}_\$(basename \$f)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
128 fi
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
129 done
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
130 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
131
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
132 stub:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
133 job_num = job.toString().tokenize('.').last().toInteger()
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
134 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
135 mkdir -p output
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
136 touch output/sample_chainer_wnode.${job_num}.out
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
137 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
138 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
139
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
140
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
141 process run_gpx_make_outputs {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
142 input:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
143 path files, stageAs: "gpx_inputs/*"
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
144 val params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
145 output:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
146 path "output/chains.*.out.gz", emit: 'chains'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
147 path "output/chains.*.out.gz.slices", emit: 'chains_slices'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
148 path "output/evidence.*.out.gz", emit: 'evidence', optional: true
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
149 path "output/evidence.*.out.gz.slices", emit: 'evidence_slices', optional: true
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
150 script:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
151 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
152 ls -1 gpx_inputs/* > gpx_inputs.mft
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
153 mkdir -p output
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
154 gpx_make_outputs $params -input-manifest gpx_inputs.mft -output output/@.#.out.gz -output-manifest output/@.mft -slices-manifest output/@_slices.mft -num-partitions $split_count
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
155 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
156 stub:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
157 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
158 mkdir -p output
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
159 echo ${files}
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
160 for i in {1..$split_count}; do
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
161 touch output/chains.\$i.out.gz
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
162 touch output/chains.\$i.out.gz.slices
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
163 touch output/evidence.\$i.out.gz
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
164 touch output/evidence.\$i.out.gz.slices
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
165 done
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
166 """
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
167 }