Mercurial > repos > fubar > egapx_runner
annotate nf/subworkflows/ncbi/gnomon/chainer_wnode/main.nf @ 5:6effccc966d0 draft
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
author | fubar |
---|---|
date | Sun, 04 Aug 2024 01:59:37 +0000 |
parents | d9c5c5b87fec |
children |
rev | line source |
---|---|
0
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
1 #!/usr/bin/env nextflow |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
2 nextflow.enable.dsl=2 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
3 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
4 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
5 include { merge_params } from '../../utilities' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
6 include { run_align_sort } from '../../default/align_sort_sa/main.nf' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
7 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
8 split_count=16 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
9 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
10 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
11 workflow chainer_wnode { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
12 take: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
13 alignments |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
14 hmm_params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
15 evidence_denylist |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
16 gap_fill_allowlist |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
17 scaffolds |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
18 trusted_genes |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
19 genome |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
20 proteins |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
21 parameters // Map : extra parameter and parameter update |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
22 main: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
23 String input_sorting = parameters.get('input_aligns_sort', '') |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
24 def sort_aligns = alignments |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
25 if (!input_sorting.contains("presorted")) { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
26 String align_sort_params = "" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
27 if (input_sorting.contains("merge_only")) { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
28 align_sort_params = "-merge" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
29 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
30 align_sort_params += " -ifmt seq-align -compression none -k subject,subject_start,-subject_end " |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
31 // print(align_sort_params) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
32 sort_aligns = run_align_sort([], [], alignments, align_sort_params).collect() |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
33 //sort_aligns = align_sort(alignments, align_sort_params) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
34 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
35 String submit_chainer_params = merge_params("-minimum-abut-margin 20 -separate-within-introns", parameters, 'submit_chainer') |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
36 String chainer_wnode_params = merge_params("", parameters, 'chainer_wnode') |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
37 String gpx_make_outputs_params = merge_params("-default-output-name chains -slices-for affinity -sort-by affinity", parameters, 'gpx_make_outputs') |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
38 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
39 def (jobs, lines_per_file) = generate_jobs(sort_aligns, submit_chainer_params) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
40 def collected = run_chainer(jobs.flatten(), sort_aligns, hmm_params, evidence_denylist, gap_fill_allowlist, scaffolds, trusted_genes, genome, proteins, lines_per_file, chainer_wnode_params) | collect |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
41 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
42 run_gpx_make_outputs(collected, gpx_make_outputs_params) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
43 emit: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
44 chains = run_gpx_make_outputs.out.chains |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
45 chains_slices = run_gpx_make_outputs.out.chains_slices |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
46 evidence = run_gpx_make_outputs.out.evidence |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
47 evidence_slices = run_gpx_make_outputs.out.evidence_slices |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
48 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
49 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
50 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
51 process generate_jobs { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
52 input: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
53 path sort_aligns |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
54 val params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
55 output: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
56 path "job.*" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
57 env lines_per_file |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
58 script: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
59 njobs=split_count |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
60 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
61 #!/usr/bin/env bash |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
62 # generate_jobs $sort_aligns $params -output chains -output-slices chains_slices -output-evidence evidence -output-evidence-slices evidence_slices |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
63 submit_chainer $params -asn $sort_aligns -o jobs |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
64 total_lines=\$(wc -l <jobs) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
65 (( lines_per_file = (total_lines + ${njobs} - 1) / ${njobs} )) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
66 echo total_lines=\$total_lines, lines_per_file=\$lines_per_file |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
67 ####split -l\$lines_per_file jobs job. -da 3 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
68 # Use round robin to distribute jobs across nodes more evenly |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
69 if [ \$total_lines -lt $njobs ]; then |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
70 effective_njobs=\$total_lines |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
71 else |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
72 effective_njobs=$njobs |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
73 fi |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
74 split -nr/\$effective_njobs jobs job. -da 3 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
75 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
76 stub: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
77 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
78 for i in {1..$split_count}; do |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
79 echo "<job query =\\\"lcl|${sort_aligns}:\${i}-\${i}\\\"></job>" >> jobs |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
80 done |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
81 split -nr/$split_count jobs job. -da 3 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
82 lines_per_file=10 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
83 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
84 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
85 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
86 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
87 process run_chainer { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
88 input: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
89 path job |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
90 path alignments |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
91 path hmm_params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
92 path evidence_denylist |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
93 path gap_fill_allowlist |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
94 path scaffolds |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
95 path trusted_genes |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
96 path genome, stageAs: 'indexed/*' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
97 path proteins_asn, stageAs: 'indexed/*' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
98 val lines_per_file |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
99 val params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
100 output: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
101 path "output/*" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
102 script: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
103 job_num = job.toString().tokenize('.').last().toInteger() |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
104 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
105 echo "${evidence_denylist.join('\n')}" > evidence_denylist.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
106 echo "${gap_fill_allowlist.join('\n')}" > gap_fill_allowlist.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
107 echo "${scaffolds.join('\n')}" > scaffolds.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
108 echo "${trusted_genes.join('\n')}" > trusted_genes.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
109 # HACK: derive start_job_id from job file extension |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
110 filename=\$(basename -- "$job") |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
111 extension="\${filename##*.}" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
112 (( start_job_id = ((10#\$extension) * $lines_per_file) + 1 )) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
113 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
114 # make the local LDS of the genomic and protein (if present) sequences |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
115 lds2_indexer -source indexed -db LDS2 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
116 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
117 # When running multiple jobs on the cluster there is a chance that |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
118 # several jobs will run on the same node and thus generate files |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
119 # with the same filename. We need to avoid that to be able to stage |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
120 # the output files for gpx_make_outputs. We add the job file numeric |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
121 # extension as a prefix to the filename. |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
122 mkdir interim |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
123 chainer_wnode $params -start-job-id \$start_job_id -workers 32 -input-jobs ${job} -O interim -nogenbank -lds2 LDS2 -evidence-denylist-manifest evidence_denylist.mft -gap-fill-allowlist-manifest gap_fill_allowlist.mft -param ${hmm_params} -scaffolds-manifest scaffolds.mft -trusted-genes-manifest trusted_genes.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
124 mkdir output |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
125 for f in interim/*; do |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
126 if [ -f \$f ]; then |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
127 mv \$f output/\${extension}_\$(basename \$f) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
128 fi |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
129 done |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
130 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
131 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
132 stub: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
133 job_num = job.toString().tokenize('.').last().toInteger() |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
134 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
135 mkdir -p output |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
136 touch output/sample_chainer_wnode.${job_num}.out |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
137 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
138 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
139 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
140 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
141 process run_gpx_make_outputs { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
142 input: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
143 path files, stageAs: "gpx_inputs/*" |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
144 val params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
145 output: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
146 path "output/chains.*.out.gz", emit: 'chains' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
147 path "output/chains.*.out.gz.slices", emit: 'chains_slices' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
148 path "output/evidence.*.out.gz", emit: 'evidence', optional: true |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
149 path "output/evidence.*.out.gz.slices", emit: 'evidence_slices', optional: true |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
150 script: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
151 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
152 ls -1 gpx_inputs/* > gpx_inputs.mft |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
153 mkdir -p output |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
154 gpx_make_outputs $params -input-manifest gpx_inputs.mft -output output/@.#.out.gz -output-manifest output/@.mft -slices-manifest output/@_slices.mft -num-partitions $split_count |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
155 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
156 stub: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
157 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
158 mkdir -p output |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
159 echo ${files} |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
160 for i in {1..$split_count}; do |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
161 touch output/chains.\$i.out.gz |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
162 touch output/chains.\$i.out.gz.slices |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
163 touch output/evidence.\$i.out.gz |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
164 touch output/evidence.\$i.out.gz.slices |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
165 done |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
166 """ |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
167 } |