Mercurial > repos > fubar > egapx_runner
annotate nf/subworkflows/ncbi/gnomon/main.nf @ 8:1680e72e27be draft default tip
planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
author | fubar |
---|---|
date | Mon, 05 Aug 2024 03:56:41 +0000 |
parents | d9c5c5b87fec |
children |
rev | line source |
---|---|
0
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
1 #!/usr/bin/env nextflow |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
2 // gnomon plane workflow |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
3 // route data to tasks |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
4 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
5 nextflow.enable.dsl=2 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
6 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
7 include { chainer_wnode as chainer } from './chainer_wnode/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
8 include { gnomon_wnode } from './gnomon_wnode/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
9 include { prot_gnomon_prepare } from './prot_gnomon_prepare/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
10 include { gnomon_training_iterations } from '../gnomon-training-iteration/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
11 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
12 include { diamond_worker} from './diamond/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
13 include { best_protein_hits } from './protein_filter/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
14 include { gnomon_biotype} from './gnomon_biotype/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
15 include { fetch_swiss_prot_asn; get_swiss_prot_ids } from '../shared/diamond/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
16 include { diamond_orthology } from '../orthology/diamond_orthology/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
17 include { locus_link } from './locus_link/main' |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
18 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
19 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
20 params.intermediate = false |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
21 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
22 workflow gnomon_plane { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
23 take: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
24 genome_asn |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
25 scaffolds |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
26 gencoll_asn |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
27 proteins_asn |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
28 alignments // list of all relevent input alignments |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
29 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
30 // Alternative parameters, one of them should be set |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
31 // tax_id - NCBI tax id of the closest taxon to the genome |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
32 // hmm_params - HMM parameters |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
33 tax_id // NCBI tax id of the closest taxon to the genome |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
34 hmm_params // HMM parameters |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
35 hmm_taxid // NCBI tax id of the taxon of the HMM |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
36 // |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
37 softmask // softmask for GNOMON, optional |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
38 max_intron // max intron length |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
39 task_params // task parameters for every task |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
40 main: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
41 // GNOMON |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
42 def effective_hmm |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
43 if (tax_id == hmm_taxid) { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
44 effective_hmm = hmm_params |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
45 } else { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
46 effective_hmm = gnomon_training_iterations(hmm_params, genome_asn, proteins_asn, alignments, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
47 /* trusted_genes */ [], scaffolds, softmask, |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
48 softmask, scaffolds, |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
49 max_intron, |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
50 task_params) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
51 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
52 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
53 chainer(alignments, effective_hmm, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], scaffolds, /* trusted_genes */ [], genome_asn, proteins_asn, task_params.get('chainer', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
54 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
55 def gn_models = [] |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
56 gnomon_wnode(scaffolds, chainer.out.chains, chainer.out.chains_slices, effective_hmm, [], softmask, genome_asn, proteins_asn, task_params.get('gnomon', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
57 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
58 emit: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
59 gnomon_models = gnomon_wnode.out.outputs |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
60 // trained_hmm = effective_hmm |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
61 } |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
62 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
63 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
64 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
65 workflow post_gnomon_plane { |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
66 take: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
67 gnomon_models |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
68 gencoll_asn |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
69 orthologs |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
70 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
71 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
72 // Alternative parameters, one of them should be set |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
73 // tax_id - NCBI tax id of the closest taxon to the genome |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
74 // hmm_params - HMM parameters |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
75 tax_id // NCBI tax id of the closest taxon to the genome |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
76 task_params // task parameters for every task |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
77 main: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
78 // Post GNOMON |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
79 // might come its own plane |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
80 def swiss_prot_asn = fetch_swiss_prot_asn() |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
81 def swiss_prot_ids = get_swiss_prot_ids(swiss_prot_asn) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
82 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
83 prot_gnomon_prepare(gnomon_models, task_params.get('prot_gnomon_prepare', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
84 // Seed Protein-Model Hits |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
85 diamond_worker(prot_gnomon_prepare.out.prot_ids, swiss_prot_ids, gnomon_models, swiss_prot_asn, task_params.get('diamond', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
86 best_protein_hits(gnomon_models, swiss_prot_asn, diamond_worker.out.alignments , task_params.get('protein_filter', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
87 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
88 gnomon_biotype([] /*models*/,/*splices_file -- constant*/ [], /*denylist -- constant*/ [], gencoll_asn, swiss_prot_asn, gnomon_models, diamond_worker.out.alignments,task_params.get('gnomon_biotype', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
89 locus_link(/*best_refseq_prot_hit -- best protein hits from refseq plane*/ [], orthologs, [] /*annot_builder.out.annot_files*/, |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
90 gencoll_asn, gnomon_models, best_protein_hits.out.alignments , /*track_loci*/ [], /*comparisons*/ [], /*curr_prev_compare*/ [], |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
91 gnomon_biotype.out.biotypes, /*lxr_data*/ [], swiss_prot_asn, /*name_from_ortholog */ [], task_params.get('locus_link', [:])) |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
92 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
93 |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
94 emit: |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
95 locus = locus_link.out.locus |
d9c5c5b87fec
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff
changeset
|
96 } |