annotate nf/subworkflows/ncbi/gnomon/main.nf @ 8:1680e72e27be draft default tip

planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
author fubar
date Mon, 05 Aug 2024 03:56:41 +0000
parents d9c5c5b87fec
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
1 #!/usr/bin/env nextflow
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
2 // gnomon plane workflow
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
3 // route data to tasks
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
4
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
5 nextflow.enable.dsl=2
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
6
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
7 include { chainer_wnode as chainer } from './chainer_wnode/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
8 include { gnomon_wnode } from './gnomon_wnode/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
9 include { prot_gnomon_prepare } from './prot_gnomon_prepare/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
10 include { gnomon_training_iterations } from '../gnomon-training-iteration/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
11
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
12 include { diamond_worker} from './diamond/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
13 include { best_protein_hits } from './protein_filter/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
14 include { gnomon_biotype} from './gnomon_biotype/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
15 include { fetch_swiss_prot_asn; get_swiss_prot_ids } from '../shared/diamond/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
16 include { diamond_orthology } from '../orthology/diamond_orthology/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
17 include { locus_link } from './locus_link/main'
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
18
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
19
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
20 params.intermediate = false
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
21
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
22 workflow gnomon_plane {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
23 take:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
24 genome_asn
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
25 scaffolds
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
26 gencoll_asn
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
27 proteins_asn
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
28 alignments // list of all relevent input alignments
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
29
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
30 // Alternative parameters, one of them should be set
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
31 // tax_id - NCBI tax id of the closest taxon to the genome
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
32 // hmm_params - HMM parameters
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
33 tax_id // NCBI tax id of the closest taxon to the genome
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
34 hmm_params // HMM parameters
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
35 hmm_taxid // NCBI tax id of the taxon of the HMM
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
36 //
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
37 softmask // softmask for GNOMON, optional
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
38 max_intron // max intron length
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
39 task_params // task parameters for every task
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
40 main:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
41 // GNOMON
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
42 def effective_hmm
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
43 if (tax_id == hmm_taxid) {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
44 effective_hmm = hmm_params
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
45 } else {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
46 effective_hmm = gnomon_training_iterations(hmm_params, genome_asn, proteins_asn, alignments, /* evidence_denylist */ [], /* gap_fill_allowlist */ [],
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
47 /* trusted_genes */ [], scaffolds, softmask,
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
48 softmask, scaffolds,
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
49 max_intron,
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
50 task_params)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
51 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
52
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
53 chainer(alignments, effective_hmm, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], scaffolds, /* trusted_genes */ [], genome_asn, proteins_asn, task_params.get('chainer', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
54
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
55 def gn_models = []
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
56 gnomon_wnode(scaffolds, chainer.out.chains, chainer.out.chains_slices, effective_hmm, [], softmask, genome_asn, proteins_asn, task_params.get('gnomon', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
57
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
58 emit:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
59 gnomon_models = gnomon_wnode.out.outputs
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
60 // trained_hmm = effective_hmm
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
61 }
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
62
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
63
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
64
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
65 workflow post_gnomon_plane {
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
66 take:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
67 gnomon_models
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
68 gencoll_asn
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
69 orthologs
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
70
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
71
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
72 // Alternative parameters, one of them should be set
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
73 // tax_id - NCBI tax id of the closest taxon to the genome
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
74 // hmm_params - HMM parameters
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
75 tax_id // NCBI tax id of the closest taxon to the genome
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
76 task_params // task parameters for every task
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
77 main:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
78 // Post GNOMON
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
79 // might come its own plane
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
80 def swiss_prot_asn = fetch_swiss_prot_asn()
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
81 def swiss_prot_ids = get_swiss_prot_ids(swiss_prot_asn)
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
82
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
83 prot_gnomon_prepare(gnomon_models, task_params.get('prot_gnomon_prepare', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
84 // Seed Protein-Model Hits
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
85 diamond_worker(prot_gnomon_prepare.out.prot_ids, swiss_prot_ids, gnomon_models, swiss_prot_asn, task_params.get('diamond', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
86 best_protein_hits(gnomon_models, swiss_prot_asn, diamond_worker.out.alignments , task_params.get('protein_filter', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
87
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
88 gnomon_biotype([] /*models*/,/*splices_file -- constant*/ [], /*denylist -- constant*/ [], gencoll_asn, swiss_prot_asn, gnomon_models, diamond_worker.out.alignments,task_params.get('gnomon_biotype', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
89 locus_link(/*best_refseq_prot_hit -- best protein hits from refseq plane*/ [], orthologs, [] /*annot_builder.out.annot_files*/,
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
90 gencoll_asn, gnomon_models, best_protein_hits.out.alignments , /*track_loci*/ [], /*comparisons*/ [], /*curr_prev_compare*/ [],
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
91 gnomon_biotype.out.biotypes, /*lxr_data*/ [], swiss_prot_asn, /*name_from_ortholog */ [], task_params.get('locus_link', [:]))
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
92
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
93
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
94 emit:
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
95 locus = locus_link.out.locus
d9c5c5b87fec planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
fubar
parents:
diff changeset
96 }