Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/gnomon/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d9c5c5b87fec |
---|---|
1 #!/usr/bin/env nextflow | |
2 // gnomon plane workflow | |
3 // route data to tasks | |
4 | |
5 nextflow.enable.dsl=2 | |
6 | |
7 include { chainer_wnode as chainer } from './chainer_wnode/main' | |
8 include { gnomon_wnode } from './gnomon_wnode/main' | |
9 include { prot_gnomon_prepare } from './prot_gnomon_prepare/main' | |
10 include { gnomon_training_iterations } from '../gnomon-training-iteration/main' | |
11 | |
12 include { diamond_worker} from './diamond/main' | |
13 include { best_protein_hits } from './protein_filter/main' | |
14 include { gnomon_biotype} from './gnomon_biotype/main' | |
15 include { fetch_swiss_prot_asn; get_swiss_prot_ids } from '../shared/diamond/main' | |
16 include { diamond_orthology } from '../orthology/diamond_orthology/main' | |
17 include { locus_link } from './locus_link/main' | |
18 | |
19 | |
20 params.intermediate = false // default value; NOTE(review): not referenced by the workflows visible in this file - confirm it is used by included modules | |
21 | |
22 workflow gnomon_plane { // runs chainer, then gnomon_wnode, and emits the gnomon_wnode outputs | |
23 take: | |
24 genome_asn | |
25 scaffolds | |
26 gencoll_asn // NOTE(review): taken as input but not referenced in main: below | |
27 proteins_asn | |
28 alignments // list of all relevant input alignments | |
29 | |
30 // Alternative parameters, one of them should be set | |
31 // tax_id - NCBI tax id of the closest taxon to the genome | |
32 // hmm_params - HMM parameters | |
33 tax_id // NCBI tax id of the closest taxon to the genome | |
34 hmm_params // HMM parameters | |
35 hmm_taxid // NCBI tax id of the taxon of the HMM | |
36 // | |
37 softmask // softmask for GNOMON, optional | |
38 max_intron // max intron length | |
39 task_params // task parameters for every task | |
40 main: | |
41 // GNOMON | |
42 def effective_hmm | |
43 if (tax_id == hmm_taxid) { // supplied HMM is for the same taxon as the genome: use hmm_params unchanged | |
44 effective_hmm = hmm_params | |
45 } else { // taxa differ: obtain the HMM from gnomon_training_iterations, seeded with hmm_params | |
46 effective_hmm = gnomon_training_iterations(hmm_params, genome_asn, proteins_asn, alignments, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], | |
47 /* trusted_genes */ [], scaffolds, softmask, | |
48 softmask, scaffolds, // NOTE(review): scaffolds and softmask are each passed twice - confirm against the gnomon_training_iterations take: order | |
49 max_intron, | |
50 task_params) | |
51 } | |
52 | |
53 chainer(alignments, effective_hmm, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], scaffolds, /* trusted_genes */ [], genome_asn, proteins_asn, task_params.get('chainer', [:])) | |
54 | |
55 def gn_models = [] // NOTE(review): gn_models is never read in this workflow | |
56 gnomon_wnode(scaffolds, chainer.out.chains, chainer.out.chains_slices, effective_hmm, [], softmask, genome_asn, proteins_asn, task_params.get('gnomon', [:])) | |
57 | |
58 emit: | |
59 gnomon_models = gnomon_wnode.out.outputs | |
60 // trained_hmm = effective_hmm | |
61 } | |
62 | |
63 | |
64 | |
65 workflow post_gnomon_plane { // post-processes gnomon models (prot_gnomon_prepare, diamond_worker, best_protein_hits, gnomon_biotype, locus_link) and emits locus_link.out.locus | |
66 take: | |
67 gnomon_models | |
68 gencoll_asn | |
69 orthologs | |
70 | |
71 | |
72 // Alternative parameters, one of them should be set | |
73 // tax_id - NCBI tax id of the closest taxon to the genome | |
74 // hmm_params - HMM parameters (NOTE(review): not an input of this workflow; comment appears carried over from gnomon_plane) | |
75 tax_id // NCBI tax id of the closest taxon to the genome; NOTE(review): not referenced in main: below | |
76 task_params // task parameters for every task | |
77 main: | |
78 // Post GNOMON | |
79 // might become its own plane | |
80 def swiss_prot_asn = fetch_swiss_prot_asn() | |
81 def swiss_prot_ids = get_swiss_prot_ids(swiss_prot_asn) | |
82 | |
83 prot_gnomon_prepare(gnomon_models, task_params.get('prot_gnomon_prepare', [:])) | |
84 // Seed Protein-Model Hits | |
85 diamond_worker(prot_gnomon_prepare.out.prot_ids, swiss_prot_ids, gnomon_models, swiss_prot_asn, task_params.get('diamond', [:])) | |
86 best_protein_hits(gnomon_models, swiss_prot_asn, diamond_worker.out.alignments , task_params.get('protein_filter', [:])) | |
87 | |
88 gnomon_biotype([] /*models*/,/*splices_file -- constant*/ [], /*denylist -- constant*/ [], gencoll_asn, swiss_prot_asn, gnomon_models, diamond_worker.out.alignments,task_params.get('gnomon_biotype', [:])) | |
89 locus_link(/*best_refseq_prot_hit -- best protein hits from refseq plane*/ [], orthologs, [] /*annot_builder.out.annot_files*/, | |
90 gencoll_asn, gnomon_models, best_protein_hits.out.alignments , /*track_loci*/ [], /*comparisons*/ [], /*curr_prev_compare*/ [], | |
91 gnomon_biotype.out.biotypes, /*lxr_data*/ [], swiss_prot_asn, /*name_from_ortholog */ [], task_params.get('locus_link', [:])) | |
92 | |
93 | |
94 emit: | |
95 locus = locus_link.out.locus | |
96 } | |