diff nf/subworkflows/ncbi/gnomon/main.nf @ 0:d9c5c5b87fec draft

planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author fubar
date Sat, 03 Aug 2024 11:16:53 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nf/subworkflows/ncbi/gnomon/main.nf	Sat Aug 03 11:16:53 2024 +0000
@@ -0,0 +1,96 @@
+#!/usr/bin/env nextflow
+// gnomon plane workflow
+// route data to tasks
+
+nextflow.enable.dsl=2
+
+include { chainer_wnode as chainer } from './chainer_wnode/main'
+include { gnomon_wnode } from './gnomon_wnode/main'
+include { prot_gnomon_prepare } from './prot_gnomon_prepare/main'
+include { gnomon_training_iterations } from '../gnomon-training-iteration/main'
+
+include { diamond_worker} from './diamond/main'
+include { best_protein_hits } from './protein_filter/main'
+include { gnomon_biotype} from './gnomon_biotype/main'
+include { fetch_swiss_prot_asn; get_swiss_prot_ids } from '../shared/diamond/main'
+include { diamond_orthology } from '../orthology/diamond_orthology/main'
+include { locus_link } from './locus_link/main'
+
+
+params.intermediate = false
+
+workflow gnomon_plane {
+    take:
+        genome_asn
+        scaffolds
+        gencoll_asn
+        proteins_asn
+        alignments // list of all relevent input alignments
+
+        // Alternative parameters, one of them should be set
+        // tax_id - NCBI tax id of the closest taxon to the genome
+        // hmm_params - HMM parameters
+        tax_id          // NCBI tax id of the closest taxon to the genome
+        hmm_params      // HMM parameters
+        hmm_taxid       // NCBI tax id of the taxon of the HMM
+        //
+        softmask        // softmask for GNOMON, optional
+        max_intron      // max intron length
+        task_params     // task parameters for every task
+    main:
+        // GNOMON
+        def effective_hmm
+        if (tax_id == hmm_taxid) {
+            effective_hmm = hmm_params
+        } else {
+            effective_hmm = gnomon_training_iterations(hmm_params, genome_asn, proteins_asn, alignments, /* evidence_denylist */ [], /* gap_fill_allowlist */ [],
+                /* trusted_genes */ [], scaffolds, softmask,
+                softmask, scaffolds,
+                max_intron,
+                task_params)
+        }
+
+        chainer(alignments, effective_hmm, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], scaffolds, /* trusted_genes */ [], genome_asn, proteins_asn, task_params.get('chainer', [:]))
+
+        def gn_models = []
+        gnomon_wnode(scaffolds, chainer.out.chains, chainer.out.chains_slices, effective_hmm, [], softmask, genome_asn, proteins_asn, task_params.get('gnomon', [:]))
+       
+    emit:
+        gnomon_models = gnomon_wnode.out.outputs
+        // trained_hmm = effective_hmm
+}
+
+
+
+workflow post_gnomon_plane {
+    take:
+        gnomon_models
+        gencoll_asn
+        orthologs
+
+
+        // Alternative parameters, one of them should be set
+        // tax_id - NCBI tax id of the closest taxon to the genome
+        // hmm_params - HMM parameters
+        tax_id          // NCBI tax id of the closest taxon to the genome
+        task_params     // task parameters for every task
+    main:
+        // Post GNOMON
+        // might come its own plane
+        def swiss_prot_asn = fetch_swiss_prot_asn()
+        def swiss_prot_ids = get_swiss_prot_ids(swiss_prot_asn)
+
+        prot_gnomon_prepare(gnomon_models, task_params.get('prot_gnomon_prepare', [:]))
+        // Seed Protein-Model Hits
+        diamond_worker(prot_gnomon_prepare.out.prot_ids, swiss_prot_ids, gnomon_models, swiss_prot_asn, task_params.get('diamond', [:]))
+        best_protein_hits(gnomon_models, swiss_prot_asn,  diamond_worker.out.alignments , task_params.get('protein_filter', [:]))
+
+        gnomon_biotype([] /*models*/,/*splices_file  -- constant*/ [],  /*denylist -- constant*/ [], gencoll_asn, swiss_prot_asn, gnomon_models, diamond_worker.out.alignments,task_params.get('gnomon_biotype', [:]))
+        locus_link(/*best_refseq_prot_hit  -- best protein hits from refseq plane*/ [], orthologs, []  /*annot_builder.out.annot_files*/, 
+                gencoll_asn, gnomon_models, best_protein_hits.out.alignments , /*track_loci*/ [], /*comparisons*/ [],  /*curr_prev_compare*/ [], 
+                gnomon_biotype.out.biotypes, /*lxr_data*/ [], swiss_prot_asn, /*name_from_ortholog */ [],  task_params.get('locus_link', [:]))
+
+       
+    emit:
+        locus = locus_link.out.locus
+}