diff nf/subworkflows/ncbi/only_gnomon.nf @ 0:d9c5c5b87fec draft

planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author fubar
date Sat, 03 Aug 2024 11:16:53 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nf/subworkflows/ncbi/only_gnomon.nf	Sat Aug 03 11:16:53 2024 +0000
@@ -0,0 +1,95 @@
+#!/usr/bin/env nextflow
+// gnomon-only nextflow script for EGAPx execution
+// route data to subworkflows
+
+nextflow.enable.dsl=2
+
+include { setup_genome; setup_proteins } from './setup/main'
+include { get_hmm_params; run_get_hmm } from './default/get_hmm_params/main'
+include { chainer_wnode as chainer } from './gnomon/chainer_wnode/main'
+include { gnomon_wnode } from './gnomon/gnomon_wnode/main'
+include { prot_gnomon_prepare } from './gnomon/prot_gnomon_prepare/main'
+include { annot_builder } from './default/annot_builder/main'
+include { annotwriter } from './default/annotwriter/main'
+include { run_align_sort} from './default/align_sort_sa/main'
+
+params.intermediate = false
+
+workflow only_gnomon {
+    take:
+        genome          // path to genome
+        proteins        // path to proteins, optional
+        // Alternative groups of parameters, one of them should be set
+        rnaseq_alignments // path to rnaseq_collapse'ed alignments
+        protein_alignments // path to miniprot, filtered, sorted, alignments
+        
+        organelles      // path to organelle list
+        // Alternative parameters, one of them should be set
+        // tax_id - NCBI tax id of the closest taxon to the genome
+        // hmm_params - HMM parameters
+        tax_id          // NCBI tax id of the closest taxon to the genome
+        hmm_params      // HMM parameters
+        hmm_taxid       // NCBI tax id of the taxon of the HMM
+        //
+        softmask        // softmask for GNOMON, optional
+        task_params     // task parameters for every task
+    main:
+        
+        def (scaffolds, gencoll_asn, unpacked_genome, genome_asn) = setup_genome(genome, organelles, task_params.get('setup', [:]))
+
+        // Protein alignments
+        def unpacked_proteins
+        def proteins_asn = []
+        if (proteins) {
+            // miniprot plane
+            (unpacked_proteins, proteins_asn) = setup_proteins(proteins, task_params.get('setup', [:]))
+        }
+
+        // Combine RNASeq and protein alignments
+
+        def alignments
+        if (protein_alignments && rnaseq_alignments) {
+            print(rnaseq_alignments.getClass())
+            print(rnaseq_alignments)
+            print(protein_alignments.getClass())
+            print(protein_alignments)
+            alignments = Channel.of(rnaseq_alignments).combine(Channel.of(protein_alignments))
+        } else if (protein_alignments) {
+            alignments = protein_alignments
+        } else if (rnaseq_alignments) {
+            alignments = rnaseq_alignments
+        } else {
+            print("error") 
+        }
+
+        // GNOMON
+
+        def effective_hmm
+        if (hmm_params) {
+            effective_hmm = hmm_params
+        } else {
+            tmp_hmm = run_get_hmm(tax_id)
+            b = tmp_hmm | splitText( { it.split('\n') } ) | flatten 
+            c = b | last
+            effective_hmm = c
+        }
+
+        chainer(alignments, effective_hmm, /* evidence_denylist */ [], /* gap_fill_allowlist */ [], scaffolds, /* trusted_genes */ [], genome_asn, proteins_asn, task_params.get('chainer', [:]))
+
+        gnomon_wnode(scaffolds, chainer.out.chains, chainer.out.chains_slices, effective_hmm, [], softmask, genome_asn, proteins_asn, task_params.get('gnomon', [:]))
+        def models = gnomon_wnode.out.outputs
+
+        // prot_gnomon_prepare(models, task_params.get('prot_gnomon_prepare', [:]))
+        
+        // actual gnomon end but whatever
+
+
+        annot_builder(gencoll_asn, models, genome_asn, task_params.get('annot_builder', [:]))
+        def accept_asn = annot_builder.out.accept_asn
+        
+        annotwriter(accept_asn, [:])
+        annotwriter.out.annoted_file
+    emit:
+        out_files = annotwriter.out.annoted_file
+        evidence = annot_builder.out.outputs
+}