Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/target_proteins/miniprot/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d9c5c5b87fec |
---|---|
1 #!/usr/bin/env nextflow | |
2 | |
3 nextflow.enable.dsl=2 | |
4 | |
5 include { merge_params } from '../../utilities' | |
6 | |
7 | |
/**
 * Build the command-line option string passed to miniprot.
 *
 * The defaults are "-t 8 -G <max_intron>". The user-supplied "miniprot" entry
 * of `parameters` may use the generic option names "-cpu-count" and
 * "-max-intron"; the first occurrence of each is rewritten to miniprot's own
 * "-t" and "-G" before the entry is merged with the defaults by merge_params.
 *
 * @param parameters Map of tool name to extra-parameter string; may be null or
 *                   lack a "miniprot" entry. The caller's map is NOT modified.
 * @param max_intron maximum intron length, used as the default value for -G
 * @return the result of merge_params for the "miniprot" key
 */
def get_effective_params(parameters, max_intron) {
    def default_params = "-t 8 -G ${max_intron}"

    // Work on a shallow copy. The map is a process input shared by every
    // run_miniprot task, and both Groovy's Map.get(key, default) and the
    // write-back below would otherwise modify the caller's map.
    def local_params = parameters ? new LinkedHashMap(parameters) : [:]

    // Null-safe read: a missing key or an explicit null both become "".
    def value = (local_params['miniprot'] ?: "").toString()
    value = value.replaceFirst("-cpu-count", "-t")
    value = value.replaceFirst("-max-intron", "-G")
    local_params['miniprot'] = value

    return merge_params(default_params, local_params, "miniprot")
}
17 | |
// Align proteins to a genome with miniprot, optionally splitting the protein
// FASTA into chunks first so that each chunk is aligned by its own task.
workflow miniprot {
    take:
        fasta_genome_file     // path: genome fasta file
        fasta_proteins_file   // path: protein fasta file
        max_intron            // int: max intron length
        parameters            // Map: extra parameter and parameter update
    main:
        // The default chunk size doubles as a "do not split" sentinel: when the
        // "split_proteins" entry leaves it untouched, the protein file is
        // passed to miniprot as is.
        def unsplit_marker = 1000000000
        def split_option = merge_params("-n 1000000000", parameters, "split_proteins")
        def chunk_size = split_option.replaceFirst("-n ", "").toInteger()

        def protein_inputs
        if (chunk_size != unsplit_marker) {
            protein_inputs = split_proteins(fasta_proteins_file, chunk_size)
        } else {
            protein_inputs = fasta_proteins_file
        }

        // flatten() turns the list of chunk files into one item per file.
        run_miniprot(fasta_genome_file, protein_inputs.flatten(), max_intron, parameters)

    emit:
        miniprot_file = run_miniprot.out.miniprot_file
}
38 | |
39 | |
// Split a protein FASTA file into numbered chunk files output/1.fa,
// output/2.fa, ... each holding at most `items_per_chunk` records.
// A record starts at a line whose first character is '>'.
// Any lines that precede the first header are kept in the first chunk.
process split_proteins {
    input:
        path fasta_proteins_file
        val items_per_chunk
    output:
        path 'output/*'
    script:
    """
    #!/usr/bin/env python3
    import os

    # Guard against zero or negative sizes, which would produce empty files.
    chunk_size = max(1, ${items_per_chunk})

    os.makedirs("output", exist_ok=True)
    with open("${fasta_proteins_file}", 'rt') as f:
        items = 0
        chunk = []
        nextfile = 1
        for line in f:
            if line.startswith('>'):
                items += 1
                # Flush only once the chunk already holds chunk_size records.
                # The previous '>=' test flushed one record early, giving
                # chunks of chunk_size - 1 records and, for chunk_size == 1,
                # an empty first file.
                if items > chunk_size:
                    with open(f"output/{nextfile}.fa", "w") as outf:
                        outf.writelines(chunk)
                    chunk = []
                    nextfile += 1
                    items = 1
            chunk.append(line)
        # Write whatever is left after the last header.
        if chunk:
            with open(f"output/{nextfile}.fa", "w") as outf:
                outf.writelines(chunk)
    """
    stub:
        print("items_per_chunk ${items_per_chunk}")
    """
    mkdir -p output
    touch output/1.fa
    touch output/2.fa
    touch output/3.fa
    """
}
79 | |
80 | |
// Run miniprot for one protein FASTA file against the genome and write the
// alignments to output/<protein file base name>.paf.
process run_miniprot {
    label 'huge_job'
    label 'long_job'
    input:
        path fasta_genome_file
        path fasta_proteins_file
        val max_intron
        val parameters
    output:
        path ('output/*.paf'), emit: 'miniprot_file'

    script:
        // The output name follows the input chunk name, so outputs of
        // different chunks do not collide.
        def alignment_name = "${fasta_proteins_file.baseName}.paf"
        def miniprot_args = get_effective_params(parameters, max_intron)
    """
    mkdir -p output
    miniprot ${miniprot_args} ${fasta_genome_file} ${fasta_proteins_file} > output/${alignment_name}
    """
    stub:
        def alignment_name = "${fasta_proteins_file.baseName}.paf"
        def miniprot_args = get_effective_params(parameters, max_intron)
        // The stub only reports the options that a real run would use.
        println("Miniprot params: ${miniprot_args}")
    """
    mkdir -p output
    touch output/${alignment_name}
    """
}