Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/utilities.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d9c5c5b87fec |
---|---|
1 #!/usr/bin/env nextflow | |
2 nextflow.enable.dsl=2 | |
3 | |
4 | |
// Split a command-line style string into words, in the spirit of Python's
// shlex.split:
//   * unquoted whitespace separates words;
//   * single or double quotes group characters (including whitespace) into
//     one word, and the quote characters themselves are dropped;
//   * outside quotes a backslash makes the next character literal; inside
//     quotes (of either kind) a backslash is an ordinary character;
//   * an empty quoted string ('' or "") yields an empty word.
// Unterminated quotes or a trailing backslash are not reported as errors.
def List<String> shellSplit(CharSequence s) {
    List<String> words = []
    StringBuilder buf = new StringBuilder()
    boolean pendingEscape = false
    boolean inQuotes = false
    char activeQuote = ' '
    // Index of the most recent closing quote; lets us emit empty words for '' / "".
    int quoteClosedAt = Integer.MIN_VALUE

    s.eachWithIndex { ch, pos ->
        // Character following an unquoted backslash: always literal.
        if (pendingEscape) {
            buf.append(ch)
            pendingEscape = false
            return
        }

        // Inside a quoted run only the matching quote is special.
        if (inQuotes) {
            if (ch == activeQuote) {
                inQuotes = false
                quoteClosedAt = pos
            } else {
                buf.append(ch)
            }
            return
        }

        // From here on we are outside any quotes.
        if (ch == '\\') {
            pendingEscape = true
            return
        }
        if (ch == '\'' || ch == '"') {
            inQuotes = true
            activeQuote = ch
            return
        }
        if (ch.isAllWhitespace()) {
            // Finish the word if it has content or if a quote closed right before.
            if (buf.size() > 0 || quoteClosedAt == (pos - 1)) {
                words.add(buf.toString())
                buf = new StringBuilder()
            }
            return
        }
        buf.append(ch)
    }

    // Flush the final word (which may be an empty quoted string at the very end).
    if (buf.size() > 0 || quoteClosedAt == (s.length() - 1)) {
        words.add(buf.toString())
    }

    return words
}
42 | |
43 | |
// Convert a flat parameter list such as ['-a', '1', '-b', '-c', 'x y'] into a
// map from option name to value (['-a': '1', '-b': '', '-c': 'x y']).
//
// Every key must start with '-'. The element after a key is taken as its value
// when it is non-empty and either does not start with '-' or contains a space;
// otherwise the key is stored with an empty string and the next element is
// treated as the next key.
//
// On the first element that is not a valid key an error line is printed and the
// map built so far is returned.
def Map<String, String> to_map(List<String> list )
{
    def result = [:]
    def total = list.size()
    def pos = 0
    while (pos < total) {
        def key = list.get(pos)
        pos += 1

        // Anything that does not look like an option name aborts the parse.
        if (key.size() == 0 || key[0] != '-') {
            println("Error: parameter string not well formed, map ${result}, elem ${key}, i ${pos}, s ${total}")
            return result
        }

        // Default to a value-less flag; upgrade if the next element qualifies as a value.
        def value = ""
        if (pos < total) {
            def candidate = list.get(pos)
            def usable = candidate.size() > 0 && (candidate[0] != '-' || candidate.contains(' '))
            if (usable) {
                value = candidate
                pos += 1
            }
        }
        result[key] = value
    }
    return result
}
75 | |
76 | |
// Return a shell-safe rendering of s (analog of Python's shlex.quote).
//
// A non-empty string made up only of word characters and @ % + = : , . / -
// is returned unchanged. Anything else (including the empty string) is wrapped
// in single quotes. Embedded single quotes are emitted as '"'"' (close the
// quoted run, add a double-quoted single quote, reopen) so the result is
// always a single, well-formed shell word.
def quote(String s)
{
    // The regex matches any character outside the safe set; no match means safe.
    if (s.size() > 0 && !(s =~ /[^\w@%+=:,.\/-]/)) {
        return s
    }
    return "'" + s.replace("'", "'\"'\"'") + "'"
}
84 | |
85 | |
// Overlay one section of user-supplied parameters on top of a default
// parameter string and return the combined command-line fragment.
// Parameters:
//   default_params: default options as a single shell-style string
//   parameters:     map from section name to a shell-style option string
//   section_name:   key of the section in `parameters` to apply
// Return: the merged options as one space-separated string, each word passed
//         through quote(); options with an empty value are emitted as bare flags.
def merge_params(default_params, parameters, section_name)
{
    // Parse the overrides first, then the defaults (same order as before, so any
    // parse diagnostics are printed in the same sequence).
    def overrides = to_map(shellSplit(parameters.get(section_name, "")))
    def merged = to_map(shellSplit(default_params))
    merged.putAll(overrides)

    def words = []
    for (entry in merged) {
        words.add(quote(entry.key))
        if (entry.value.size() > 0) {
            words.add(quote(entry.value))
        }
    }

    return words.join(" ")
}
108 | |
109 | |
110 | |
// Rewrite the deflines of a FASTA file so multi-part IDs become single-part IDs.
// Input:  fasta_in  - the FASTA file to clean
// Output: fasta_out - the rewritten FASTA, emitted as 'fasta_out'
// The helper Python script is shipped base64-encoded inside the task script and
// decoded at run time (see the in-script comments for the reason).
process clean_fasta_ids {
    input:
        path fasta_in
    output:
        path "fasta_out", emit: 'fasta_out'
    script:
    """
    ## turns Fasta inputs formatted with multi-part IDs into
    ## single-part IDs, like
    ## >gi|1234|ref|NW_1234.1 Some Defline For This Org
    ## >gi|1234 Some Defline For This Org
    ## LDS chokes on the multi-part IDs.
    # the base64 nonsense is because I couldnt get it to not complain about the regex as syntax errors in some way.
    # its just this:
    # import re,sys;
    # for l in sys.stdin:
    # <I had to delete the regex here because even in a comment nextflow lost it>
    echo 'aW1wb3J0IHJlLHN5czsKZm9yIGwgaW4gc3lzLnN0ZGluOgogICAgcHJpbnQocmUuc3ViKHIiXig+' > reol.b64
    echo 'Z2lcfFxkKylcfD8oW2Etel0rXHxbQS1aX10rW1xkXC5dK1x8KSguKikiLCAiXGc8MT5cZzwzPiIs' >> reol.b64
    echo 'IGwuc3RyaXAoKSkpCg==' >> reol.b64
    base64 -d ./reol.b64 > ./reol.py
    cat ${fasta_in} | python reol.py > ./fasta_out
    """
    stub:
    // The stub must create the file named in the output declaration (fasta_out),
    // otherwise a stub run fails with a missing-output error.
    """
    touch ./fasta_out
    """
}
139 | |
140 | |
141 | |
// Run the `multireader` tool on a FASTA file and write ASN.1 text output.
// Inputs:  fasta_file - the file passed to -input
//          parameters - extra command-line options, inserted verbatim
// Output:  output/fasta_file.asnt, emitted as 'multireader_file'
// If the interpolated file name is empty, an empty output file is created instead.
process multireader {
    input:
        path fasta_file
        val parameters
    output:
        path 'output/fasta_file.asnt', emit: 'multireader_file'
    script:
    """
    mkdir -p output
    if [ -n "${fasta_file}" ]; then
    multireader ${parameters} -out-format asn_text -input ${fasta_file} -output output/fasta_file.asnt
    else
    touch output/fasta_file.asnt
    fi
    """
    stub:
    """
    mkdir -p output
    touch output/fasta_file.asnt
    """
}