Mercurial > repos > nml > tree_relabeler
comparison nml_tree_relabeler.pl @ 0:02bc0d7d40b5 draft
planemo upload for repository https://github.com/phac-nml/galaxy_tools/blob/master/tools/tree_relabeler commit 974af0d5e7ccfcbd47284eb85a5f593d5e48daf8
author | nml |
---|---|
date | Mon, 29 Apr 2019 11:18:32 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:02bc0d7d40b5 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use Bio::TreeIO; | |
6 use Bio::Tree::Tree; | |
7 use IO::String; | |
8 use Getopt::Long; | |
9 use Pod::Usage; | |
10 | |
# Command-line state shared by every subroutine in this script.
# $tabline is declared here because load_tabfile() uses it as its line buffer.
my ($treefile, $tabfile, $delim, $outfile, $template, $help, $replace, $tabline, $man );

# GetOptions returns false when it meets an unknown option or an option
# with a missing value (it has already printed a warning by then).  Stop
# with the usage text instead of running on a half-parsed command line.
GetOptions(
    'i|treefile=s'       => \$treefile,
    't|tabfile=s'        => \$tabfile,
    'o|outfile=s'        => \$outfile,
    'd|delim:s'          => \$delim,      # value is optional; check_delimiter() supplies the default
    'p|print-template=s' => \$template,
    'r|replace'          => \$replace,
    'h|help'             => \$help,
    'm|man'              => \$man
) or pod2usage(-exitval => 2,
               -verbose => 1,
               -output  => \*STDERR);

# Dispatch: help/man first, then argument validation, then one of the two
# working modes (template generation takes precedence over relabeling).
if ($help){
    pod2usage(-verbose  => 99,
              -sections => "SYNOPSIS|OPTIONS AND ARGUMENTS|DESCRIPTION|DIAGNOSTICS");
} elsif ($man){
    pod2usage(-verbose => 2);
} elsif (!$treefile) {
    pod2usage(-msg     => "**Tree file is required.**\n",
              -exitval => 2,
              -verbose => 1,
              -output  => \*STDERR);
} elsif ( !(($template) || ($tabfile && $outfile)) ){
    pod2usage(-msg     => "**Either select a template file, or a tab file and outfile**\n",
              -exitval => 2,
              -verbose => 1,
              -output  => \*STDERR);
} elsif ($template) {
    # A tree file is guaranteed to be set once this branch is reached.
    generate_template();
} else {
    # Only remaining possibility: tree file, tab file and outfile were all given.
    relabel_tree();
}
44 | |
# Template mode: validate the tree file named by -i, then write its leaf
# labels to the template file named by -p.
sub generate_template {
    check_treefile();
    return print_template();
}
50 | |
# Relabel mode: settle the delimiter, read the old-label => new-label map
# from the tab file, validate the tree file, then write the relabeled tree.
sub relabel_tree {
    check_delimiter();
    my $labels_ref = load_tabfile();
    check_treefile();
    return write_treefile( { %{$labels_ref} } );
}
59 | |
# Write a tab-file template to the file named by $template: a "#label"
# header line followed by the id of every leaf node of each tree read
# from $treefile, one per line.
#
# Dies when the template file cannot be opened or cannot be written out
# completely; exits with status 2 when a parsed tree has one node or fewer.
sub print_template
{
    open my $tempout, '>', $template or die "Could not open file: $!";
    print $tempout "#label\n";

    my $treein = Bio::TreeIO->new(
        -format => "newick",
        -file   => "$treefile"
    );

    while (my $t = $treein->next_tree)
    {
        my @nodes = $t->get_nodes;

        if (scalar(@nodes) <= 1)
        {
            print STDERR "Tree is not in newick format.\n";
            exit(2);
        }

        foreach my $node ($t->get_leaf_nodes)
        {
            print $tempout $node->id,"\n";
        }
    }

    # Output is buffered, so a failed write (e.g. a full disk) may only be
    # reported when the handle is closed; do not ignore that result.
    close ($tempout) or die "Could not write file: $!";
}
89 | |
# Normalise the file-level $delim used to join label fields.
#
# A missing or empty delimiter becomes a single space.  The test is on
# definedness and length rather than truthiness, so that the string "0"
# is accepted as a delimiter instead of being replaced by the default.
# A delimiter containing any Newick reserved character "(),;:" is
# rejected: a message is printed to STDERR and the script exits with 1.
sub check_delimiter
{
    if (!defined $delim || length($delim) < 1){
        $delim = ' ';
    }
    elsif ($delim =~ /[\(\)\;\,\:]/){
        print STDERR "Delimiters cannot be Newick reserved characters '(),;:'.\n";
        exit(1);
    }
}
105 | |
# Read the tab-delimited file named by $tabfile and build the relabeling map.
#
# Returns a reference to a hash keyed by the first column of each row (the
# current tip label).  The value is the row's columns joined with $delim;
# when $replace is set the first column is left out of the joined value.
#
# Lines starting with '#' and blank lines are skipped.  The script exits
# with status 1 (message on STDERR) when the file cannot be opened, when a
# row contains a Newick reserved character "(),;:", or when a row has
# fewer than two columns.
sub load_tabfile
{
    my %new_labels;

    # Test the open itself: a file can exist and still be unreadable.
    my $tabin;
    if (!open($tabin, '<', $tabfile)){
        print STDERR "Error opening tab file.\n";
        exit(1);
    }

    while (my $line = <$tabin>){
        # header/comment rows start with a #
        next if $line =~ /^#/;
        $line =~ s/\r//g;
        chomp $line;
        # A blank line (typically a trailing one) carries no label and
        # must not be reported as a malformed row.
        next if $line =~ /^\s*$/;

        if ($line =~ /[\(\)\;\,\:]/){
            print STDERR "New labels cannot contain Newick reserved characters '(),;:'.\n";
            exit(1);
        }

        my @splits = split("\t", $line);

        # One column or less means there is no new information for this
        # label, or the row is not tab-delimited.
        if (scalar(@splits) <= 1){
            print STDERR "Tab file does not contain new labels.\n";
            exit(1);
        }

        # The first value in a row is the original tip label.
        my $label = $splits[0];
        # In find-and-replace mode the original label is not kept.
        shift @splits if ($replace);
        $new_labels{$label} = join($delim, @splits);
    }

    close ($tabin);

    return \%new_labels;
}
155 | |
# Cheap sanity checks on the file named by $treefile before it is handed
# to the tree parser.  Returns normally when the file looks like a
# single-line Newick tree; otherwise prints a message to STDERR and exits.
#
# Exit status 1: file missing, unreadable or empty.
# Exit status 2: more than one line, last non-whitespace character is not
#                ';', or the parentheses are unbalanced.
sub check_treefile
{
    if (!(-e $treefile)){
        print STDERR "Error opening tree file.\n";
        exit(1);
    }
    if (-z $treefile){
        print STDERR "Tree file is empty.\n";
        exit(1);
    }

    my $treein;
    if (!open($treein, '<', $treefile)){
        print STDERR "Error opening tree file.\n";
        exit(1);
    }
    my $line     = <$treein>;
    my $nextline = <$treein>;
    close ($treein);

    # Nothing could be read even though the file is not empty
    # (for example when the path is a directory).
    if (!defined $line){
        print STDERR "Error opening tree file.\n";
        exit(1);
    }
    if (defined $nextline){
        print STDERR "Tree is not in newick format. More than one line\n";
        exit(2);
    }

    # Drop the line ending and any trailing blanks before looking at the
    # last character.  The file may end with "\n", "\r\n" or no newline
    # at all, so the terminator cannot be found by counting back a fixed
    # number of characters.
    $line =~ s/\s+\z//;
    if ($line !~ /;\z/){
        # newick formats end in ;
        print STDERR "Tree is not in newick format. Does not end in ; \n";
        exit(2);
    }

    # Walk the line tracking the nesting depth of the parentheses.
    my $bracketcount = 0;
    foreach my $char (split("", $line)){
        if ($char eq "("){
            $bracketcount++;
        }
        elsif ($char eq ")"){
            if ($bracketcount == 0){
                # There is a ) before a (
                print STDERR "Tree is not in newick format. Missing a (\n";
                exit(2);
            }
            $bracketcount--;
        }
    }
    if ($bracketcount != 0){
        # There were not equal number of ( and )
        print STDERR "Tree is not in newick format. Brackets do not match. \n";
        exit(2);
    }
}
217 | |
# Relabel the tree(s) in $treefile and write them to $outfile in Newick format.
#
# Takes one argument: a reference to a hash mapping a current node id to
# its replacement id.  For every key a line is printed to STDOUT saying
# whether a node with that id was found.  Exits with status 2 when a
# parsed tree has one node or fewer.
sub write_treefile
{
    my $labels_ref = shift;
    my %new_labels = %{$labels_ref};

    # Read the input explicitly as Newick, the same way print_template()
    # does, rather than leaving the format to be guessed.
    my $treeinput = Bio::TreeIO->new(
        -format => "newick",
        -file   => "$treefile"
    );

    # create output tree file
    my $treeoutput = Bio::TreeIO->new(
        -format => "newick",
        -file   => ">$outfile"
    );

    while (my $t = $treeinput->next_tree){
        # Count the nodes in list context, as print_template() does.
        my @nodes = $t->get_nodes;
        if (scalar(@nodes) <= 1){
            # a tree with only 1 or less nodes is not in the correct format
            print STDERR "Tree is not in newick format.\n";
            exit(2);
        }

        # Phase 1: look up every old label while the tree still carries
        # its original ids.  Renaming during the lookup would let a new
        # label that equals another row's old label (A -> B, B -> C) be
        # matched and renamed a second time, depending on hash order.
        # Keys are sorted so the report order is reproducible.
        my @renames;
        foreach my $label (sort keys %new_labels){
            my $tip = $t->find_node(-id => $label);
            if ($tip){
                push @renames, [ $tip, $new_labels{$label} ];
                print $label," found and changed.\n";
            } else{
                # if label from tab file is not found, notify the user
                print $label," not found.\n";
            }
        }

        # Phase 2: apply the new ids.
        foreach my $rename (@renames){
            my ($tip, $new_id) = @{$rename};
            $tip->id($new_id);
        }

        $treeoutput->write_tree($t);
    }
}
256 | |
# The option dispatch above the subroutine definitions has already run;
# finish with a success status.
exit(0);
258 | |
259 =head1 NAME | |
260 | |
261 nml_tree_relabeler.pl - Changes the tip labels on a newick formatted tree | |
262 | |
263 =head1 VERSION | |
264 | |
265 This documentation refers to nml_tree_relabeler.pl version 0.0.2. | |
266 | |
267 =head1 SYNOPSIS | |
268 | |
269 nml_tree_relabeler.pl -i treefile [-t tabfile -o outfile (-d delim) (-r) | -p template] | |
270 | |
271 =head1 OPTIONS AND ARGUMENTS | |
272 | |
273 =over | |
274 | |
275 =item B<-i>, B<--treefile> | |
276 | |
277 The name of the tree file containing the tree whose tip labels are to be adjusted. Only accepts trees in newick format. (required) | |
278 | |
279 =item B<-t>, B<--tabfile> | |
280 | |
281 The name of the tab delimited file containing current tip labels and the info to be replaced/added to the labels. The first column must contain the current tree labels. Must not contain one of the Newick reserved characters '(),:;' (required option) | |
282 | |
283 =item B<-o>, B<--outfile> | |
284 | |
285 The output file. (required option) | |
286 | |
287 =item B<-d>, B<--delim> | |
288 | |
289 The character to use to divide the information of the labels. Must not be one of the Newick reserved characters '(),:;' (optional) | |
290 | |
291 =item B<-r>, B<--replace> | |
292 | |
293 Replace the tip names. This option will replace the tree tip names with the specified labels, instead of adding them to the tip name. | |
294 | |
295 =item B<-p>, B<--print-template> | |
296 | |
297 The name of the output template file. Prints out a template for the tabfile. (required option) | |
298 | |
299 =item B<-h>, B<--help> | |
300 | |
301 To display help message | |
302 | |
303 =item B<-m>, B<--man> | |
304 | |
305 To display manual | |
306 | |
307 =back | |
308 | |
309 =head1 DESCRIPTION | |
310 | |
311 =over | |
312 | |
313 nml_tree_relabeler takes a newick format tree file to modify tip labels and a tab-delimited file containing current tip labels and additional information to add to the tips in 2 or more columns. Header row of the tab delimited file must start with a '#'. An example is below: | |
314 | |
315 #label outbreak year location | |
316 orgs1 outbreak1 year1 location1 | |
317 orgs2 outbreak2 year2 location2 | |
318 | |
319 and so on. | |
320 | |
321 The information in the tab file is inserted into the tree file so the new information will appear on the tip labels. | |
322 | |
323 Alternatively, nml_tree_relabeler can print out the tip names to a tab-delimited template file. | |
324 | |
325 =back | |
326 | |
327 =head1 DIAGNOSTICS | |
328 | |
329 =over | |
330 | |
331 =item B<Tree file, tab file, and output file are required> | |
332 | |
333 Use the proper command line arguments (-i, -t, -o respectively) to add the filenames of the tree file, tab file, and output file. | |
334 | |
335 =item B<Tree file is required> | |
336 | |
337 Use the -i command line argument to add the tree file. | |
338 | |
339 =item B<Either select a template file, or a tab file and outfile> | |
340 | |
341 Use the proper command line arguments to either add a template file (-p) to print a tab template, or to add a tab file and an output file (-t, -o respectively) to relabel a tree. | |
342 | |
343 =item B<Label not found> | |
344 | |
345 A warning that a label provided in the tab file was not found in the tree file. Relabeling continues. | |
346 | |
347 =item B<Error opening tab/tree file> | |
348 | |
349 An error occurred while opening the tab/tree file, please check path/file. | |
350 | |
351 =item B<Tree is not in newick format> | |
352 | |
353 The tree file does not appear to be in newick format. Please check file and convert if necessary. | |
354 | |
355 =item B<Tab file does not contain new labels> | |
356 | |
357 The tab file only contains one column and therefore does not have any additional information to add to the tree. Please check the tab file. | |
358 | |
359 =item B<Delimiter/tabfile cannot contain Newick reserved characters '(),;:' > | |
360 | |
361 The tab file or delimiter selected contains one of the characters used in the Newick format. This will cause an error when trying to read the tree. Please modify your tab file or select a new delimiter. | |
362 | |
363 =back | |
364 | |
365 =head1 CONFIGURATION AND ENVIRONMENT | |
366 | |
367 =head1 DEPENDENCIES | |
368 | |
369 =over | |
370 | |
371 =item use Bio::TreeIO | |
372 | |
373 =item use Bio::Tree::Tree | |
374 | |
375 =back | |
376 | |
377 =head1 INCOMPATIBILITIES | |
378 | |
379 This script only works for NEWICK formatted trees. All other tree formats are not compatible. | |
380 | |
381 =head1 AUTHOR | |
382 | |
383 Jen Cabral, <jencabral@gmail.com> | |
384 | |
385 =head1 BUGS AND LIMITATIONS | |
386 | |
387 There are no known bugs in this module. | |
388 | |
389 Please report problems to Jen Cabral, <jencabral@gmail.com> | |
390 | |
391 =head1 COPYRIGHT & LICENSE | |
392 | |
393 Copyright (C) 2015 by NML | |
394 | |
395 This program is free software: you can redistribute it and/or modify | |
396 it under the terms of the GNU General Public License as published by | |
397 the Free Software Foundation, either version 3 of the License, or | |
398 (at your option) any later version. | |
399 | |
400 This program is distributed in the hope that it will be useful, | |
401 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
402 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
403 GNU General Public License for more details. | |
404 | |
405 You should have received a copy of the GNU General Public License | |
406 along with this program. If not, see <http://www.gnu.org/licenses/> | |
407 | |
408 =cut |