#!/usr/bin/env perl

# Provenance (header of the Mercurial changeset patch this script came from):
#   HG changeset patch
#   User nml
#   Date 1556551112 14400
#   Node ID 02bc0d7d40b523ab3ea9a3354a5a4dbf6ca20750
#   planemo upload for repository
#     https://github.com/phac-nml/galaxy_tools/blob/master/tools/tree_relabeler
#     commit 974af0d5e7ccfcbd47284eb85a5f593d5e48daf8
#   diff -r 000000000000 -r 02bc0d7d40b5 nml_tree_relabeler.pl
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/nml_tree_relabeler.pl Mon Apr 29 11:18:32 2019 -0400
#   @@ -0,0 +1,408 @@

use strict;
use warnings;
use Bio::TreeIO;
use Bio::Tree::Tree;
use IO::String;
use Getopt::Long;
use Pod::Usage;

# Characters with structural meaning in Newick; they may appear neither in
# the delimiter nor in any line of the tab file.
my $NEWICK_RESERVED = qr/[\(\)\;\,\:]/;

# Command line state shared by the subroutines below.
my ($treefile, $tabfile, $delim, $outfile, $template, $help, $replace, $man);

# A false return means an unknown or malformed option was given; stop with
# the usage text instead of silently running with a partial configuration.
GetOptions(
    'i|treefile=s'       => \$treefile,
    't|tabfile=s'        => \$tabfile,
    'o|outfile=s'        => \$outfile,
    'd|delim:s'          => \$delim,
    'p|print-template=s' => \$template,
    'r|replace'          => \$replace,
    'h|help'             => \$help,
    'm|man'              => \$man
) or pod2usage(-exitval => 2,
               -verbose => 1,
               -output  => \*STDERR);

if ($help) {
    pod2usage(-verbose  => 99,
              -sections => "SYNOPSIS|OPTIONS AND ARGUMENTS|DESCRIPTION|DIAGNOSTICS");
}
elsif ($man) {
    pod2usage(-verbose => 2);
}
elsif (!$treefile) {
    pod2usage(-msg     => "**Tree file is required.**\n",
              -exitval => 2,
              -verbose => 1,
              -output  => \*STDERR);
}
elsif (!($template || ($tabfile && $outfile))) {
    pod2usage(-msg     => "**Either select a template file, or a tab file and outfile**\n",
              -exitval => 2,
              -verbose => 1,
              -output  => \*STDERR);
}
elsif ($template) {
    # Template mode takes precedence when both modes are fully specified.
    generate_template();
}
else {
    relabel_tree();
}

# Print $message to STDERR and terminate the script with exit status $status.
sub fail {
    my ($status, $message) = @_;
    print STDERR $message;
    exit($status);
}

# Template mode: validate the tree file, then write its tip labels to the
# template file.
sub generate_template {
    check_treefile();
    print_template();
    return;
}

# Relabel mode: validate the delimiter, load the tab file, validate the tree
# file, then write the relabelled tree to the output file.
sub relabel_tree {
    check_delimiter();
    my $new_labels = load_tabfile();
    check_treefile();
    write_treefile($new_labels);
    return;
}

# Write a "#label" header followed by one leaf id per line to $template.
# Exits with status 2 if a parsed tree has one node or fewer.
sub print_template {
    open my $tempout, '>', $template or die "Could not open file: $!";
    print {$tempout} "#label\n";

    my $treein = Bio::TreeIO->new(
        -format => "newick",
        -file   => $treefile
    );

    while (my $tree = $treein->next_tree) {
        my @nodes = $tree->get_nodes;
        fail(2, "Tree is not in newick format.\n") if @nodes <= 1;

        for my $leaf ($tree->get_leaf_nodes) {
            print {$tempout} $leaf->id, "\n";
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close $tempout or die "Could not close file: $!";
    return;
}

# Normalise $delim: an absent or empty delimiter becomes a single space.
# Exits with status 1 if the delimiter contains a Newick reserved character.
sub check_delimiter {
    # Test definedness and length rather than truth so that "0" is accepted
    # as a delimiter.
    if (!defined $delim || $delim eq '') {
        $delim = ' ';
        return;
    }

    if ($delim =~ $NEWICK_RESERVED) {
        fail(1, "Delimiters cannot be Newick reserved characters '(),;:'.\n");
    }
    return;
}

# Read the tab-delimited file $tabfile and return a hash reference mapping
# each original tip label (first column) to its new label.
#
# Without --replace the new label is every column joined with $delim, so it
# still starts with the original label; with --replace the first column is
# dropped. Lines starting with '#' and blank lines are skipped.
# Exits with status 1 if the file cannot be opened, if a line contains a
# Newick reserved character, or if a line has fewer than two columns.
sub load_tabfile {
    my %new_labels;

    # Covers both a missing file and one that exists but cannot be read.
    open my $tabin, '<', $tabfile or fail(1, "Error opening tab file.\n");

    while (my $tabline = <$tabin>) {
        # Header/comment rows start with '#'.
        next if $tabline =~ /^#/;
        $tabline =~ s/\r//g;
        chomp $tabline;
        # A blank line (typically a trailing one) carries no label.
        next if $tabline !~ /\S/;

        if ($tabline =~ $NEWICK_RESERVED) {
            fail(1, "New labels cannot contain Newick reserved characters '(),;:'.\n");
        }

        my @columns = split /\t/, $tabline;

        # One column or fewer means there is nothing new to put on the tip.
        if (@columns <= 1) {
            fail(1, "Tab file does not contain new labels.\n");
        }

        # The first column is always the lookup key (the current tip label).
        my $label = $columns[0];
        shift @columns if $replace;
        $new_labels{$label} = join($delim, @columns);
    }

    close $tabin;

    return \%new_labels;
}

# Sanity-check $treefile before it is handed to Bio::TreeIO.
# Exits with status 1 if the file is missing, unreadable or empty, and with
# status 2 if it has more than one line, does not end in ';', or has
# unbalanced parentheses.
sub check_treefile {
    fail(1, "Error opening tree file.\n") if !(-e $treefile);
    fail(1, "Tree file is empty.\n")      if -z $treefile;

    open my $treein, '<', $treefile or fail(1, "Error opening tree file.\n");
    my $line     = <$treein>;
    my $nextline = <$treein>;
    close $treein;

    if (defined $nextline) {
        fail(2, "Tree is not in newick format. More than one line\n");
    }

    # Strip any trailing whitespace (spaces, CR, LF, or none at all) before
    # looking for the terminator, so a file without a final newline or with
    # CRLF line endings is judged on its content.
    (my $trimmed = $line) =~ s/\s+\z//;
    if ($trimmed !~ /;\z/) {
        fail(2, "Tree is not in newick format. Does not end in ; \n");
    }

    my $open_brackets = 0;
    for my $char (split //, $line) {
        if ($char eq ")") {
            # A ')' with nothing open means a '(' is missing before it.
            if ($open_brackets == 0) {
                fail(2, "Tree is not in newick format. Missing a (\n");
            }
            $open_brackets--;
        }
        elsif ($char eq "(") {
            $open_brackets++;
        }
    }

    if ($open_brackets != 0) {
        fail(2, "Tree is not in newick format. Brackets do not match. \n");
    }
    return;
}

# Relabel the nodes of every tree in $treefile and write the result to
# $outfile in newick format.
#
# Takes a hash reference mapping current labels to new labels. Reports on
# STDOUT, per label, whether it was found. Exits with status 2 if a parsed
# tree has one node or fewer.
sub write_treefile {
    my ($new_labels) = @_;

    # Name the format explicitly, as print_template does, instead of relying
    # on Bio::TreeIO's default.
    my $treeinput = Bio::TreeIO->new(
        -format => "newick",
        -file   => $treefile
    );

    my $treeoutput = Bio::TreeIO->new(
        -format => "newick",
        -file   => ">$outfile"
    );

    my @labels = sort keys %{$new_labels};

    while (my $tree = $treeinput->next_tree) {
        my @nodes = $tree->get_nodes;
        fail(2, "Tree is not in newick format.\n") if @nodes <= 1;

        # Resolve every label against the unmodified tree first. Renaming
        # while searching would let a new label that equals another old
        # label be matched (and renamed again) depending on iteration order.
        my %node_for;
        for my $label (@labels) {
            my $node = $tree->find_node(-id => $label);
            $node_for{$label} = $node if $node;
        }

        for my $label (@labels) {
            if (my $node = $node_for{$label}) {
                $node->id($new_labels->{$label});
                print $label, " found and changed.\n";
            }
            else {
                # Labels from the tab file that are not in the tree are
                # reported, not treated as errors.
                print $label, " not found.\n";
            }
        }

        $treeoutput->write_tree($tree);
    }
    return;
}

exit;

=head1 NAME

nml_tree_relabeler.pl - Changes the tip labels on a newick formatted tree

=head1 VERSION

This documentation refers to nml_tree_relabeler.pl version 0.0.2.

=head1 SYNOPSIS

    nml_tree_relabeler.pl -i treefile [-t tabfile -o outfile (-d delim) (-r) | -p template]

=head1 OPTIONS AND ARGUMENTS

=over

=item B<-i>, B<--treefile>

The name of the tree file containing the tree to adjust the tip labels. Only accepts trees in newick format. (required)

=item B<-t>, B<--tabfile>

The name of the tab delimited file containing current tip labels and the info to be replaced/added to the labels. The first column must contain the current tree labels. Must not contain one of the Newick reserved characters '(),:;' (required option)

=item B<-o>, B<--outfile>

The output file. (required option)

=item B<-d>, B<--delim>

The character to use to divide the information of the labels. Must not be one of the Newick reserved characters '(),:;'. Defaults to a single space. (optional)

=item B<-r>, B<--replace>

Replace the tip names. This option will replace the tree tip names with the specified labels, instead of adding them to the tip name.

=item B<-p>, B<--print-template>

The name of the output template file. Prints out a template for the tabfile.(required option)

=item B<-h>, B<--help>

To display help message

=item B<-m>, B<--man>

To display manual

=back

=head1 DESCRIPTION

=over

nml_tree_relabeler takes a newick format tree file to modify tip labels and a tab-delimited file containing current tip labels and additional information to add to the tips in 2 or more columns. Header row of the tab delimited file must start with a '#'. An example is below:

    #label    outbreak     year     location
    orgs1     outbreak1    year1    location1
    orgs2     outbreak2    year2    location2

and so on.

The information in the tab file is inserted into the tree file so the new information will appear on the tip labels.

Alternatively, nml_tree_relabeler can print out the tip names to a tab-delimited template file.

=back

=head1 DIAGNOSTICS

=over

=for comment NOTE(review): the item headings in this section read only "B"; their text appears to have been lost before this file was captured. Restore them from the upstream script.

=item B

Use the proper command line arguments (-i, -t, -o respectively) to add the filenames of the tree file, tab file, and output file.

=item B

Use the -i command line argument to add the tree file.

=item B

Use the proper command line arguments to either add a template file (-p) to print a tab template, or to add a tab file and an output file (-t, -o respectively) to relabel a tree.

=item B