diff mauve_contig_mover.pl @ 0:b8bc1922226a draft default tip

"planemo upload for repository https://github.com/phac-nml/mauve_contig_mover commit 2d8300acc533f8b9ec95ff24ad2f529e92e8da69"
author nml
date Thu, 21 Nov 2019 12:37:56 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mauve_contig_mover.pl	Thu Nov 21 12:37:56 2019 -0500
@@ -0,0 +1,285 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Data::Dumper;
+use Getopt::Long;
+use Pod::Usage;
+use File::Copy;
+use File::Basename;
+
+
+my ($output, $reference_gbk, $reference_dat, $draft_fasta, $draft_dat, $alignment_file, $fasta_file, $html_file, $help, $best_alignment, 
+	@files, @sorted_files, $num_of_alignments, $mauve_cmd, $out, $best_alignment_file, $best_fasta_file);
+
+Getopt::Long::Configure('bundling');
+GetOptions(
+	'r|reference=s' => \$reference_dat,
+	'd|draft=s'	=> \$draft_dat,
+	'o|output=s'	=> \$output,
+	'a|alignment=s'	=> \$alignment_file,
+	'f|fasta=s'	=> \$fasta_file, 
+	'l|html=s'	=> \$html_file,
+	'h|help'	=> \$help
+);
+pod2usage(1) if $help;
+
+#Format the fasta file. Some fastas were not working, so we're going to format all incoming fastas
+my $draft_temp = "temporary.fasta";
+
+open my $in, '<', $draft_dat or die "Could not open draft file: $?";
+open my $out, '>', $draft_temp or die "Could not open file for writing: $?";
+
+my $first_header = <$in>;
+chomp $first_header;
+print $out "$first_header\n";
+while (my $line = <$in>)
+{
+   chomp $line;
+
+   if ($line =~ />/)
+   {
+      print $out "\n$line\n";
+   }
+   else
+   {
+      print $out $line;
+   }
+}
+
+
+
+#progressiveMauve checks the file extension of inputs, and did not like .dat files passed in.
+
+#First get the file format for the extention.
+my $format = get_format($reference_dat);
+die "Input reference file isn't a properly formatted fasta or genbank file!\n" if $format eq "na";
+
+#So here we create symbolic links to the .dat files using the proper file extensions.
+$reference_gbk = "reference.".$format;
+$draft_fasta = "draft.fasta";
+
+system("ln -s $reference_dat $reference_gbk");
+system("ln -s $draft_temp $draft_fasta");
+
+#First, run mauve with the given inputs
+$mauve_cmd = "java -Djava.awt.headless=true -Xmx500m -cp \$path2jar org.gel.mauve.contigs.ContigOrderer -output ".$output." -ref ".$reference_gbk." -draft ".$draft_fasta;
+
+$out = system("$mauve_cmd");
+
+
+#Get all of the alignmentX folders and pick the latest one
+opendir(DIR, $output) or die "Can't opendir $output";
+@files = readdir(DIR);
+@sorted_files = sort @files;
+$best_alignment = $sorted_files[@sorted_files -1];
+$num_of_alignments = $best_alignment;
+$num_of_alignments  =~ s/[^\d.]//g;
+
+
+#Now let's give galaxy the right outputs. Is there a better way to do this?
+#First extract the names from the paths	
+my ($a_name, $a_path, $a_suffix) = fileparse($best_alignment);
+my ($d_name, $d_path, $d_suffix) = fileparse($draft_fasta, ".fasta");
+
+
+#Now we want some files (alignment and the final fasta file) to show up 
+#in the history. So we copy these files to send them to galaxy.	
+$best_alignment_file = $output."/".$best_alignment."/".$a_name;
+$best_fasta_file = $output."/".$best_alignment."/".$d_name.".fasta";
+
+#Now copy them to the galaxy locations
+copy($best_alignment_file, $alignment_file) or die "$best_alignment_file Copying alignment failed: $!";
+copy($best_fasta_file, $fasta_file) or die "Copying fasta file failed: $!";
+
+
+#Let us write the html file!
+open my $html_out, ">", $html_file;
+printf $html_out "<!DOCTYPE html>
+<html>
+<style type=\"text/css\">
+
+body {
+	font-family: sans-serif;
+	color: #000;
+	}
+
+table {
+	margin-left: 3em;
+	text-align: center;
+	}
+th {
+	text-align:center;
+	background-color: #000080;
+	color: #FFF;
+	padding: 0.4em;
+	}
+td {
+	font-family: monospace;
+	text-align: left;
+	background-color: #EEE;
+	color: #000;
+	padding: 0.4em;
+	}
+h2 {
+	color: #800000;
+	padding-bottom: 0;
+	margin-bottom: 0;
+	clear: left;
+	}
+</style></head>
+
+  <body>
+
+
+    <h2 id=\"M0\">Mauve Output Summary</h2><br>
+ <ul><li>Number of alignments performed: $num_of_alignments (last alignment is usually the best)</li><li>The fasta and alignment files from the last alignment are shown in the history</li><li>To download the complete Mauve output, download the contents of this file</li><li>The contig orders of each alignment are shown below: </li></ul><br>";
+
+
+my %summary;
+my $contig_count;
+
+#generate data html
+foreach my $folder (@sorted_files)
+{
+	my $start = "Ordered Contigs";
+        my $stop = "Contigs with conflicting ordering information";
+        my $header = "type      label   contig  strand  left_end        right_end";
+        my $in_section = 0;
+        $contig_count = 0;
+        my @alignment_info;
+        next if ($folder =~ m/^\./);
+	my $file_path = $output."/".$folder."/".$d_name."_contigs.tab";
+	open my $curr_file, "<", $file_path;
+
+	#Go through the lines
+	while(<$curr_file>)
+	{
+		#find correct section. Note flip flop operators didn't work here for me 
+		if(/$start/) {$in_section=1;}
+		elsif(/$stop/) {$in_section=0;}
+
+		next if(/$start/ || /$stop/ || /$header/);
+
+		#Store the line content
+		if ($in_section and /\S/)
+		{
+			$contig_count++;
+			my @tmp = split('\t', $_);
+			my @columns = ($tmp[1], $tmp[3]);
+			push(@alignment_info, [@columns]);
+		}
+	}
+	close $curr_file;
+	$summary{$folder} = [@alignment_info];
+}
+
+
+#print out the headers
+
+printf $html_out "<table border=\"1\"><tbody><tr><th>Alignment</th>\n";
+
+for my $a (sort keys %summary)
+{
+	my $tmp = $a;
+	$tmp =~ s/[^\d.]//g;
+        printf $html_out "<th colspan=\"2\">$tmp</th>\n";
+}
+
+printf $html_out "</tr>";
+
+#print out the data
+for my $i (0 .. $contig_count-1)
+{
+        printf $html_out "<tr><td></td>\n";
+        for my $alignment (sort keys %summary)
+        {
+                printf $html_out "<td>". $summary{$alignment}[$i][0]."</td>\n<td>".$summary{$alignment}[$i][1]. "</td>\n";
+        }
+        printf $html_out "</tr>\n";
+}
+
+printf $html_out "</tbody></table></body></html>";
+
+#close all the things
+closedir(DIR);
+close $html_out;
+
+exit($out);
+
+
+sub get_format
+{
+	my $file = shift;
+	my $format;
+
+	open my $in, '<', $file or die "Could not open file for reading. $!";
+
+	my $line = <$in>;
+
+	if ($line =~ /LOCUS/)
+	{
+		$format = "gbk";
+	}
+	elsif ($line =~ /^>/)
+	{
+		$format = "fasta";
+	}
+	else
+	{
+		$format = "na";
+	}
+	close $in;
+
+	return $format;
+}
+
+__END__
+
+=head1 NAME
+
+	mauve_contig_mover.pl - A wrapper for galaxy to run Mauve Contig Mover
+
+=head1 SYNOPSIS
+
+	mauve_contig_mover.pl -r <reference> -d <draft> -o <output> -a <alignment file output> -f <fasta file output> -l <html file output> -h <help>
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<-r> B<--reference>
+
+The input reference strain in either a fasta or genbank format
+
+=item B<-d> B<--draft>
+
+The input draft genome in fasta format
+
+=item B<-o> B<--output>
+
+The output folder created by Mauve
+
+=item B<-a> B<--alignment>
+
+The best output alignment produced by Mauve
+
+=item B<-f> B<--fasta>
+
+The best output fasta file produced by Mauve
+
+=item B<-l> B<--html>
+
+The html file containing all of the output files produced by Mauve
+
+=item B<-h> B<--help>
+
+Print a help message and exits
+
+=back
+
+=head1 DESCRIPTION
+
+B<mauve_contig_mover> is a galaxy wrapper for Mauve Contig Mover. This script runs the command line version of the Mauve Contig Mover
+
+=cut