view mauve_contig_mover.pl @ 0:b8bc1922226a draft default tip

"planemo upload for repository https://github.com/phac-nml/mauve_contig_mover commit 2d8300acc533f8b9ec95ff24ad2f529e92e8da69"
author nml
date Thu, 21 Nov 2019 12:37:56 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env perl

use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
use Pod::Usage;
use File::Copy;
use File::Basename;


my ($output, $reference_gbk, $reference_dat, $draft_fasta, $draft_dat, $alignment_file, $fasta_file, $html_file, $help, $best_alignment, 
	@files, @sorted_files, $num_of_alignments, $mauve_cmd, $out, $best_alignment_file, $best_fasta_file);

Getopt::Long::Configure('bundling');
GetOptions(
	'r|reference=s' => \$reference_dat,
	'd|draft=s'	=> \$draft_dat,
	'o|output=s'	=> \$output,
	'a|alignment=s'	=> \$alignment_file,
	'f|fasta=s'	=> \$fasta_file, 
	'l|html=s'	=> \$html_file,
	'h|help'	=> \$help
);
pod2usage(1) if $help;

#Format the fasta file. Some fastas were not working, so we're going to format all incoming fastas
my $draft_temp = "temporary.fasta";

open my $in, '<', $draft_dat or die "Could not open draft file: $?";
open my $out, '>', $draft_temp or die "Could not open file for writing: $?";

my $first_header = <$in>;
chomp $first_header;
print $out "$first_header\n";
while (my $line = <$in>)
{
   chomp $line;

   if ($line =~ />/)
   {
      print $out "\n$line\n";
   }
   else
   {
      print $out $line;
   }
}



#progressiveMauve checks the file extension of inputs, and did not like .dat files passed in.

#First get the file format for the extention.
my $format = get_format($reference_dat);
die "Input reference file isn't a properly formatted fasta or genbank file!\n" if $format eq "na";

#So here we create symbolic links to the .dat files using the proper file extensions.
$reference_gbk = "reference.".$format;
$draft_fasta = "draft.fasta";

system("ln -s $reference_dat $reference_gbk");
system("ln -s $draft_temp $draft_fasta");

#First, run mauve with the given inputs
$mauve_cmd = "java -Djava.awt.headless=true -Xmx500m -cp \$path2jar org.gel.mauve.contigs.ContigOrderer -output ".$output." -ref ".$reference_gbk." -draft ".$draft_fasta;

$out = system("$mauve_cmd");


#Get all of the alignmentX folders and pick the latest one
opendir(DIR, $output) or die "Can't opendir $output";
@files = readdir(DIR);
@sorted_files = sort @files;
$best_alignment = $sorted_files[@sorted_files -1];
$num_of_alignments = $best_alignment;
$num_of_alignments  =~ s/[^\d.]//g;


#Now let's give galaxy the right outputs. Is there a better way to do this?
#First extract the names from the paths	
my ($a_name, $a_path, $a_suffix) = fileparse($best_alignment);
my ($d_name, $d_path, $d_suffix) = fileparse($draft_fasta, ".fasta");


#Now we want some files (alignment and the final fasta file) to show up 
#in the history. So we copy these files to send them to galaxy.	
$best_alignment_file = $output."/".$best_alignment."/".$a_name;
$best_fasta_file = $output."/".$best_alignment."/".$d_name.".fasta";

#Now copy them to the galaxy locations
copy($best_alignment_file, $alignment_file) or die "$best_alignment_file Copying alignment failed: $!";
copy($best_fasta_file, $fasta_file) or die "Copying fasta file failed: $!";


#Let us write the html file!
open my $html_out, ">", $html_file;
printf $html_out "<!DOCTYPE html>
<html>
<style type=\"text/css\">

body {
	font-family: sans-serif;
	color: #000;
	}

table {
	margin-left: 3em;
	text-align: center;
	}
th {
	text-align:center;
	background-color: #000080;
	color: #FFF;
	padding: 0.4em;
	}
td {
	font-family: monospace;
	text-align: left;
	background-color: #EEE;
	color: #000;
	padding: 0.4em;
	}
h2 {
	color: #800000;
	padding-bottom: 0;
	margin-bottom: 0;
	clear: left;
	}
</style></head>

  <body>


    <h2 id=\"M0\">Mauve Output Summary</h2><br>
 <ul><li>Number of alignments performed: $num_of_alignments (last alignment is usually the best)</li><li>The fasta and alignment files from the last alignment are shown in the history</li><li>To download the complete Mauve output, download the contents of this file</li><li>The contig orders of each alignment are shown below: </li></ul><br>";


my %summary;
my $contig_count;

#generate data html
foreach my $folder (@sorted_files)
{
	my $start = "Ordered Contigs";
        my $stop = "Contigs with conflicting ordering information";
        my $header = "type      label   contig  strand  left_end        right_end";
        my $in_section = 0;
        $contig_count = 0;
        my @alignment_info;
        next if ($folder =~ m/^\./);
	my $file_path = $output."/".$folder."/".$d_name."_contigs.tab";
	open my $curr_file, "<", $file_path;

	#Go through the lines
	while(<$curr_file>)
	{
		#find correct section. Note flip flop operators didn't work here for me 
		if(/$start/) {$in_section=1;}
		elsif(/$stop/) {$in_section=0;}

		next if(/$start/ || /$stop/ || /$header/);

		#Store the line content
		if ($in_section and /\S/)
		{
			$contig_count++;
			my @tmp = split('\t', $_);
			my @columns = ($tmp[1], $tmp[3]);
			push(@alignment_info, [@columns]);
		}
	}
	close $curr_file;
	$summary{$folder} = [@alignment_info];
}


#print out the headers

printf $html_out "<table border=\"1\"><tbody><tr><th>Alignment</th>\n";

for my $a (sort keys %summary)
{
	my $tmp = $a;
	$tmp =~ s/[^\d.]//g;
        printf $html_out "<th colspan=\"2\">$tmp</th>\n";
}

printf $html_out "</tr>";

#print out the data
for my $i (0 .. $contig_count-1)
{
        printf $html_out "<tr><td></td>\n";
        for my $alignment (sort keys %summary)
        {
                printf $html_out "<td>". $summary{$alignment}[$i][0]."</td>\n<td>".$summary{$alignment}[$i][1]. "</td>\n";
        }
        printf $html_out "</tr>\n";
}

printf $html_out "</tbody></table></body></html>";

#close all the things
closedir(DIR);
close $html_out;

exit($out);


sub get_format
{
	my $file = shift;
	my $format;

	open my $in, '<', $file or die "Could not open file for reading. $!";

	my $line = <$in>;

	if ($line =~ /LOCUS/)
	{
		$format = "gbk";
	}
	elsif ($line =~ /^>/)
	{
		$format = "fasta";
	}
	else
	{
		$format = "na";
	}
	close $in;

	return $format;
}

__END__

=head1 NAME

	mauve_contig_mover.pl - A wrapper for galaxy to run Mauve Contig Mover

=head1 SYNOPSIS

	mauve_contig_mover.pl -r <reference> -d <draft> -o <output> -a <alignment file output> -f <fasta file output> -l <html file output> -h <help>

=head1 OPTIONS

=over 8

=item B<-r> B<--reference>

The input reference strain in either a fasta or genbank format

=item B<-d> B<--draft>

The input draft genome in fasta format

=item B<-o> B<--output>

The output folder created by Mauve

=item B<-a> B<--alignment>

The best output alignment produced by Mauve

=item B<-f> B<--fasta>

The best output fasta file produced by Mauve

=item B<-l> B<--html>

The html file containing all of the output files produced by Mauve

=item B<-h> B<--help>

Print a help message and exits

=back

=head1 DESCRIPTION

B<mauve_contig_mover> is a galaxy wrapper for Mauve Contig Mover. This script runs the command line version of the Mauve Contig Mover

=cut