comparison mauve_contig_mover.pl @ 0:b8bc1922226a draft default tip

"planemo upload for repository https://github.com/phac-nml/mauve_contig_mover commit 2d8300acc533f8b9ec95ff24ad2f529e92e8da69"
author nml
date Thu, 21 Nov 2019 12:37:56 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b8bc1922226a
1 #!/usr/bin/env perl
2
3 use strict;
4 use warnings;
5 use Data::Dumper;
6 use Getopt::Long;
7 use Pod::Usage;
8 use File::Copy;
9 use File::Basename;
10
11
# Script-wide state shared by every stage below.
#   Command-line values : $reference_dat, $draft_dat, $output, $alignment_file,
#                         $fasta_file, $html_file, $help
#   Filled in while running: $reference_gbk, $draft_fasta (link names handed to
#                         Mauve), @files/@sorted_files/$best_alignment/
#                         $num_of_alignments (contents of the output folder),
#                         $mauve_cmd, $out (status returned by system), and the
#                         two $best_*_file paths copied back to the caller.
my ($output, $reference_gbk, $reference_dat, $draft_fasta, $draft_dat, $alignment_file, $fasta_file, $html_file, $help, $best_alignment,
    @files, @sorted_files, $num_of_alignments, $mauve_cmd, $out, $best_alignment_file, $best_fasta_file);

Getopt::Long::Configure('bundling');

# GetOptions returns false when it met an unknown option or a missing value;
# stop with the usage text instead of carrying on with half-parsed arguments.
GetOptions(
    'r|reference=s' => \$reference_dat,
    'd|draft=s'     => \$draft_dat,
    'o|output=s'    => \$output,
    'a|alignment=s' => \$alignment_file,
    'f|fasta=s'     => \$fasta_file,
    'l|html=s'      => \$html_file,
    'h|help'        => \$help
) or pod2usage(2);
pod2usage(1) if $help;

# Every path option is used unconditionally further down, so report the ones
# that are missing now rather than failing later on an undefined file name.
my %required_option = (
    '--reference' => $reference_dat,
    '--draft'     => $draft_dat,
    '--output'    => $output,
    '--alignment' => $alignment_file,
    '--fasta'     => $fasta_file,
    '--html'      => $html_file,
);
my @missing_options = grep { !defined $required_option{$_} } sort keys %required_option;
pod2usage(-message => "Missing required option(s): @missing_options", -exitval => 2) if @missing_options;
26
#Format the fasta file. Some fastas were not working, so we're going to format all incoming fastas
#The draft is rewritten to temporary.fasta with every header on its own line and
#each record's sequence joined onto a single line.
my $draft_temp = "temporary.fasta";

# $! holds the reason an open failed; $? is the status of the last child process.
open my $draft_in, '<', $draft_dat or die "Could not open draft file: $!";
open my $draft_out, '>', $draft_temp or die "Could not open file for writing: $!";

my $first_header = <$draft_in>;
die "Draft file is empty!\n" unless defined $first_header;

# Strip "\r\n" as well as "\n" so files with Windows line endings do not leave
# carriage returns inside the joined sequence.
$first_header =~ s/\r?\n\z//;
print {$draft_out} "$first_header\n";

while (my $line = <$draft_in>)
{
    $line =~ s/\r?\n\z//;

    if ($line =~ /^>/)
    {
        # A new record: end the previous sequence line, then write the header.
        print {$draft_out} "\n$line\n";
    }
    else
    {
        # Sequence data: append to the current line without a newline.
        print {$draft_out} $line;
    }
}

# Terminate the last sequence line and make sure everything reached the disk
# before the file is handed to Mauve.
print {$draft_out} "\n";
close $draft_in;
close $draft_out or die "Could not finish writing $draft_temp: $!";
49
50
51
#progressiveMauve checks the file extension of inputs, and did not like .dat files passed in.

#First get the file format for the extension.
my $format = get_format($reference_dat);
die "Input reference file isn't a properly formatted fasta or genbank file!\n" if $format eq "na";

#So here we create symbolic links to the .dat files using the proper file extensions.
$reference_gbk = "reference.".$format;
$draft_fasta = "draft.fasta";

# The symlink builtin is used instead of running `ln -s` through a shell, so
# paths containing spaces or shell metacharacters are handled safely and a
# failure is reported instead of being ignored.
foreach my $link_pair ([$reference_dat, $reference_gbk], [$draft_temp, $draft_fasta])
{
    my ($target, $link_name) = @{$link_pair};
    unlink $link_name if -l $link_name;    # replace a stale link left by an earlier run
    symlink($target, $link_name) or die "Could not link $link_name to $target: $!";
}

#First, run mauve with the given inputs
# The command used to be a single string in which the shell expanded $path2jar.
# A shell started by system() can only see that variable through the
# environment, so reading it from %ENV is equivalent and lets the command run
# in list form, where no argument is re-parsed by a shell.
my $path2jar = $ENV{path2jar};
die "The path2jar environment variable is not set!\n" unless defined $path2jar and length $path2jar;

my @mauve_args = (
    'java', '-Djava.awt.headless=true', '-Xmx500m',
    '-cp', $path2jar, 'org.gel.mauve.contigs.ContigOrderer',
    '-output', $output,
    '-ref', $reference_gbk,
    '-draft', $draft_fasta,
);
$mauve_cmd = join(' ', @mauve_args);    # kept only for diagnostics

# $out keeps the raw status returned by system(); it is used at the end of the script.
$out = system(@mauve_args);
warn "Mauve did not finish cleanly (status $out): $mauve_cmd\n" if $out != 0;
69
70
#Get all of the alignmentX folders and pick the latest one
# DIR is left open here; it is closed at the end of the script.
opendir(DIR, $output) or die "Can't opendir $output";
@files = readdir(DIR);

# Order the sub-folders by the number embedded in their name. A plain string
# sort would place "alignment10" before "alignment2" and so pick the wrong
# folder as the latest once there are ten or more of them. Hidden entries
# ("." and "..") and anything that is not a folder are left out.
@sorted_files =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
    map  { [ ($_ =~ /(\d+)/ ? $1 : -1), $_ ] }
    grep { $_ !~ /^\./ and -d $output."/".$_ }
    @files;
die "No alignment folders were found in $output\n" unless @sorted_files;

$best_alignment = $sorted_files[-1];

# The folder name with everything but digits and dots removed is reported as
# the number of alignments performed.
$num_of_alignments = $best_alignment;
$num_of_alignments =~ s/[^\d.]//g;


#Now let's give galaxy the right outputs. Is there a better way to do this?
#First extract the names from the paths
my ($a_name, $a_path, $a_suffix) = fileparse($best_alignment);
my ($d_name, $d_path, $d_suffix) = fileparse($draft_fasta, ".fasta");


#Now we want some files (alignment and the final fasta file) to show up
#in the history. So we copy these files to send them to galaxy.
$best_alignment_file = $output."/".$best_alignment."/".$a_name;
$best_fasta_file = $output."/".$best_alignment."/".$d_name.".fasta";

#Now copy them to the galaxy locations
copy($best_alignment_file, $alignment_file) or die "$best_alignment_file Copying alignment failed: $!";
copy($best_fasta_file, $fasta_file) or die "Copying fasta file failed: $!";
94
95
#Let us write the html file!
# The handle stays open; the summary table is appended to it further down.
open my $html_out, ">", $html_file or die "Could not open html file for writing: $!";

# print is used rather than printf: the text contains an interpolated value and
# must not be interpreted as a format string. The <head> element is now opened
# before the style sheet so that the existing </head> has a matching start tag.
print {$html_out} <<"END_HTML_HEADER";
<!DOCTYPE html>
<html>
<head>
<style type="text/css">

body {
font-family: sans-serif;
color: #000;
}

table {
margin-left: 3em;
text-align: center;
}
th {
text-align:center;
background-color: #000080;
color: #FFF;
padding: 0.4em;
}
td {
font-family: monospace;
text-align: left;
background-color: #EEE;
color: #000;
padding: 0.4em;
}
h2 {
color: #800000;
padding-bottom: 0;
margin-bottom: 0;
clear: left;
}
</style></head>

<body>


<h2 id="M0">Mauve Output Summary</h2><br>
<ul><li>Number of alignments performed: $num_of_alignments (last alignment is usually the best)</li><li>The fasta and alignment files from the last alignment are shown in the history</li><li>To download the complete Mauve output, download the contents of this file</li><li>The contig orders of each alignment are shown below: </li></ul><br>
END_HTML_HEADER
137
138
# %summary maps each alignment folder name to a list of [label, strand] pairs,
# one pair per line of the "Ordered Contigs" section of that folder's
# <draft>_contigs.tab file. $contig_count ends up as the largest number of
# pairs found in any folder, so the table has a row for every contig.
my %summary;
my $contig_count = 0;

# Markers that delimit the section of interest, and the column header line.
# The data lines are split on tabs below, so the header is matched with
# flexible whitespace instead of literal single spaces.
my $start = "Ordered Contigs";
my $stop = "Contigs with conflicting ordering information";
my $header = qr/type\s+label\s+contig\s+strand\s+left_end\s+right_end/;

#generate data html
foreach my $folder (@sorted_files)
{
    next if ($folder =~ m/^\./);

    my $file_path = $output."/".$folder."/".$d_name."_contigs.tab";
    my $curr_file;
    unless (open $curr_file, "<", $file_path)
    {
        # Not every entry of the output folder has to hold a contigs file.
        warn "Skipping $folder: could not open $file_path: $!\n";
        next;
    }

    my $in_section = 0;
    my @alignment_info;

    #Go through the lines
    while (my $row = <$curr_file>)
    {
        #find correct section. Note flip flop operators didn't work here for me
        if ($row =~ /$start/) { $in_section = 1; next; }
        if ($row =~ /$stop/)  { $in_section = 0; next; }
        next if $row =~ $header;

        #Store the line content
        next unless $in_section and $row =~ /\S/;
        $row =~ s/\r?\n\z//;
        my @tmp = split(/\t/, $row);
        # The second and fourth tab-separated fields are the ones reported.
        push(@alignment_info, [$tmp[1], $tmp[3]]);
    }
    close $curr_file;

    $summary{$folder} = [@alignment_info];
    $contig_count = scalar @alignment_info if @alignment_info > $contig_count;
}
176
177
# Writes the summary table: one pair of columns (first and second stored value)
# per alignment in %summary and one row per stored entry.

# Values read from the Mauve output are escaped before being placed in the page.
my $escape_html = sub {
    my ($text) = @_;
    return '' unless defined $text;    # an alignment with fewer entries gets an empty cell
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
};

# Columns are ordered by the number in the folder name, so that "alignment10"
# comes after "alignment9" rather than after "alignment1".
my $alignment_number = sub { return $_[0] =~ /(\d+)/ ? $1 : 0; };
my @ordered_alignments =
    sort { $alignment_number->($a) <=> $alignment_number->($b) or $a cmp $b }
    keys %summary;

# The table needs as many rows as the longest alignment has entries.
my $row_total = 0;
foreach my $alignment (@ordered_alignments)
{
    my $rows = scalar @{ $summary{$alignment} };
    $row_total = $rows if $rows > $row_total;
}

#print out the headers
# print, not printf: contig labels may contain "%" and must never be treated
# as a format string.
print {$html_out} "<table border=\"1\"><tbody><tr><th>Alignment</th>\n";

foreach my $alignment (@ordered_alignments)
{
    (my $column_title = $alignment) =~ s/[^\d.]//g;
    print {$html_out} "<th colspan=\"2\">$column_title</th>\n";
}

print {$html_out} "</tr>";

#print out the data
for my $i (0 .. $row_total - 1)
{
    print {$html_out} "<tr><td></td>\n";
    foreach my $alignment (@ordered_alignments)
    {
        my $entry = $summary{$alignment}[$i];
        my ($contig_label, $strand) = $entry ? @{$entry} : ();
        print {$html_out} "<td>".$escape_html->($contig_label)."</td>\n<td>".$escape_html->($strand)."</td>\n";
    }
    print {$html_out} "</tr>\n";
}

print {$html_out} "</tbody></table></body></html>";
203
#close all the things
closedir(DIR);
close $html_out or warn "Could not finish writing the html file: $!\n";

# $out is the raw value returned by system(): -1 when the program could not be
# started, otherwise a wait status with the child's exit code in the high byte
# and a terminating signal in the low seven bits. Passing it straight to exit()
# would turn a child exit code of 1 (status 256) into exit code 0, hiding the
# failure, so it is translated to a real exit code first.
my $exit_code;
if ($out == -1)
{
    $exit_code = 1;
}
elsif ($out & 127)
{
    $exit_code = 128 + ($out & 127);
}
else
{
    $exit_code = $out >> 8;
}

exit($exit_code);
209
210
# Guess the format of a sequence file from its first line.
#
# Parameters:
#   $file - path of the file to inspect
# Returns:
#   "fasta" when the first line starts with ">",
#   "gbk"   when the first line starts with the LOCUS keyword,
#   "na"    otherwise (including an empty file).
# Dies when the file cannot be opened.
sub get_format
{
    my $file = shift;

    open my $in, '<', $file or die "Could not open file for reading. $!";
    my $line = <$in>;
    close $in;

    # An empty file has no first line at all.
    return "na" unless defined $line;

    # Both tests are anchored to the start of the line, and the fasta test
    # comes first, so a fasta header that merely contains the word "LOCUS"
    # is not mistaken for a genbank record.
    return "fasta" if $line =~ /^>/;
    return "gbk" if $line =~ /^\s*LOCUS\s/;

    return "na";
}
236
237 __END__
238
239 =head1 NAME
240
241 mauve_contig_mover.pl - A wrapper for galaxy to run Mauve Contig Mover
242
243 =head1 SYNOPSIS
244
245 mauve_contig_mover.pl -r <reference> -d <draft> -o <output> -a <alignment file output> -f <fasta file output> -l <html file output> [-h]
246
247 =head1 OPTIONS
248
249 =over 8
250
251 =item B<-r> B<--reference>
252
253 The input reference strain in either a fasta or genbank format
254
255 =item B<-d> B<--draft>
256
257 The input draft genome in fasta format
258
259 =item B<-o> B<--output>
260
261 The output folder created by Mauve
262
263 =item B<-a> B<--alignment>
264
265 The best output alignment produced by Mauve
266
267 =item B<-f> B<--fasta>
268
269 The best output fasta file produced by Mauve
270
271 =item B<-l> B<--html>
272
273 The html file containing all of the output files produced by Mauve
274
275 =item B<-h> B<--help>
276
277 Prints a help message and exits
278
279 =back
280
281 =head1 DESCRIPTION
282
283 B<mauve_contig_mover> is a Galaxy wrapper for Mauve Contig Mover. This script runs the command line version of the Mauve Contig Mover.
284
285 =cut