Mercurial > repos > nml > mauve_contig_mover
comparison mauve_contig_mover.pl @ 0:b8bc1922226a draft default tip
"planemo upload for repository https://github.com/phac-nml/mauve_contig_mover commit 2d8300acc533f8b9ec95ff24ad2f529e92e8da69"
author | nml |
---|---|
date | Thu, 21 Nov 2019 12:37:56 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b8bc1922226a |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use Data::Dumper; | |
6 use Getopt::Long; | |
7 use Pod::Usage; | |
8 use File::Copy; | |
9 use File::Basename; | |
10 | |
11 | |
# Script-wide state. Everything is declared up front because the later
# sections of this script assign to these names without redeclaring them.
my ($output, $reference_gbk, $reference_dat, $draft_fasta, $draft_dat, $alignment_file, $fasta_file, $html_file, $help, $best_alignment,
    @files, @sorted_files, $num_of_alignments, $mauve_cmd, $out, $best_alignment_file, $best_fasta_file);

# Parse the command line. An unknown or malformed option prints the usage
# text and exits with status 2 instead of being silently ignored.
Getopt::Long::Configure('bundling');
GetOptions(
    'r|reference=s' => \$reference_dat,
    'd|draft=s'     => \$draft_dat,
    'o|output=s'    => \$output,
    'a|alignment=s' => \$alignment_file,
    'f|fasta=s'     => \$fasta_file,
    'l|html=s'      => \$html_file,
    'h|help'        => \$help
) or pod2usage(2);
pod2usage(1) if $help;

# Every path option is needed further down; fail here with a clear message
# rather than later with an "open" error on an undefined file name.
my @required_options = (
    [ reference => \$reference_dat ],
    [ draft     => \$draft_dat ],
    [ output    => \$output ],
    [ alignment => \$alignment_file ],
    [ fasta     => \$fasta_file ],
    [ html      => \$html_file ],
);
for my $option (@required_options) {
    my ($option_name, $value_ref) = @$option;
    next if defined $$value_ref && length $$value_ref;
    pod2usage(-message => "Missing required option --$option_name", -exitval => 2);
}
26 | |
# Format the fasta file. Some fastas were not working, so every incoming
# draft is rewritten: each header on its own line, followed by the record's
# sequence joined into a single line.
my $draft_temp = "temporary.fasta";

open my $draft_in, '<', $draft_dat
    or die "Could not open draft file '$draft_dat': $!";
open my $fasta_out, '>', $draft_temp
    or die "Could not open '$draft_temp' for writing: $!";

# True while the current output line holds sequence data that has not yet
# been terminated with a newline.
my $line_is_open = 0;

while (my $line = <$draft_in>)
{
    # Strip LF and CRLF endings so carriage returns never end up inside
    # the joined sequence.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^>/)
    {
        # A header starts a new record: finish the previous sequence line.
        print {$fasta_out} "\n" if $line_is_open;
        print {$fasta_out} "$line\n";
        $line_is_open = 0;
    }
    elsif (length $line)
    {
        print {$fasta_out} $line;
        $line_is_open = 1;
    }
}

# Terminate the final record so the file ends with a newline.
print {$fasta_out} "\n" if $line_is_open;

close $draft_in;
# Buffered write errors only surface at close, so check it.
close $fasta_out or die "Could not finish writing '$draft_temp': $!";
49 | |
50 | |
51 | |
# progressiveMauve checks the file extension of its inputs and did not like
# the .dat files passed in, so the inputs are exposed through symbolic links
# that carry a proper extension.

# First get the file format for the extension.
my $format = get_format($reference_dat);
die "Input reference file isn't a properly formatted fasta or genbank file!\n" if $format eq "na";

$reference_gbk = "reference.".$format;
$draft_fasta = "draft.fasta";

# Create the links with the symlink builtin rather than a shell "ln -s"
# string, so paths with spaces or shell metacharacters are handled verbatim
# and a failure is reported instead of ignored.
for my $link ([$reference_dat, $reference_gbk], [$draft_temp, $draft_fasta])
{
    my ($target, $link_name) = @$link;
    unlink $link_name if -l $link_name;    # replace a stale link from an earlier run
    symlink($target, $link_name)
        or die "Could not create symbolic link $link_name -> $target: $!";
}

# The class path of the Mauve jar comes from the path2jar environment
# variable (the same variable the shell used to expand in the command string).
my $path2jar = $ENV{path2jar};
die "Environment variable path2jar (class path of Mauve) is not set\n"
    unless defined $path2jar && length $path2jar;

# Run mauve with the given inputs. The list form of system bypasses the
# shell, so no argument is subject to word splitting or interpolation.
my @mauve_args = (
    "java", "-Djava.awt.headless=true", "-Xmx500m",
    "-cp", $path2jar, "org.gel.mauve.contigs.ContigOrderer",
    "-output", $output,
    "-ref", $reference_gbk,
    "-draft", $draft_fasta,
);
$mauve_cmd = join(" ", @mauve_args);    # kept as a string for diagnostics

# $out holds the raw wait status returned by system (-1 if java could not
# be started at all); it is turned into the script's exit status at the end.
$out = system(@mauve_args);
warn "Could not start '$mauve_cmd': $!\n" if $out == -1;
69 | |
70 | |
# Get all of the alignmentX folders and pick the latest one.
# DIR is left open here; it is closed at the end of the script.
opendir(DIR, $output) or die "Can't opendir $output: $!";
@files = readdir(DIR);

# Keep only real sub-directories (no ".", ".." or stray files) and order them
# by their trailing number. A plain string sort would place "alignment10"
# before "alignment2" and so select the wrong folder as the latest one.
@sorted_files =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
    map  { [ (/(\d+)\z/ ? $1 : -1), $_ ] }
    grep { !/^\./ && -d $output."/".$_ } @files;

die "Mauve did not produce any alignment folder in $output\n" unless @sorted_files;

$best_alignment = $sorted_files[-1];

# The number embedded in the latest folder name is reported as the number of
# alignments performed.
$num_of_alignments = $best_alignment;
$num_of_alignments =~ s/[^\d.]//g;


# Now let's give galaxy the right outputs.
# First extract the names from the paths.
my ($a_name) = fileparse($best_alignment);
my ($d_name) = fileparse($draft_fasta, ".fasta");


# We want the alignment and the final fasta file of the latest round to show
# up in the history, so they are copied to the locations galaxy handed us.
$best_alignment_file = $output."/".$best_alignment."/".$a_name;
$best_fasta_file = $output."/".$best_alignment."/".$d_name.".fasta";

copy($best_alignment_file, $alignment_file)
    or die "Copying alignment file $best_alignment_file failed: $!";
copy($best_fasta_file, $fasta_file)
    or die "Copying fasta file $best_fasta_file failed: $!";
94 | |
95 | |
# Write the html report: a short summary followed by a table showing, for
# every alignment round, the label and strand of each ordered contig.
open my $html_out, ">", $html_file
    or die "Could not open html file '$html_file' for writing: $!";

# print is used throughout (not printf): the text contains data such as
# contig names, and a "%" in it must not be taken as a format directive.
print {$html_out} <<"END_HTML";
<!DOCTYPE html>
<html>
<head>
<style type="text/css">

body {
    font-family: sans-serif;
    color: #000;
}

table {
    margin-left: 3em;
    text-align: center;
}
th {
    text-align:center;
    background-color: #000080;
    color: #FFF;
    padding: 0.4em;
}
td {
    font-family: monospace;
    text-align: left;
    background-color: #EEE;
    color: #000;
    padding: 0.4em;
}
h2 {
    color: #800000;
    padding-bottom: 0;
    margin-bottom: 0;
    clear: left;
}
</style></head>

<body>


<h2 id="M0">Mauve Output Summary</h2><br>
<ul><li>Number of alignments performed: $num_of_alignments (last alignment is usually the best)</li><li>The fasta and alignment files from the last alignment are shown in the history</li><li>To download the complete Mauve output, download the contents of this file</li><li>The contig orders of each alignment are shown below: </li></ul><br>
END_HTML

# Folder name => list of [label, strand] pairs, in file order.
my %summary;
# Largest number of ordered contigs found in any alignment (table height).
my $contig_count = 0;

# Escape text for use inside an html element; undef becomes an empty cell.
my $escape_html = sub {
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
};

# Collect the "Ordered Contigs" section of every alignment's contigs table.
foreach my $folder (@sorted_files)
{
    next if ($folder =~ m/^\./);

    my $file_path = $output."/".$folder."/".$d_name."_contigs.tab";
    my $curr_file;
    unless (open $curr_file, "<", $file_path)
    {
        # Not an alignment folder (or unreadable): leave it out of the table.
        warn "Skipping $folder: cannot read $file_path: $!\n";
        next;
    }

    my $in_section = 0;
    my @alignment_info;

    while (my $row = <$curr_file>)
    {
        # Section titles switch collecting on and off.
        if ($row =~ /Ordered Contigs/) { $in_section = 1; next; }
        if ($row =~ /Contigs with conflicting ordering information/) { $in_section = 0; next; }

        # Column header line; accept any whitespace between the names.
        next if $row =~ /type\s+label\s+contig\s+strand\s+left_end\s+right_end/;

        next unless $in_section and $row =~ /\S/;

        # Columns: type, label, contig, strand, left_end, right_end.
        my @fields = split(/\t/, $row);
        push(@alignment_info, [ $fields[1], $fields[3] ]);
    }
    close $curr_file;

    $summary{$folder} = \@alignment_info;
    $contig_count = scalar(@alignment_info) if @alignment_info > $contig_count;
}

# Columns go in numeric alignment order ("alignment2" before "alignment10").
my @alignments =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
    map  { [ (/(\d+)\z/ ? $1 : -1), $_ ] }
    keys %summary;

# print out the headers
print {$html_out} "<table border=\"1\"><tbody><tr><th>Alignment</th>\n";

for my $alignment (@alignments)
{
    my $label = $alignment;
    $label =~ s/[^\d.]//g;
    print {$html_out} "<th colspan=\"2\">$label</th>\n";
}

print {$html_out} "</tr>";

# print out the data; an alignment with fewer ordered contigs gets empty cells
for my $i (0 .. $contig_count - 1)
{
    print {$html_out} "<tr><td></td>\n";
    for my $alignment (@alignments)
    {
        my $cell = $summary{$alignment}[$i] || [];
        print {$html_out} "<td>".$escape_html->($cell->[0])."</td>\n<td>".$escape_html->($cell->[1])."</td>\n";
    }
    print {$html_out} "</tr>\n";
}

print {$html_out} "</tbody></table></body></html>";

# close all the things
closedir(DIR);
close $html_out or die "Could not finish writing html file '$html_file': $!";

# $out is the wait status from system(), not an exit code: the exit code
# lives in the high byte. Passing the raw value to exit would truncate a
# failure such as 256 (exit code 1) to 0 and report success.
my $exit_code = $out == -1 ? 1             # java could not be started
              : ($out & 127) ? 1           # mauve was killed by a signal
              :                $out >> 8;  # mauve's own exit code
exit($exit_code);
209 | |
210 | |
# get_format($file)
#
# Guesses the format of a sequence file from its first line.
#
# Parameters:
#   $file - path of the file to inspect.
#
# Returns:
#   "fasta" if the first line starts with ">",
#   "gbk"   if the first line contains "LOCUS",
#   "na"    otherwise (including an empty file).
#
# Dies if the file cannot be opened.
sub get_format
{
    my $file = shift;

    open my $in, '<', $file or die "Could not open file for reading. $!";
    my $line = <$in>;
    close $in;

    # An empty file has no first line at all.
    return "na" unless defined $line;

    # Test for fasta first: a fasta header may itself mention "LOCUS"
    # (e.g. a locus tag) and must not be mistaken for a genbank file.
    return "fasta" if $line =~ /^>/;
    return "gbk"   if $line =~ /LOCUS/;

    return "na";
}
236 | |
237 __END__ | |
238 | |
239 =head1 NAME | |
240 | |
241 mauve_contig_mover.pl - A wrapper for galaxy to run Mauve Contig Mover | |
242 | |
243 =head1 SYNOPSIS | |
244 | |
245 mauve_contig_mover.pl -r <reference> -d <draft> -o <output> -a <alignment file output> -f <fasta file output> -l <html file output> -h <help> | |
246 | |
247 =head1 OPTIONS | |
248 | |
249 =over 8 | |
250 | |
251 =item B<-r> B<--reference> | |
252 | |
253 The input reference strain in either a fasta or genbank format | |
254 | |
255 =item B<-d> B<--draft> | |
256 | |
257 The input draft genome in fasta format | |
258 | |
259 =item B<-o> B<--output> | |
260 | |
261 The output folder created by Mauve | |
262 | |
263 =item B<-a> B<--alignment> | |
264 | |
265 The best output alignment produced by Mauve | |
266 | |
267 =item B<-f> B<--fasta> | |
268 | |
269 The best output fasta file produced by Mauve | |
270 | |
271 =item B<-l> B<--html> | |
272 | |
273 The html file containing all of the output files produced by Mauve | |
274 | |
275 =item B<-h> B<--help> | |
276 | |
277 Prints a help message and exits | |
278 | |
279 =back | |
280 | |
281 =head1 DESCRIPTION | |
282 | |
283 B<mauve_contig_mover> is a galaxy wrapper for Mauve Contig Mover. This script runs the command line version of the Mauve Contig Mover | |
284 | |
285 =cut |