Mercurial > repos > bzeitouni > svdetect
comparison svdetect/SVDetect_compare.pl @ 13:f090bf6ec765 draft
Uploaded
author | bzeitouni |
---|---|
date | Mon, 11 Jun 2012 12:59:11 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:602e6912ac67 | 13:f090bf6ec765 |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 =pod | |
4 | |
5 =head1 NAME | |
6 | |
7 SVDetect Compare for Galaxy | |
8 | |
9 Version: 0.8 for Galaxy | |
10 | |
11 =head1 SYNOPSIS | |
12 | |
13 SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man] | |
14 | |
15 =cut | |
16 | |
17 # ------------------------------------------------------------------- | |
18 | |
19 use strict; | |
20 use warnings; | |
21 | |
22 use Pod::Usage; | |
23 use Getopt::Long; | |
24 | |
25 use Config::General; | |
26 use Tie::IxHash; | |
27 | |
28 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
29 #PARSE THE COMMAND LINE | |
30 my %OPT; | |
31 GetOptions(\%OPT, | |
32 'conf=s', | |
33 'out1=s', #GALAXY | |
34 'out2=s', #GALAXY | |
35 'out3=s', #GALAXY | |
36 'out4=s', #GALAXY | |
37 'out5=s', #GALAXY | |
38 'out6=s', #GALAXY | |
39 'out7=s', #GALAXY | |
40 'out8=s', #GALAXY | |
41 'out9=s', #GALAXY | |
42 'l=s', #GALAXY | |
43 'N=s', #GALAXY | |
44 'help', | |
45 'man' | |
46 ); | |
47 | |
48 pod2usage() if $OPT{help}; | |
49 pod2usage(-verbose=>2) if $OPT{man}; | |
50 pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf}); | |
51 | |
52 | |
53 pod2usage() if(@ARGV<1); | |
54 | |
55 tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare); | |
56 | |
57 foreach my $command (@ARGV){ | |
58 pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command})); | |
59 } | |
60 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
61 | |
62 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
63 #READ THE CONFIGURATION FILE | |
64 my $conf=Config::General->new( -ConfigFile => $OPT{conf}, | |
65 -Tie => "Tie::IxHash", | |
66 -AllowMultiOptions => 1, | |
67 -LowerCaseNames => 1, | |
68 -AutoTrue => 1); | |
69 my %CONF= $conf->getall; | |
70 validateconfiguration(\%CONF); #validation of the configuration parameters | |
71 | |
72 | |
73 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY | |
74 my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY | |
75 | |
76 my $pt_log_file=$OPT{l}; #GALAXY | |
77 my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY | |
78 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY | |
79 | |
80 my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference | |
81 my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference | |
82 my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference | |
83 | |
84 $CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY | |
85 $CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY | |
86 | |
87 $CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY | |
88 $CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY | |
89 | |
90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
91 | |
92 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
93 #COMMAND EXECUTION | |
94 foreach my $command (@ARGV){ | |
95 &{$func{$command}}(); | |
96 } | |
97 print LOG "-- end\n"; | |
98 | |
99 close LOG;#GALAXY | |
100 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY | |
101 | |
102 exit(0); | |
103 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
104 | |
105 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | |
106 #FUNCTIONS | |
107 | |
108 # -----------------------------------------------------------------------------# | |
109 #MAIN FUNCTION number 5:Comparison between samples, common or specific links | |
110 sub links2compare{ | |
111 | |
112 my @compare_files; | |
113 | |
114 compareSamples($CONF{general}{output_dir}, | |
115 $CONF{compare}{list_samples}, | |
116 $CONF{compare}{sample_link_file}, | |
117 $CONF{compare}{reference_link_file}, | |
118 $CONF{compare}{min_overlap}, | |
119 $CONF{compare}{same_sv_type}, | |
120 \@compare_files); | |
121 | |
122 my $pt_ind=0; | |
123 | |
124 for my $input_file (@compare_files){ | |
125 | |
126 $input_file=$CONF{general}{output_dir}.$input_file; | |
127 | |
128 my $output_file=$input_file; | |
129 $output_file=~s/unique$/compared/; | |
130 | |
131 sortLinks($input_file, $output_file,1); | |
132 | |
133 if($CONF{compare}{circos_output}){ | |
134 links2segdup($CONF{circos}{organism_id}, | |
135 $CONF{circos}{colorcode}, | |
136 $output_file, | |
137 $output_file.".segdup.txt"); | |
138 system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY | |
139 } | |
140 | |
141 if($CONF{compare}{bed_output}){ | |
142 links2bedfile($CONF{compare}{read_lengths}, | |
143 $CONF{bed}{colorcode}, | |
144 $output_file, | |
145 $output_file.".bed"); | |
146 system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY | |
147 } | |
148 | |
149 if($CONF{compare}{sv_output}){ | |
150 | |
151 links2SVfile ($output_file, $output_file.".sv.txt"); | |
152 system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY | |
153 } | |
154 $pt_ind++; | |
155 | |
156 } | |
157 unlink(@compare_files); | |
158 | |
159 } | |
160 #------------------------------------------------------------------------------# | |
161 #------------------------------------------------------------------------------# | |
162 sub compareSamples{ | |
163 | |
164 my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_; | |
165 | |
166 my @bedpefiles; | |
167 my @list=split(",",$list_samples); | |
168 my @list_files=($sample_file,$reference_file); | |
169 | |
170 print LOG "\# Comparison procedure...\n"; | |
171 print LOG "-- samples=$list_samples\n". | |
172 "-- minimum overlap=$min_overlap\n". | |
173 "-- same SV type=$same_sv_type\n"; | |
174 | |
175 #conversion of links to bedPE format file | |
176 print LOG "-- Conversion of links.filtered files to bedPE format\n"; | |
177 for my $s (0..$#list) { | |
178 | |
179 links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt"); | |
180 push(@bedpefiles,$list_files[$s].".bedpe.txt"); | |
181 | |
182 } | |
183 | |
184 #get common links between all samples compared | |
185 print LOG "-- Getting common links between all samples with BEDTools\n"; | |
186 my $common_name=join(".",@list); | |
187 | |
188 my $nb=scalar @list; | |
189 my $command=""; | |
190 my $prompt=">"; | |
191 | |
192 while ($nb>0){ | |
193 | |
194 for my $i (0..$#list_files){ | |
195 | |
196 $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt"; | |
197 my $pipe=0; | |
198 | |
199 for my $j ($i+1..$#list_files){ | |
200 | |
201 $command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe); | |
202 $command.=" -b ".$list_files[$j].".bedpe.txt"; | |
203 $pipe=1; | |
204 | |
205 } | |
206 | |
207 $command.=$prompt.$dir.$common_name.".bedpe.tmp;"; | |
208 $prompt=">>"; | |
209 | |
210 my $first=shift(@list_files); push(@list_files,$first); | |
211 last; | |
212 } | |
213 $nb--; | |
214 } | |
215 | |
216 system ($command); | |
217 | |
218 push(@bedpefiles,$dir.$common_name.".bedpe.tmp"); | |
219 | |
220 #Post comparison to get common links if same type only (as an option) | |
221 open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!"; | |
222 open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!"; | |
223 | |
224 while(<FILE>){ | |
225 my @t=split("\t",$_); | |
226 my $s=(split("_",$t[6]))[0]; | |
227 my ($sv1,$sv2)=($t[7],$t[18]); | |
228 splice(@t,11,$#t); | |
229 | |
230 if($same_sv_type){ | |
231 print OUT join("\t",@t)."\n" if($sv1 eq $sv2); | |
232 }else{ | |
233 print OUT join("\t",@t)."\n"; | |
234 } | |
235 } | |
236 close FILE; | |
237 close OUT; | |
238 | |
239 bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique"); | |
240 push(@bedpefiles,$dir.$common_name.".bedpe.unique"); | |
241 push(@$file_names,$common_name.".unique"); | |
242 print LOG "-- output created: ".$dir.$common_name.".compared\n"; | |
243 | |
244 #get specific links for each sample | |
245 print LOG "-- Getting specific links for each sample\n"; | |
246 for my $s (0..$#list) { | |
247 system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique"); | |
248 bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique"); | |
249 push(@bedpefiles,$dir.$list[$s].".bedpe.unique"); | |
250 push(@$file_names,$list[$s].".unique"); | |
251 print LOG "-- output created: ".$dir.$list[$s].".compared\n"; | |
252 } | |
253 | |
254 unlink(@bedpefiles); | |
255 | |
256 } | |
257 #------------------------------------------------------------------------------# | |
258 #------------------------------------------------------------------------------# | |
259 #convert the links file to the circos format | |
260 sub links2segdup{ | |
261 | |
262 my($id,$color_code,$links_file,$segdup_file)=@_; | |
263 | |
264 print LOG "\# Converting to the circos format...\n"; | |
265 | |
266 tie (my %hcolor,'Tie::IxHash'); #color-code hash table | |
267 foreach my $col (keys %{$color_code}){ | |
268 my ($min_links,$max_links)=split(",",$color_code->{$col}); | |
269 $hcolor{$col}=[$min_links,$max_links]; | |
270 } | |
271 | |
272 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; | |
273 open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n"; | |
274 | |
275 my $index=1; | |
276 while(<LINKS>){ | |
277 | |
278 my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6]; | |
279 | |
280 my $color=getColor($count,\%hcolor,"circos"); #get the color-code according the number of links | |
281 | |
282 print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n". #circos output | |
283 "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n"; | |
284 $index++; | |
285 } | |
286 | |
287 close LINKS; | |
288 close SEGDUP; | |
289 print LOG "-- output created: $segdup_file\n"; | |
290 } | |
291 #------------------------------------------------------------------------------# | |
292 #------------------------------------------------------------------------------# | |
293 #convert the links file to the bedPE format for BEDTools usage | |
294 sub links2bedPElinksfile{ | |
295 | |
296 my ($sample,$links_file,$bedpe_file)=@_; | |
297 | |
298 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; | |
299 open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n"; | |
300 | |
301 my $nb_links=1; | |
302 | |
303 while(<LINKS>){ | |
304 | |
305 chomp; | |
306 my @t=split("\t",$_); | |
307 my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6); | |
308 my $type=($chr1 eq $chr2)? "INTRA":"INTER"; | |
309 $type.="_".$t[10]; | |
310 | |
311 $start1--; $start2--; | |
312 | |
313 print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2". | |
314 "\t$sample"."_link$nb_links\t$type\t.\t.". | |
315 "\t".join("|",@t)."\n"; | |
316 | |
317 $nb_links++; | |
318 } | |
319 | |
320 close LINKS; | |
321 close BEDPE; | |
322 | |
323 } | |
324 #------------------------------------------------------------------------------# | |
325 #------------------------------------------------------------------------------# | |
326 sub bedPElinks2linksfile{ | |
327 | |
328 my ($bedpe_file,$links_file)=@_; | |
329 | |
330 open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n"; | |
331 open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n"; | |
332 | |
333 while(<BEDPE>){ | |
334 | |
335 chomp; | |
336 my $sample=(split("_",(split("\t",$_))[6]))[0]; | |
337 my @t1=(split("\t",$_))[0,1,2,3,4,5]; | |
338 my @t2=split(/\|/,(split("\t",$_))[10]); | |
339 push(@t2,$sample); | |
340 | |
341 print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n"; | |
342 | |
343 } | |
344 close BEDPE; | |
345 close LINKS; | |
346 | |
347 } | |
348 #------------------------------------------------------------------------------# | |
349 #------------------------------------------------------------------------------# | |
350 #convert the links file to the bed format | |
351 sub links2bedfile{ | |
352 | |
353 my ($tag_length,$color_code,$links_file,$bed_file)=@_; | |
354 | |
355 print LOG "\# Converting to the bed format...\n"; | |
356 | |
357 my $compare=1; | |
358 if($links_file!~/compared$/){ | |
359 $compare=0; | |
360 $tag_length->{none}->{1}=$tag_length->{1}; | |
361 $tag_length->{none}->{2}=$tag_length->{2}; | |
362 } | |
363 | |
364 #color-code hash table | |
365 tie (my %hcolor,'Tie::IxHash'); | |
366 my %color_order; | |
367 $color_order{"255,255,255"}=0; | |
368 my $n=1; | |
369 foreach my $col (keys %{$color_code}){ | |
370 my ($min_links,$max_links)=split(",",$color_code->{$col}); | |
371 $hcolor{$col}=[$min_links,$max_links]; | |
372 $color_order{$col}=$n; | |
373 $n++; | |
374 } | |
375 | |
376 my %pair; | |
377 my %pt; | |
378 $n=1; | |
379 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; | |
380 | |
381 my %str=( "F"=>"+", "R"=>"-" ); | |
382 | |
383 while(<LINKS>){ | |
384 | |
385 my @t=split; | |
386 my $sample=($compare)? pop(@t):"none"; | |
387 | |
388 my $chr1=$t[0]; | |
389 my $chr2=$t[3]; | |
390 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); | |
391 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); | |
392 my $same_chr=($chr1 eq $chr2)? 1:0; | |
393 | |
394 my $count=$t[6]; | |
395 my $color=getColor($count,\%hcolor,"bed"); | |
396 | |
397 my @pairs=deleteBadOrderSensePairs(split(",",$t[7])); | |
398 my @strand1=deleteBadOrderSensePairs(split(",",$t[8])); | |
399 my @strand2=deleteBadOrderSensePairs(split(",",$t[9])); | |
400 my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10])); | |
401 my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11])); | |
402 my @position1=deleteBadOrderSensePairs(split(",",$t[14])); | |
403 my @position2=deleteBadOrderSensePairs(split(",",$t[15])); | |
404 my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample}); | |
405 my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample}); | |
406 | |
407 | |
408 for my $p (0..$#pairs){ | |
409 | |
410 if (!exists $pair{$pairs[$p]}){ | |
411 | |
412 if($same_chr){ | |
413 | |
414 $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]}, | |
415 $start1[$p]-1, $end2[$p], $color, | |
416 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ]; | |
417 $pt{$n}=$pair{$pairs[$p]}->{0}; | |
418 $n++; | |
419 | |
420 }else{ | |
421 | |
422 $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]}, | |
423 $start1[$p]-1, $end1[$p], $color, | |
424 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0]; | |
425 $pt{$n}=$pair{$pairs[$p]}->{1}; | |
426 $n++; | |
427 | |
428 | |
429 $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]}, | |
430 $start2[$p]-1, $end2[$p], $color, | |
431 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0]; | |
432 $pt{$n}=$pair{$pairs[$p]}->{2}; | |
433 $n++; | |
434 } | |
435 }else{ | |
436 | |
437 if($same_chr){ | |
438 ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]}); | |
439 }else{ | |
440 ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]}); | |
441 ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]}); | |
442 } | |
443 } | |
444 } | |
445 } | |
446 close LINKS; | |
447 | |
448 my $nb_pairs=$n-1; | |
449 | |
450 open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n"; | |
451 print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ". | |
452 "visibility=2 itemRgb=\"On\"\n"; | |
453 | |
454 for my $i (1..$nb_pairs){ | |
455 print BED join("\t",@{$pt{$i}})."\n"; | |
456 } | |
457 | |
458 close BED; | |
459 | |
460 print LOG "-- output created: $bed_file\n"; | |
461 | |
462 undef %pair; | |
463 undef %pt; | |
464 | |
465 } | |
466 #------------------------------------------------------------------------------# | |
467 #------------------------------------------------------------------------------# | |
468 sub links2SVfile{ | |
469 | |
470 my($links_file,$sv_file)=@_; | |
471 | |
472 print LOG "\# Converting to the sv output table...\n"; | |
473 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; | |
474 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; | |
475 | |
476 my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist | |
477 chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering | |
478 final_score breakpoint1_start1-end1 breakpoint2_start2-end2); | |
479 | |
480 my $nb_links=0; | |
481 | |
482 while (<LINKS>){ | |
483 | |
484 my @t=split; | |
485 my @sv=(); | |
486 my $sv_type="-"; | |
487 my $strand_ratio="-"; | |
488 my $eq_ratio="-"; | |
489 my $eq_type="-"; | |
490 my $insert_ratio="-"; | |
491 my $link="-"; | |
492 my ($bk1, $bk2)=("-","-"); | |
493 my $score="-"; | |
494 | |
495 my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); | |
496 my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]); | |
497 my $nb_pairs=$t[6]; | |
498 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); | |
499 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); | |
500 my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER"; | |
501 | |
502 #if strand filtering | |
503 if (defined $t[16]){ | |
504 #if inter-chr link | |
505 $sv_type=$t[16]; | |
506 if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){ | |
507 $strand_ratio=floor($1/$2*100)."%"; | |
508 $score=$t[18]; | |
509 } | |
510 if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){ | |
511 #if intra-chr link with insert size filtering | |
512 $strand_ratio=floor($1/$2*100)."%"; | |
513 $link=floor($t[17]); | |
514 if($sv_type!~/^INV/){ | |
515 $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); | |
516 $score=$t[20]; | |
517 }else{ | |
518 $score=$t[19]; | |
519 } | |
520 } | |
521 } | |
522 | |
523 if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){ | |
524 | |
525 #if strand and order filtering only and/or interchr link | |
526 $eq_type=$t[18]; | |
527 $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); | |
528 ($bk1, $bk2)=($t[20],$t[21]); | |
529 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} | |
530 $score=$t[22]; | |
531 | |
532 }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){ | |
533 | |
534 #if all three filtering | |
535 $link=floor($t[17]); | |
536 $eq_type=$t[19]; | |
537 $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/); | |
538 | |
539 if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){ | |
540 $insert_ratio=floor($1/$2*100)."%"; | |
541 ($bk1, $bk2)=($t[22],$t[23]); | |
542 $score=$t[24]; | |
543 | |
544 }else{ | |
545 ($bk1, $bk2)=($t[21],$t[22]); | |
546 $score=$t[23]; | |
547 } | |
548 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} | |
549 | |
550 } | |
551 | |
552 | |
553 push(@sv, $chr_type, $sv_type,$eq_type); | |
554 push(@sv,"$chr1\t$start1-$end1"); | |
555 push(@sv, $link); | |
556 push(@sv,"$chr2\t$start2-$end2", | |
557 $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2); | |
558 | |
559 | |
560 print SV join("\t",@sv)."\n"; | |
561 } | |
562 | |
563 close LINKS; | |
564 close SV; | |
565 | |
566 system "sort -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted"; | |
567 | |
568 open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n"; | |
569 my @links=<SV>; | |
570 close SV; | |
571 | |
572 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; | |
573 | |
574 print SV join("\t",@header)."\n"; | |
575 print SV @links; | |
576 close SV; | |
577 | |
578 unlink($sv_file.".sorted"); | |
579 | |
580 print LOG "-- output created: $sv_file\n"; | |
581 | |
582 } | |
583 #------------------------------------------------------------------------------# | |
584 #------------------------------------------------------------------------------# | |
585 sub deleteBadOrderSensePairs{ | |
586 | |
587 my (@tab)=@_; | |
588 my @tab2; | |
589 | |
590 foreach my $v (@tab){ | |
591 | |
592 $v=~s/[\(\)]//g; | |
593 push(@tab2,$v) if($v!~/[\$\*\@]$/); | |
594 } | |
595 return @tab2; | |
596 } | |
597 #------------------------------------------------------------------------------# | |
598 #------------------------------------------------------------------------------# | |
599 #gets starts and ends Coords when start=leftmost given positions, directions and orders | |
600 sub getCoordswithLeftMost { | |
601 | |
602 my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_; | |
603 | |
604 for my $i (0..scalar(@{$positions})-1) { | |
605 | |
606 if($strand->[$i] eq 'F'){ | |
607 $starts->[$i]=$positions->[$i]; | |
608 $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1; | |
609 }else{ | |
610 $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1; | |
611 $ends->[$i]=$positions->[$i]; | |
612 } | |
613 } | |
614 } | |
615 #------------------------------------------------------------------------------# | |
616 #------------------------------------------------------------------------------# | |
617 sub floor { | |
618 my $nb = $_[0]; | |
619 $nb=~ s/\..*//; | |
620 return $nb; | |
621 } | |
622 #------------------------------------------------------------------------------# | |
623 #------------------------------------------------------------------------------# | |
624 sub decimal{ | |
625 | |
626 my $num=shift; | |
627 my $digs_to_cut=shift; | |
628 | |
629 $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/); | |
630 | |
631 return $num; | |
632 } | |
633 | |
634 #------------------------------------------------------------------------------# | |
635 #------------------------------------------------------------------------------# | |
636 #Sort links according the concerned chromosomes and their coordinates | |
637 sub sortLinks{ | |
638 | |
639 my ($links_file,$sortedlinks_file,$unique)=@_; | |
640 | |
641 print LOG "# Sorting links...\n"; | |
642 | |
643 my $pipe=($unique)? "| sort -u":""; | |
644 system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file"; | |
645 | |
646 } | |
647 #------------------------------------------------------------------------------# | |
648 #------------------------------------------------------------------------------# | |
649 sub getColor{ | |
650 | |
651 my($count,$hcolor,$format)=@_; | |
652 for my $col ( keys % { $hcolor} ) { | |
653 return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]); | |
654 } | |
655 return "white" if($format eq "circos"); | |
656 return "255,255,255" if($format eq "bed"); | |
657 } | |
658 #------------------------------------------------------------------------------# | |
659 #------------------------------------------------------------------------------# | |
660 #check if the configuration file is correct | |
661 sub validateconfiguration{ | |
662 | |
663 my %conf=%{$_[0]}; | |
664 my $list_prgs="@ARGV"; | |
665 | |
666 my @circos_params=qw(organism_id colorcode); | |
667 my @bed_params=qw(colorcode); | |
668 my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file); | |
669 | |
670 unless (defined($conf{general}{output_dir})) { | |
671 $conf{general}{output_dir} = "."; | |
672 } | |
673 unless (-d $conf{general}{output_dir}){ | |
674 mkdir $conf{general}{output_dir} or die; | |
675 } | |
676 $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/); | |
677 | |
678 | |
679 if($list_prgs=~/links2compare/){ | |
680 foreach my $p (@compare_params) { | |
681 die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p}); | |
682 } | |
683 | |
684 unless (defined($conf{compare}{same_sv_type})) { | |
685 $conf{compare}{same_sv_type} = 0; | |
686 } | |
687 | |
688 unless (defined($conf{compare}{min_overlap})) { | |
689 $conf{compare}{min_overlap} = 1E-9; | |
690 } | |
691 | |
692 if($conf{compare}{circos_output}){ | |
693 foreach my $p (@circos_params) { | |
694 next if($list_prgs=~/^ratio/ && $p eq "colorcode"); | |
695 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p}); | |
696 } | |
697 } | |
698 if($conf{compare}{bed_output}){ | |
699 foreach my $p (@bed_params) { | |
700 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p}); | |
701 } | |
702 die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths}); | |
703 | |
704 my @samples=split(",",$conf{compare}{list_samples}); | |
705 my @read_lengths=split(",",$conf{compare}{list_read_lengths}); | |
706 for my $i (0..$#samples){ | |
707 my @l=split("-",$read_lengths[$i]); | |
708 $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]}; | |
709 } | |
710 } | |
711 } | |
712 | |
713 | |
714 } | |
715 #------------------------------------------------------------------------------# | |
716 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# |