comparison svdetect/SVDetect_compare.pl @ 13:f090bf6ec765 draft

Uploaded
author bzeitouni
date Mon, 11 Jun 2012 12:59:11 -0400
parents
children
comparison
equal deleted inserted replaced
12:602e6912ac67 13:f090bf6ec765
1 #!/usr/bin/perl -w
2
3 =pod
4
5 =head1 NAME
6
7 SVDetect Compare for Galaxy
8
9 Version: 0.8 for Galaxy
10
11 =head1 SYNOPSIS
12
13 SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man]
14
15 =cut
16
17 # -------------------------------------------------------------------
18
19 use strict;
20 use warnings;
21
22 use Pod::Usage;
23 use Getopt::Long;
24
25 use Config::General;
26 use Tie::IxHash;
27
28 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
29 #PARSE THE COMMAND LINE
30 my %OPT;
31 GetOptions(\%OPT,
32 'conf=s',
33 'out1=s', #GALAXY
34 'out2=s', #GALAXY
35 'out3=s', #GALAXY
36 'out4=s', #GALAXY
37 'out5=s', #GALAXY
38 'out6=s', #GALAXY
39 'out7=s', #GALAXY
40 'out8=s', #GALAXY
41 'out9=s', #GALAXY
42 'l=s', #GALAXY
43 'N=s', #GALAXY
44 'help',
45 'man'
46 );
47
48 pod2usage() if $OPT{help};
49 pod2usage(-verbose=>2) if $OPT{man};
50 pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});
51
52
53 pod2usage() if(@ARGV<1);
54
55 tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare);
56
57 foreach my $command (@ARGV){
58 pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
59 }
60 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
61
62 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
63 #READ THE CONFIGURATION FILE
64 my $conf=Config::General->new( -ConfigFile => $OPT{conf},
65 -Tie => "Tie::IxHash",
66 -AllowMultiOptions => 1,
67 -LowerCaseNames => 1,
68 -AutoTrue => 1);
69 my %CONF= $conf->getall;
70 validateconfiguration(\%CONF); #validation of the configuration parameters
71
72
73 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY
74 my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY
75
76 my $pt_log_file=$OPT{l}; #GALAXY
77 my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY
78 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY
79
80 my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference
81 my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference
82 my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference
83
84 $CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY
85 $CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY
86
87 $CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY
88 $CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY
89
90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
91
92 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
93 #COMMAND EXECUTION
94 foreach my $command (@ARGV){
95 &{$func{$command}}();
96 }
97 print LOG "-- end\n";
98
99 close LOG;#GALAXY
100 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
101
102 exit(0);
103 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
104
105 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
106 #FUNCTIONS
107
108 # -----------------------------------------------------------------------------#
109 #MAIN FUNCTION number 5:Comparison between samples, common or specific links
110 sub links2compare{
111
112 my @compare_files;
113
114 compareSamples($CONF{general}{output_dir},
115 $CONF{compare}{list_samples},
116 $CONF{compare}{sample_link_file},
117 $CONF{compare}{reference_link_file},
118 $CONF{compare}{min_overlap},
119 $CONF{compare}{same_sv_type},
120 \@compare_files);
121
122 my $pt_ind=0;
123
124 for my $input_file (@compare_files){
125
126 $input_file=$CONF{general}{output_dir}.$input_file;
127
128 my $output_file=$input_file;
129 $output_file=~s/unique$/compared/;
130
131 sortLinks($input_file, $output_file,1);
132
133 if($CONF{compare}{circos_output}){
134 links2segdup($CONF{circos}{organism_id},
135 $CONF{circos}{colorcode},
136 $output_file,
137 $output_file.".segdup.txt");
138 system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY
139 }
140
141 if($CONF{compare}{bed_output}){
142 links2bedfile($CONF{compare}{read_lengths},
143 $CONF{bed}{colorcode},
144 $output_file,
145 $output_file.".bed");
146 system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY
147 }
148
149 if($CONF{compare}{sv_output}){
150
151 links2SVfile ($output_file, $output_file.".sv.txt");
152 system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY
153 }
154 $pt_ind++;
155
156 }
157 unlink(@compare_files);
158
159 }
160 #------------------------------------------------------------------------------#
161 #------------------------------------------------------------------------------#
162 sub compareSamples{
163
164 my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_;
165
166 my @bedpefiles;
167 my @list=split(",",$list_samples);
168 my @list_files=($sample_file,$reference_file);
169
170 print LOG "\# Comparison procedure...\n";
171 print LOG "-- samples=$list_samples\n".
172 "-- minimum overlap=$min_overlap\n".
173 "-- same SV type=$same_sv_type\n";
174
175 #conversion of links to bedPE format file
176 print LOG "-- Conversion of links.filtered files to bedPE format\n";
177 for my $s (0..$#list) {
178
179 links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt");
180 push(@bedpefiles,$list_files[$s].".bedpe.txt");
181
182 }
183
184 #get common links between all samples compared
185 print LOG "-- Getting common links between all samples with BEDTools\n";
186 my $common_name=join(".",@list);
187
188 my $nb=scalar @list;
189 my $command="";
190 my $prompt=">";
191
192 while ($nb>0){
193
194 for my $i (0..$#list_files){
195
196 $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt";
197 my $pipe=0;
198
199 for my $j ($i+1..$#list_files){
200
201 $command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe);
202 $command.=" -b ".$list_files[$j].".bedpe.txt";
203 $pipe=1;
204
205 }
206
207 $command.=$prompt.$dir.$common_name.".bedpe.tmp;";
208 $prompt=">>";
209
210 my $first=shift(@list_files); push(@list_files,$first);
211 last;
212 }
213 $nb--;
214 }
215
216 system ($command);
217
218 push(@bedpefiles,$dir.$common_name.".bedpe.tmp");
219
220 #Post comparison to get common links if same type only (as an option)
221 open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!";
222 open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!";
223
224 while(<FILE>){
225 my @t=split("\t",$_);
226 my $s=(split("_",$t[6]))[0];
227 my ($sv1,$sv2)=($t[7],$t[18]);
228 splice(@t,11,$#t);
229
230 if($same_sv_type){
231 print OUT join("\t",@t)."\n" if($sv1 eq $sv2);
232 }else{
233 print OUT join("\t",@t)."\n";
234 }
235 }
236 close FILE;
237 close OUT;
238
239 bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique");
240 push(@bedpefiles,$dir.$common_name.".bedpe.unique");
241 push(@$file_names,$common_name.".unique");
242 print LOG "-- output created: ".$dir.$common_name.".compared\n";
243
244 #get specific links for each sample
245 print LOG "-- Getting specific links for each sample\n";
246 for my $s (0..$#list) {
247 system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique");
248 bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique");
249 push(@bedpefiles,$dir.$list[$s].".bedpe.unique");
250 push(@$file_names,$list[$s].".unique");
251 print LOG "-- output created: ".$dir.$list[$s].".compared\n";
252 }
253
254 unlink(@bedpefiles);
255
256 }
257 #------------------------------------------------------------------------------#
258 #------------------------------------------------------------------------------#
259 #convert the links file to the circos format
260 sub links2segdup{
261
262 my($id,$color_code,$links_file,$segdup_file)=@_;
263
264 print LOG "\# Converting to the circos format...\n";
265
266 tie (my %hcolor,'Tie::IxHash'); #color-code hash table
267 foreach my $col (keys %{$color_code}){
268 my ($min_links,$max_links)=split(",",$color_code->{$col});
269 $hcolor{$col}=[$min_links,$max_links];
270 }
271
272 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
273 open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
274
275 my $index=1;
276 while(<LINKS>){
277
278 my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6];
279
280 my $color=getColor($count,\%hcolor,"circos"); #get the color-code according the number of links
281
282 print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n". #circos output
283 "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
284 $index++;
285 }
286
287 close LINKS;
288 close SEGDUP;
289 print LOG "-- output created: $segdup_file\n";
290 }
291 #------------------------------------------------------------------------------#
292 #------------------------------------------------------------------------------#
293 #convert the links file to the bedPE format for BEDTools usage
294 sub links2bedPElinksfile{
295
296 my ($sample,$links_file,$bedpe_file)=@_;
297
298 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
299 open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n";
300
301 my $nb_links=1;
302
303 while(<LINKS>){
304
305 chomp;
306 my @t=split("\t",$_);
307 my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
308 my $type=($chr1 eq $chr2)? "INTRA":"INTER";
309 $type.="_".$t[10];
310
311 $start1--; $start2--;
312
313 print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
314 "\t$sample"."_link$nb_links\t$type\t.\t.".
315 "\t".join("|",@t)."\n";
316
317 $nb_links++;
318 }
319
320 close LINKS;
321 close BEDPE;
322
323 }
324 #------------------------------------------------------------------------------#
325 #------------------------------------------------------------------------------#
326 sub bedPElinks2linksfile{
327
328 my ($bedpe_file,$links_file)=@_;
329
330 open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n";
331 open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n";
332
333 while(<BEDPE>){
334
335 chomp;
336 my $sample=(split("_",(split("\t",$_))[6]))[0];
337 my @t1=(split("\t",$_))[0,1,2,3,4,5];
338 my @t2=split(/\|/,(split("\t",$_))[10]);
339 push(@t2,$sample);
340
341 print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n";
342
343 }
344 close BEDPE;
345 close LINKS;
346
347 }
348 #------------------------------------------------------------------------------#
349 #------------------------------------------------------------------------------#
350 #convert the links file to the bed format
351 sub links2bedfile{
352
353 my ($tag_length,$color_code,$links_file,$bed_file)=@_;
354
355 print LOG "\# Converting to the bed format...\n";
356
357 my $compare=1;
358 if($links_file!~/compared$/){
359 $compare=0;
360 $tag_length->{none}->{1}=$tag_length->{1};
361 $tag_length->{none}->{2}=$tag_length->{2};
362 }
363
364 #color-code hash table
365 tie (my %hcolor,'Tie::IxHash');
366 my %color_order;
367 $color_order{"255,255,255"}=0;
368 my $n=1;
369 foreach my $col (keys %{$color_code}){
370 my ($min_links,$max_links)=split(",",$color_code->{$col});
371 $hcolor{$col}=[$min_links,$max_links];
372 $color_order{$col}=$n;
373 $n++;
374 }
375
376 my %pair;
377 my %pt;
378 $n=1;
379 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
380
381 my %str=( "F"=>"+", "R"=>"-" );
382
383 while(<LINKS>){
384
385 my @t=split;
386 my $sample=($compare)? pop(@t):"none";
387
388 my $chr1=$t[0];
389 my $chr2=$t[3];
390 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
391 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
392 my $same_chr=($chr1 eq $chr2)? 1:0;
393
394 my $count=$t[6];
395 my $color=getColor($count,\%hcolor,"bed");
396
397 my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
398 my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
399 my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
400 my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
401 my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
402 my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
403 my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
404 my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
405 my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
406
407
408 for my $p (0..$#pairs){
409
410 if (!exists $pair{$pairs[$p]}){
411
412 if($same_chr){
413
414 $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
415 $start1[$p]-1, $end2[$p], $color,
416 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];
417 $pt{$n}=$pair{$pairs[$p]}->{0};
418 $n++;
419
420 }else{
421
422 $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
423 $start1[$p]-1, $end1[$p], $color,
424 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
425 $pt{$n}=$pair{$pairs[$p]}->{1};
426 $n++;
427
428
429 $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
430 $start2[$p]-1, $end2[$p], $color,
431 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
432 $pt{$n}=$pair{$pairs[$p]}->{2};
433 $n++;
434 }
435 }else{
436
437 if($same_chr){
438 ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
439 }else{
440 ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
441 ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
442 }
443 }
444 }
445 }
446 close LINKS;
447
448 my $nb_pairs=$n-1;
449
450 open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
451 print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
452 "visibility=2 itemRgb=\"On\"\n";
453
454 for my $i (1..$nb_pairs){
455 print BED join("\t",@{$pt{$i}})."\n";
456 }
457
458 close BED;
459
460 print LOG "-- output created: $bed_file\n";
461
462 undef %pair;
463 undef %pt;
464
465 }
466 #------------------------------------------------------------------------------#
467 #------------------------------------------------------------------------------#
468 sub links2SVfile{
469
470 my($links_file,$sv_file)=@_;
471
472 print LOG "\# Converting to the sv output table...\n";
473 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
474 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
475
476 my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
477 chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
478 final_score breakpoint1_start1-end1 breakpoint2_start2-end2);
479
480 my $nb_links=0;
481
482 while (<LINKS>){
483
484 my @t=split;
485 my @sv=();
486 my $sv_type="-";
487 my $strand_ratio="-";
488 my $eq_ratio="-";
489 my $eq_type="-";
490 my $insert_ratio="-";
491 my $link="-";
492 my ($bk1, $bk2)=("-","-");
493 my $score="-";
494
495 my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]);
496 my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
497 my $nb_pairs=$t[6];
498 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
499 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
500 my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";
501
502 #if strand filtering
503 if (defined $t[16]){
504 #if inter-chr link
505 $sv_type=$t[16];
506 if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
507 $strand_ratio=floor($1/$2*100)."%";
508 $score=$t[18];
509 }
510 if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
511 #if intra-chr link with insert size filtering
512 $strand_ratio=floor($1/$2*100)."%";
513 $link=floor($t[17]);
514 if($sv_type!~/^INV/){
515 $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
516 $score=$t[20];
517 }else{
518 $score=$t[19];
519 }
520 }
521 }
522
523 if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){
524
525 #if strand and order filtering only and/or interchr link
526 $eq_type=$t[18];
527 $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
528 ($bk1, $bk2)=($t[20],$t[21]);
529 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
530 $score=$t[22];
531
532 }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){
533
534 #if all three filtering
535 $link=floor($t[17]);
536 $eq_type=$t[19];
537 $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);
538
539 if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
540 $insert_ratio=floor($1/$2*100)."%";
541 ($bk1, $bk2)=($t[22],$t[23]);
542 $score=$t[24];
543
544 }else{
545 ($bk1, $bk2)=($t[21],$t[22]);
546 $score=$t[23];
547 }
548 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
549
550 }
551
552
553 push(@sv, $chr_type, $sv_type,$eq_type);
554 push(@sv,"$chr1\t$start1-$end1");
555 push(@sv, $link);
556 push(@sv,"$chr2\t$start2-$end2",
557 $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);
558
559
560 print SV join("\t",@sv)."\n";
561 }
562
563 close LINKS;
564 close SV;
565
566 system "sort -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted";
567
568 open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
569 my @links=<SV>;
570 close SV;
571
572 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
573
574 print SV join("\t",@header)."\n";
575 print SV @links;
576 close SV;
577
578 unlink($sv_file.".sorted");
579
580 print LOG "-- output created: $sv_file\n";
581
582 }
583 #------------------------------------------------------------------------------#
584 #------------------------------------------------------------------------------#
585 sub deleteBadOrderSensePairs{
586
587 my (@tab)=@_;
588 my @tab2;
589
590 foreach my $v (@tab){
591
592 $v=~s/[\(\)]//g;
593 push(@tab2,$v) if($v!~/[\$\*\@]$/);
594 }
595 return @tab2;
596 }
597 #------------------------------------------------------------------------------#
598 #------------------------------------------------------------------------------#
599 #gets starts and ends Coords when start=leftmost given positions, directions and orders
600 sub getCoordswithLeftMost {
601
602 my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_;
603
604 for my $i (0..scalar(@{$positions})-1) {
605
606 if($strand->[$i] eq 'F'){
607 $starts->[$i]=$positions->[$i];
608 $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;
609 }else{
610 $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;
611 $ends->[$i]=$positions->[$i];
612 }
613 }
614 }
615 #------------------------------------------------------------------------------#
616 #------------------------------------------------------------------------------#
617 sub floor {
618 my $nb = $_[0];
619 $nb=~ s/\..*//;
620 return $nb;
621 }
622 #------------------------------------------------------------------------------#
623 #------------------------------------------------------------------------------#
624 sub decimal{
625
626 my $num=shift;
627 my $digs_to_cut=shift;
628
629 $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
630
631 return $num;
632 }
633
634 #------------------------------------------------------------------------------#
635 #------------------------------------------------------------------------------#
636 #Sort links according the concerned chromosomes and their coordinates
637 sub sortLinks{
638
639 my ($links_file,$sortedlinks_file,$unique)=@_;
640
641 print LOG "# Sorting links...\n";
642
643 my $pipe=($unique)? "| sort -u":"";
644 system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";
645
646 }
647 #------------------------------------------------------------------------------#
648 #------------------------------------------------------------------------------#
649 sub getColor{
650
651 my($count,$hcolor,$format)=@_;
652 for my $col ( keys % { $hcolor} ) {
653 return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);
654 }
655 return "white" if($format eq "circos");
656 return "255,255,255" if($format eq "bed");
657 }
658 #------------------------------------------------------------------------------#
659 #------------------------------------------------------------------------------#
660 #check if the configuration file is correct
661 sub validateconfiguration{
662
663 my %conf=%{$_[0]};
664 my $list_prgs="@ARGV";
665
666 my @circos_params=qw(organism_id colorcode);
667 my @bed_params=qw(colorcode);
668 my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file);
669
670 unless (defined($conf{general}{output_dir})) {
671 $conf{general}{output_dir} = ".";
672 }
673 unless (-d $conf{general}{output_dir}){
674 mkdir $conf{general}{output_dir} or die;
675 }
676 $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/);
677
678
679 if($list_prgs=~/links2compare/){
680 foreach my $p (@compare_params) {
681 die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
682 }
683
684 unless (defined($conf{compare}{same_sv_type})) {
685 $conf{compare}{same_sv_type} = 0;
686 }
687
688 unless (defined($conf{compare}{min_overlap})) {
689 $conf{compare}{min_overlap} = 1E-9;
690 }
691
692 if($conf{compare}{circos_output}){
693 foreach my $p (@circos_params) {
694 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
695 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
696 }
697 }
698 if($conf{compare}{bed_output}){
699 foreach my $p (@bed_params) {
700 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
701 }
702 die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
703
704 my @samples=split(",",$conf{compare}{list_samples});
705 my @read_lengths=split(",",$conf{compare}{list_read_lengths});
706 for my $i (0..$#samples){
707 my @l=split("-",$read_lengths[$i]);
708 $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
709 }
710 }
711 }
712
713
714 }
715 #------------------------------------------------------------------------------#
716 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#