7
|
1 #!/usr/bin/perl -w
|
|
2
|
|
3 =pod
|
|
4
|
|
5 =head1 NAME
|
|
6
|
|
7 SVDetect Compare for Galaxy
|
|
8
|
|
9 Version: 0.8 for Galaxy
|
|
10
|
|
11 =head1 SYNOPSIS
|
|
12
|
|
13 SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man]
|
|
14
|
|
15 =cut
|
|
16
|
|
17 # -------------------------------------------------------------------
|
|
18
|
|
19 use strict;
|
|
20 use warnings;
|
|
21
|
|
22 use Pod::Usage;
|
|
23 use Getopt::Long;
|
|
24
|
|
25 use Config::General;
|
|
26 use Tie::IxHash;
|
|
27
|
|
28 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
29 #PARSE THE COMMAND LINE
|
|
30 my %OPT;
|
|
31 GetOptions(\%OPT,
|
|
32 'conf=s',
|
|
33 'out1=s', #GALAXY
|
|
34 'out2=s', #GALAXY
|
|
35 'out3=s', #GALAXY
|
|
36 'out4=s', #GALAXY
|
|
37 'out5=s', #GALAXY
|
|
38 'out6=s', #GALAXY
|
|
39 'out7=s', #GALAXY
|
|
40 'out8=s', #GALAXY
|
|
41 'out9=s', #GALAXY
|
|
42 'l=s', #GALAXY
|
|
43 'N=s', #GALAXY
|
|
44 'help',
|
|
45 'man'
|
|
46 );
|
|
47
|
|
48 pod2usage() if $OPT{help};
|
|
49 pod2usage(-verbose=>2) if $OPT{man};
|
|
50 pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});
|
|
51
|
|
52
|
|
53 pod2usage() if(@ARGV<1);
|
|
54
|
|
55 tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare);
|
|
56
|
|
57 foreach my $command (@ARGV){
|
|
58 pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
|
|
59 }
|
|
60 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
61
|
|
62 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
63 #READ THE CONFIGURATION FILE
|
|
64 my $conf=Config::General->new( -ConfigFile => $OPT{conf},
|
|
65 -Tie => "Tie::IxHash",
|
|
66 -AllowMultiOptions => 1,
|
|
67 -LowerCaseNames => 1,
|
|
68 -AutoTrue => 1);
|
|
69 my %CONF= $conf->getall;
|
|
70 validateconfiguration(\%CONF); #validation of the configuration parameters
|
|
71
|
|
72
|
|
73 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY
|
|
74 my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY
|
|
75
|
|
76 my $pt_log_file=$OPT{l}; #GALAXY
|
|
77 my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY
|
|
78 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY
|
|
79
|
|
80 my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference
|
|
81 my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference
|
|
82 my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference
|
|
83
|
|
84 $CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY
|
|
85 $CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY
|
|
86
|
|
87 $CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY
|
|
88 $CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY
|
|
89
|
|
90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
91
|
|
92 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
93 #COMMAND EXECUTION
|
|
94 foreach my $command (@ARGV){
|
|
95 &{$func{$command}}();
|
|
96 }
|
|
97 print LOG "-- end\n";
|
|
98
|
|
99 close LOG;#GALAXY
|
|
100 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
|
|
101
|
|
102 exit(0);
|
|
103 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
104
|
|
105 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|
|
106 #FUNCTIONS
|
|
107
|
|
108 # -----------------------------------------------------------------------------#
|
|
109 #MAIN FUNCTION number 5:Comparison between samples, common or specific links
|
|
110 sub links2compare{
|
|
111
|
|
112 my @compare_files;
|
|
113
|
|
114 compareSamples($CONF{general}{output_dir},
|
|
115 $CONF{compare}{list_samples},
|
|
116 $CONF{compare}{sample_link_file},
|
|
117 $CONF{compare}{reference_link_file},
|
|
118 $CONF{compare}{min_overlap},
|
|
119 $CONF{compare}{same_sv_type},
|
|
120 \@compare_files);
|
|
121
|
|
122 my $pt_ind=0;
|
|
123
|
|
124 for my $input_file (@compare_files){
|
|
125
|
|
126 $input_file=$CONF{general}{output_dir}.$input_file;
|
|
127
|
|
128 my $output_file=$input_file;
|
|
129 $output_file=~s/unique$/compared/;
|
|
130
|
|
131 sortLinks($input_file, $output_file,1);
|
|
132
|
|
133 if($CONF{compare}{circos_output}){
|
|
134 links2segdup($CONF{circos}{organism_id},
|
|
135 $CONF{circos}{colorcode},
|
|
136 $output_file,
|
|
137 $output_file.".segdup.txt");
|
|
138 system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY
|
|
139 }
|
|
140
|
|
141 if($CONF{compare}{bed_output}){
|
|
142 links2bedfile($CONF{compare}{read_lengths},
|
|
143 $CONF{bed}{colorcode},
|
|
144 $output_file,
|
|
145 $output_file.".bed");
|
|
146 system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY
|
|
147 }
|
|
148
|
|
149 if($CONF{compare}{sv_output}){
|
|
150
|
|
151 links2SVfile ($output_file, $output_file.".sv.txt");
|
|
152 system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY
|
|
153 }
|
|
154 $pt_ind++;
|
|
155
|
|
156 }
|
|
157 unlink(@compare_files);
|
|
158
|
|
159 }
|
|
160 #------------------------------------------------------------------------------#
|
|
161 #------------------------------------------------------------------------------#
|
|
162 sub compareSamples{
|
|
163
|
|
164 my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_;
|
|
165
|
|
166 my @bedpefiles;
|
|
167 my @list=split(",",$list_samples);
|
|
168 my @list_files=($sample_file,$reference_file);
|
|
169
|
|
170 print LOG "\# Comparison procedure...\n";
|
|
171 print LOG "-- samples=$list_samples\n".
|
|
172 "-- minimum overlap=$min_overlap\n".
|
|
173 "-- same SV type=$same_sv_type\n";
|
|
174
|
|
175 #conversion of links to bedPE format file
|
|
176 print LOG "-- Conversion of links.filtered files to bedPE format\n";
|
|
177 for my $s (0..$#list) {
|
|
178
|
|
179 links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt");
|
|
180 push(@bedpefiles,$list_files[$s].".bedpe.txt");
|
|
181
|
|
182 }
|
|
183
|
|
184 #get common links between all samples compared
|
|
185 print LOG "-- Getting common links between all samples with BEDTools\n";
|
|
186 my $common_name=join(".",@list);
|
|
187
|
|
188 my $nb=scalar @list;
|
|
189 my $command="";
|
|
190 my $prompt=">";
|
|
191
|
|
192 while ($nb>0){
|
|
193
|
|
194 for my $i (0..$#list_files){
|
|
195
|
|
196 $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt";
|
|
197 my $pipe=0;
|
|
198
|
|
199 for my $j ($i+1..$#list_files){
|
|
200
|
|
201 $command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe);
|
|
202 $command.=" -b ".$list_files[$j].".bedpe.txt";
|
|
203 $pipe=1;
|
|
204
|
|
205 }
|
|
206
|
|
207 $command.=$prompt.$dir.$common_name.".bedpe.tmp;";
|
|
208 $prompt=">>";
|
|
209
|
|
210 my $first=shift(@list_files); push(@list_files,$first);
|
|
211 last;
|
|
212 }
|
|
213 $nb--;
|
|
214 }
|
|
215
|
|
216 system ($command);
|
|
217
|
|
218 push(@bedpefiles,$dir.$common_name.".bedpe.tmp");
|
|
219
|
|
220 #Post comparison to get common links if same type only (as an option)
|
|
221 open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!";
|
|
222 open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!";
|
|
223
|
|
224 while(<FILE>){
|
|
225 my @t=split("\t",$_);
|
|
226 my $s=(split("_",$t[6]))[0];
|
|
227 my ($sv1,$sv2)=($t[7],$t[18]);
|
|
228 splice(@t,11,$#t);
|
|
229
|
|
230 if($same_sv_type){
|
|
231 print OUT join("\t",@t)."\n" if($sv1 eq $sv2);
|
|
232 }else{
|
|
233 print OUT join("\t",@t)."\n";
|
|
234 }
|
|
235 }
|
|
236 close FILE;
|
|
237 close OUT;
|
|
238
|
|
239 bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique");
|
|
240 push(@bedpefiles,$dir.$common_name.".bedpe.unique");
|
|
241 push(@$file_names,$common_name.".unique");
|
|
242 print LOG "-- output created: ".$dir.$common_name.".compared\n";
|
|
243
|
|
244 #get specific links for each sample
|
|
245 print LOG "-- Getting specific links for each sample\n";
|
|
246 for my $s (0..$#list) {
|
|
247 system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique");
|
|
248 bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique");
|
|
249 push(@bedpefiles,$dir.$list[$s].".bedpe.unique");
|
|
250 push(@$file_names,$list[$s].".unique");
|
|
251 print LOG "-- output created: ".$dir.$list[$s].".compared\n";
|
|
252 }
|
|
253
|
|
254 unlink(@bedpefiles);
|
|
255
|
|
256 }
|
|
257 #------------------------------------------------------------------------------#
|
|
258 #------------------------------------------------------------------------------#
|
|
259 #convert the links file to the circos format
|
|
260 sub links2segdup{
|
|
261
|
|
262 my($id,$color_code,$links_file,$segdup_file)=@_;
|
|
263
|
|
264 print LOG "\# Converting to the circos format...\n";
|
|
265
|
|
266 tie (my %hcolor,'Tie::IxHash'); #color-code hash table
|
|
267 foreach my $col (keys %{$color_code}){
|
|
268 my ($min_links,$max_links)=split(",",$color_code->{$col});
|
|
269 $hcolor{$col}=[$min_links,$max_links];
|
|
270 }
|
|
271
|
|
272 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
|
|
273 open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
|
|
274
|
|
275 my $index=1;
|
|
276 while(<LINKS>){
|
|
277
|
|
278 my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6];
|
|
279
|
|
280 my $color=getColor($count,\%hcolor,"circos"); #get the color-code according the number of links
|
|
281
|
|
282 print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n". #circos output
|
|
283 "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
|
|
284 $index++;
|
|
285 }
|
|
286
|
|
287 close LINKS;
|
|
288 close SEGDUP;
|
|
289 print LOG "-- output created: $segdup_file\n";
|
|
290 }
|
|
291 #------------------------------------------------------------------------------#
|
|
292 #------------------------------------------------------------------------------#
|
|
293 #convert the links file to the bedPE format for BEDTools usage
|
|
294 sub links2bedPElinksfile{
|
|
295
|
|
296 my ($sample,$links_file,$bedpe_file)=@_;
|
|
297
|
|
298 open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
|
|
299 open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n";
|
|
300
|
|
301 my $nb_links=1;
|
|
302
|
|
303 while(<LINKS>){
|
|
304
|
|
305 chomp;
|
|
306 my @t=split("\t",$_);
|
|
307 my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
|
|
308 my $type=($chr1 eq $chr2)? "INTRA":"INTER";
|
|
309 $type.="_".$t[10];
|
|
310
|
|
311 $start1--; $start2--;
|
|
312
|
|
313 print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
|
|
314 "\t$sample"."_link$nb_links\t$type\t.\t.".
|
|
315 "\t".join("|",@t)."\n";
|
|
316
|
|
317 $nb_links++;
|
|
318 }
|
|
319
|
|
320 close LINKS;
|
|
321 close BEDPE;
|
|
322
|
|
323 }
|
|
324 #------------------------------------------------------------------------------#
|
|
325 #------------------------------------------------------------------------------#
|
|
326 sub bedPElinks2linksfile{
|
|
327
|
|
328 my ($bedpe_file,$links_file)=@_;
|
|
329
|
|
330 open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n";
|
|
331 open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n";
|
|
332
|
|
333 while(<BEDPE>){
|
|
334
|
|
335 chomp;
|
|
336 my $sample=(split("_",(split("\t",$_))[6]))[0];
|
|
337 my @t1=(split("\t",$_))[0,1,2,3,4,5];
|
|
338 my @t2=split(/\|/,(split("\t",$_))[10]);
|
|
339 push(@t2,$sample);
|
|
340
|
|
341 print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n";
|
|
342
|
|
343 }
|
|
344 close BEDPE;
|
|
345 close LINKS;
|
|
346
|
|
347 }
|
|
348 #------------------------------------------------------------------------------#
|
|
349 #------------------------------------------------------------------------------#
|
|
350 #convert the links file to the bed format
|
|
351 sub links2bedfile{
|
|
352
|
|
353 my ($tag_length,$color_code,$links_file,$bed_file)=@_;
|
|
354
|
|
355 print LOG "\# Converting to the bed format...\n";
|
|
356
|
|
357 my $compare=1;
|
|
358 if($links_file!~/compared$/){
|
|
359 $compare=0;
|
|
360 $tag_length->{none}->{1}=$tag_length->{1};
|
|
361 $tag_length->{none}->{2}=$tag_length->{2};
|
|
362 }
|
|
363
|
|
364 #color-code hash table
|
|
365 tie (my %hcolor,'Tie::IxHash');
|
|
366 my %color_order;
|
|
367 $color_order{"255,255,255"}=0;
|
|
368 my $n=1;
|
|
369 foreach my $col (keys %{$color_code}){
|
|
370 my ($min_links,$max_links)=split(",",$color_code->{$col});
|
|
371 $hcolor{$col}=[$min_links,$max_links];
|
|
372 $color_order{$col}=$n;
|
|
373 $n++;
|
|
374 }
|
|
375
|
|
376 my %pair;
|
|
377 my %pt;
|
|
378 $n=1;
|
|
379 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
|
|
380
|
|
381 my %str=( "F"=>"+", "R"=>"-" );
|
|
382
|
|
383 while(<LINKS>){
|
|
384
|
|
385 my @t=split;
|
|
386 my $sample=($compare)? pop(@t):"none";
|
|
387
|
|
388 my $chr1=$t[0];
|
|
389 my $chr2=$t[3];
|
|
390 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
|
|
391 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
|
|
392 my $same_chr=($chr1 eq $chr2)? 1:0;
|
|
393
|
|
394 my $count=$t[6];
|
|
395 my $color=getColor($count,\%hcolor,"bed");
|
|
396
|
|
397 my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
|
|
398 my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
|
|
399 my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
|
|
400 my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
|
|
401 my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
|
|
402 my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
|
|
403 my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
|
|
404 my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
|
|
405 my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
|
|
406
|
|
407
|
|
408 for my $p (0..$#pairs){
|
|
409
|
|
410 if (!exists $pair{$pairs[$p]}){
|
|
411
|
|
412 if($same_chr){
|
|
413
|
|
414 $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
|
|
415 $start1[$p]-1, $end2[$p], $color,
|
|
416 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];
|
|
417 $pt{$n}=$pair{$pairs[$p]}->{0};
|
|
418 $n++;
|
|
419
|
|
420 }else{
|
|
421
|
|
422 $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
|
|
423 $start1[$p]-1, $end1[$p], $color,
|
|
424 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
|
|
425 $pt{$n}=$pair{$pairs[$p]}->{1};
|
|
426 $n++;
|
|
427
|
|
428
|
|
429 $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
|
|
430 $start2[$p]-1, $end2[$p], $color,
|
|
431 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
|
|
432 $pt{$n}=$pair{$pairs[$p]}->{2};
|
|
433 $n++;
|
|
434 }
|
|
435 }else{
|
|
436
|
|
437 if($same_chr){
|
|
438 ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
|
|
439 }else{
|
|
440 ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
|
|
441 ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
|
|
442 }
|
|
443 }
|
|
444 }
|
|
445 }
|
|
446 close LINKS;
|
|
447
|
|
448 my $nb_pairs=$n-1;
|
|
449
|
|
450 open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
|
|
451 print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
|
|
452 "visibility=2 itemRgb=\"On\"\n";
|
|
453
|
|
454 for my $i (1..$nb_pairs){
|
|
455 print BED join("\t",@{$pt{$i}})."\n";
|
|
456 }
|
|
457
|
|
458 close BED;
|
|
459
|
|
460 print LOG "-- output created: $bed_file\n";
|
|
461
|
|
462 undef %pair;
|
|
463 undef %pt;
|
|
464
|
|
465 }
|
|
466 #------------------------------------------------------------------------------#
|
|
467 #------------------------------------------------------------------------------#
|
|
468 sub links2SVfile{
|
|
469
|
|
470 my($links_file,$sv_file)=@_;
|
|
471
|
|
472 print LOG "\# Converting to the sv output table...\n";
|
|
473 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
|
|
474 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
|
|
475
|
|
476 my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
|
|
477 chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
|
|
478 final_score breakpoint1_start1-end1 breakpoint2_start2-end2);
|
|
479
|
|
480 my $nb_links=0;
|
|
481
|
|
482 while (<LINKS>){
|
|
483
|
|
484 my @t=split;
|
|
485 my @sv=();
|
|
486 my $sv_type="-";
|
|
487 my $strand_ratio="-";
|
|
488 my $eq_ratio="-";
|
|
489 my $eq_type="-";
|
|
490 my $insert_ratio="-";
|
|
491 my $link="-";
|
|
492 my ($bk1, $bk2)=("-","-");
|
|
493 my $score="-";
|
|
494
|
|
495 my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]);
|
|
496 my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
|
|
497 my $nb_pairs=$t[6];
|
|
498 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
|
|
499 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
|
|
500 my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";
|
|
501
|
|
502 #if strand filtering
|
|
503 if (defined $t[16]){
|
|
504 #if inter-chr link
|
|
505 $sv_type=$t[16];
|
|
506 if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
|
|
507 $strand_ratio=floor($1/$2*100)."%";
|
|
508 $score=$t[18];
|
|
509 }
|
|
510 if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
|
|
511 #if intra-chr link with insert size filtering
|
|
512 $strand_ratio=floor($1/$2*100)."%";
|
|
513 $link=floor($t[17]);
|
|
514 if($sv_type!~/^INV/){
|
|
515 $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
|
|
516 $score=$t[20];
|
|
517 }else{
|
|
518 $score=$t[19];
|
|
519 }
|
|
520 }
|
|
521 }
|
|
522
|
|
523 if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){
|
|
524
|
|
525 #if strand and order filtering only and/or interchr link
|
|
526 $eq_type=$t[18];
|
|
527 $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
|
|
528 ($bk1, $bk2)=($t[20],$t[21]);
|
|
529 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
|
|
530 $score=$t[22];
|
|
531
|
|
532 }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){
|
|
533
|
|
534 #if all three filtering
|
|
535 $link=floor($t[17]);
|
|
536 $eq_type=$t[19];
|
|
537 $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);
|
|
538
|
|
539 if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
|
|
540 $insert_ratio=floor($1/$2*100)."%";
|
|
541 ($bk1, $bk2)=($t[22],$t[23]);
|
|
542 $score=$t[24];
|
|
543
|
|
544 }else{
|
|
545 ($bk1, $bk2)=($t[21],$t[22]);
|
|
546 $score=$t[23];
|
|
547 }
|
|
548 foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
|
|
549
|
|
550 }
|
|
551
|
|
552
|
|
553 push(@sv, $chr_type, $sv_type,$eq_type);
|
|
554 push(@sv,"$chr1\t$start1-$end1");
|
|
555 push(@sv, $link);
|
|
556 push(@sv,"$chr2\t$start2-$end2",
|
|
557 $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);
|
|
558
|
|
559
|
|
560 print SV join("\t",@sv)."\n";
|
|
561 }
|
|
562
|
|
563 close LINKS;
|
|
564 close SV;
|
|
565
|
|
566 system "sort -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted";
|
|
567
|
|
568 open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
|
|
569 my @links=<SV>;
|
|
570 close SV;
|
|
571
|
|
572 open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
|
|
573
|
|
574 print SV join("\t",@header)."\n";
|
|
575 print SV @links;
|
|
576 close SV;
|
|
577
|
|
578 unlink($sv_file.".sorted");
|
|
579
|
|
580 print LOG "-- output created: $sv_file\n";
|
|
581
|
|
582 }
|
|
583 #------------------------------------------------------------------------------#
|
|
584 #------------------------------------------------------------------------------#
|
|
585 sub deleteBadOrderSensePairs{
|
|
586
|
|
587 my (@tab)=@_;
|
|
588 my @tab2;
|
|
589
|
|
590 foreach my $v (@tab){
|
|
591
|
|
592 $v=~s/[\(\)]//g;
|
|
593 push(@tab2,$v) if($v!~/[\$\*\@]$/);
|
|
594 }
|
|
595 return @tab2;
|
|
596 }
|
|
597 #------------------------------------------------------------------------------#
|
|
598 #------------------------------------------------------------------------------#
|
|
599 #gets starts and ends Coords when start=leftmost given positions, directions and orders
|
|
600 sub getCoordswithLeftMost {
|
|
601
|
|
602 my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_;
|
|
603
|
|
604 for my $i (0..scalar(@{$positions})-1) {
|
|
605
|
|
606 if($strand->[$i] eq 'F'){
|
|
607 $starts->[$i]=$positions->[$i];
|
|
608 $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;
|
|
609 }else{
|
|
610 $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;
|
|
611 $ends->[$i]=$positions->[$i];
|
|
612 }
|
|
613 }
|
|
614 }
|
|
615 #------------------------------------------------------------------------------#
|
|
616 #------------------------------------------------------------------------------#
|
|
617 sub floor {
|
|
618 my $nb = $_[0];
|
|
619 $nb=~ s/\..*//;
|
|
620 return $nb;
|
|
621 }
|
|
622 #------------------------------------------------------------------------------#
|
|
623 #------------------------------------------------------------------------------#
|
|
624 sub decimal{
|
|
625
|
|
626 my $num=shift;
|
|
627 my $digs_to_cut=shift;
|
|
628
|
|
629 $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
|
|
630
|
|
631 return $num;
|
|
632 }
|
|
633
|
|
634 #------------------------------------------------------------------------------#
|
|
635 #------------------------------------------------------------------------------#
|
|
636 #Sort links according the concerned chromosomes and their coordinates
|
|
637 sub sortLinks{
|
|
638
|
|
639 my ($links_file,$sortedlinks_file,$unique)=@_;
|
|
640
|
|
641 print LOG "# Sorting links...\n";
|
|
642
|
|
643 my $pipe=($unique)? "| sort -u":"";
|
|
644 system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";
|
|
645
|
|
646 }
|
|
647 #------------------------------------------------------------------------------#
|
|
648 #------------------------------------------------------------------------------#
|
|
649 sub getColor{
|
|
650
|
|
651 my($count,$hcolor,$format)=@_;
|
|
652 for my $col ( keys % { $hcolor} ) {
|
|
653 return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);
|
|
654 }
|
|
655 return "white" if($format eq "circos");
|
|
656 return "255,255,255" if($format eq "bed");
|
|
657 }
|
|
658 #------------------------------------------------------------------------------#
|
|
659 #------------------------------------------------------------------------------#
|
|
660 #check if the configuration file is correct
|
|
661 sub validateconfiguration{
|
|
662
|
|
663 my %conf=%{$_[0]};
|
|
664 my $list_prgs="@ARGV";
|
|
665
|
|
666 my @circos_params=qw(organism_id colorcode);
|
|
667 my @bed_params=qw(colorcode);
|
|
668 my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file);
|
|
669
|
|
670 unless (defined($conf{general}{output_dir})) {
|
|
671 $conf{general}{output_dir} = ".";
|
|
672 }
|
|
673 unless (-d $conf{general}{output_dir}){
|
|
674 mkdir $conf{general}{output_dir} or die;
|
|
675 }
|
|
676 $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/);
|
|
677
|
|
678
|
|
679 if($list_prgs=~/links2compare/){
|
|
680 foreach my $p (@compare_params) {
|
|
681 die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
|
|
682 }
|
|
683
|
|
684 unless (defined($conf{compare}{same_sv_type})) {
|
|
685 $conf{compare}{same_sv_type} = 0;
|
|
686 }
|
|
687
|
|
688 unless (defined($conf{compare}{min_overlap})) {
|
|
689 $conf{compare}{min_overlap} = 1E-9;
|
|
690 }
|
|
691
|
|
692 if($conf{compare}{circos_output}){
|
|
693 foreach my $p (@circos_params) {
|
|
694 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
|
|
695 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
|
|
696 }
|
|
697 }
|
|
698 if($conf{compare}{bed_output}){
|
|
699 foreach my $p (@bed_params) {
|
|
700 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
|
|
701 }
|
|
702 die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
|
|
703
|
|
704 my @samples=split(",",$conf{compare}{list_samples});
|
|
705 my @read_lengths=split(",",$conf{compare}{list_read_lengths});
|
|
706 for my $i (0..$#samples){
|
|
707 my @l=split("-",$read_lengths[$i]);
|
|
708 $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
|
|
709 }
|
|
710 }
|
|
711 }
|
|
712
|
|
713
|
|
714 }
|
|
715 #------------------------------------------------------------------------------#
|
|
716 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
|