Mercurial > repos > bzeitouni > svdetect
diff svdetect/SVDetect_compare.pl @ 13:f090bf6ec765 draft
Uploaded
author | bzeitouni |
---|---|
date | Mon, 11 Jun 2012 12:59:11 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/svdetect/SVDetect_compare.pl Mon Jun 11 12:59:11 2012 -0400 @@ -0,0 +1,716 @@ +#!/usr/bin/perl -w + +=pod + +=head1 NAME + +SVDetect Compare for Galaxy + +Version: 0.8 for Galaxy + +=head1 SYNOPSIS + +SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man] + +=cut + +# ------------------------------------------------------------------- + +use strict; +use warnings; + +use Pod::Usage; +use Getopt::Long; + +use Config::General; +use Tie::IxHash; + +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# +#PARSE THE COMMAND LINE +my %OPT; +GetOptions(\%OPT, + 'conf=s', + 'out1=s', #GALAXY + 'out2=s', #GALAXY + 'out3=s', #GALAXY + 'out4=s', #GALAXY + 'out5=s', #GALAXY + 'out6=s', #GALAXY + 'out7=s', #GALAXY + 'out8=s', #GALAXY + 'out9=s', #GALAXY + 'l=s', #GALAXY + 'N=s', #GALAXY + 'help', + 'man' + ); + +pod2usage() if $OPT{help}; +pod2usage(-verbose=>2) if $OPT{man}; +pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf}); + + +pod2usage() if(@ARGV<1); + +tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare); + +foreach my $command (@ARGV){ + pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command})); +} +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# + +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# +#READ THE CONFIGURATION FILE +my $conf=Config::General->new( -ConfigFile => $OPT{conf}, + -Tie => "Tie::IxHash", + -AllowMultiOptions => 1, + -LowerCaseNames => 1, + -AutoTrue => 1); +my %CONF= $conf->getall; +validateconfiguration(\%CONF); #validation of the configuration parameters + + +my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY +my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY + +my $pt_log_file=$OPT{l}; #GALAXY +my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY +open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY + +my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference +my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference +my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference + +$CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY +$CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY + +$CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY +$CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY + +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# + +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# +#COMMAND EXECUTION +foreach my $command (@ARGV){ + &{$func{$command}}(); +} +print LOG "-- end\n"; + +close LOG;#GALAXY +system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY + +exit(0); +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# + +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# +#FUNCTIONS + +# -----------------------------------------------------------------------------# +#MAIN FUNCTION number 5:Comparison between samples, common or specific links +sub links2compare{ + + my @compare_files; + + compareSamples($CONF{general}{output_dir}, + $CONF{compare}{list_samples}, + $CONF{compare}{sample_link_file}, + $CONF{compare}{reference_link_file}, + $CONF{compare}{min_overlap}, + $CONF{compare}{same_sv_type}, + \@compare_files); + + my $pt_ind=0; + + for my $input_file (@compare_files){ + + $input_file=$CONF{general}{output_dir}.$input_file; + + my $output_file=$input_file; + $output_file=~s/unique$/compared/; + + sortLinks($input_file, $output_file,1); + + if($CONF{compare}{circos_output}){ + links2segdup($CONF{circos}{organism_id}, + $CONF{circos}{colorcode}, + $output_file, + $output_file.".segdup.txt"); + system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY + } + + if($CONF{compare}{bed_output}){ + links2bedfile($CONF{compare}{read_lengths}, + $CONF{bed}{colorcode}, + $output_file, + $output_file.".bed"); + system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY + } + + if($CONF{compare}{sv_output}){ + + links2SVfile ($output_file, $output_file.".sv.txt"); + system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY + } + $pt_ind++; + + } + unlink(@compare_files); + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub compareSamples{ + + my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_; + + my @bedpefiles; + my @list=split(",",$list_samples); + my @list_files=($sample_file,$reference_file); + + print LOG "\# Comparison procedure...\n"; + print LOG "-- samples=$list_samples\n". + "-- minimum overlap=$min_overlap\n". + "-- same SV type=$same_sv_type\n"; + + #conversion of links to bedPE format file + print LOG "-- Conversion of links.filtered files to bedPE format\n"; + for my $s (0..$#list) { + + links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt"); + push(@bedpefiles,$list_files[$s].".bedpe.txt"); + + } + + #get common links between all samples compared + print LOG "-- Getting common links between all samples with BEDTools\n"; + my $common_name=join(".",@list); + + my $nb=scalar @list; + my $command=""; + my $prompt=">"; + + while ($nb>0){ + + for my $i (0..$#list_files){ + + $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt"; + my $pipe=0; + + for my $j ($i+1..$#list_files){ + + $command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe); + $command.=" -b ".$list_files[$j].".bedpe.txt"; + $pipe=1; + + } + + $command.=$prompt.$dir.$common_name.".bedpe.tmp;"; + $prompt=">>"; + + my $first=shift(@list_files); push(@list_files,$first); + last; + } + $nb--; + } + + system ($command); + + push(@bedpefiles,$dir.$common_name.".bedpe.tmp"); + + #Post comparison to get common links if same type only (as an option) + open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!"; + open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!"; + + while(<FILE>){ + my @t=split("\t",$_); + my $s=(split("_",$t[6]))[0]; + my ($sv1,$sv2)=($t[7],$t[18]); + splice(@t,11,$#t); + + if($same_sv_type){ + print OUT join("\t",@t)."\n" if($sv1 eq $sv2); + }else{ + print OUT join("\t",@t)."\n"; + } + } + close FILE; + close OUT; + + bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique"); + push(@bedpefiles,$dir.$common_name.".bedpe.unique"); + push(@$file_names,$common_name.".unique"); + print LOG "-- output created: ".$dir.$common_name.".compared\n"; + + #get specific links for each sample + print LOG "-- Getting specific links for each sample\n"; + for my $s (0..$#list) { + system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique"); + bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique"); + push(@bedpefiles,$dir.$list[$s].".bedpe.unique"); + push(@$file_names,$list[$s].".unique"); + print LOG "-- output created: ".$dir.$list[$s].".compared\n"; + } + + unlink(@bedpefiles); + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#convert the links file to the circos format +sub links2segdup{ + + my($id,$color_code,$links_file,$segdup_file)=@_; + + print LOG "\# Converting to the circos format...\n"; + + tie (my %hcolor,'Tie::IxHash'); #color-code hash table + foreach my $col (keys %{$color_code}){ + my ($min_links,$max_links)=split(",",$color_code->{$col}); + $hcolor{$col}=[$min_links,$max_links]; + } + + open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; + open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n"; + + my $index=1; + while(<LINKS>){ + + my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6]; + + my $color=getColor($count,\%hcolor,"circos"); #get the color-code according the number of links + + print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n". #circos output + "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n"; + $index++; + } + + close LINKS; + close SEGDUP; + print LOG "-- output created: $segdup_file\n"; +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#convert the links file to the bedPE format for BEDTools usage +sub links2bedPElinksfile{ + + my ($sample,$links_file,$bedpe_file)=@_; + + open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; + open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n"; + + my $nb_links=1; + + while(<LINKS>){ + + chomp; + my @t=split("\t",$_); + my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6); + my $type=($chr1 eq $chr2)? "INTRA":"INTER"; + $type.="_".$t[10]; + + $start1--; $start2--; + + print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2". + "\t$sample"."_link$nb_links\t$type\t.\t.". + "\t".join("|",@t)."\n"; + + $nb_links++; + } + + close LINKS; + close BEDPE; + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub bedPElinks2linksfile{ + + my ($bedpe_file,$links_file)=@_; + + open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n"; + open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n"; + + while(<BEDPE>){ + + chomp; + my $sample=(split("_",(split("\t",$_))[6]))[0]; + my @t1=(split("\t",$_))[0,1,2,3,4,5]; + my @t2=split(/\|/,(split("\t",$_))[10]); + push(@t2,$sample); + + print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n"; + + } + close BEDPE; + close LINKS; + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#convert the links file to the bed format +sub links2bedfile{ + + my ($tag_length,$color_code,$links_file,$bed_file)=@_; + + print LOG "\# Converting to the bed format...\n"; + + my $compare=1; + if($links_file!~/compared$/){ + $compare=0; + $tag_length->{none}->{1}=$tag_length->{1}; + $tag_length->{none}->{2}=$tag_length->{2}; + } + + #color-code hash table + tie (my %hcolor,'Tie::IxHash'); + my %color_order; + $color_order{"255,255,255"}=0; + my $n=1; + foreach my $col (keys %{$color_code}){ + my ($min_links,$max_links)=split(",",$color_code->{$col}); + $hcolor{$col}=[$min_links,$max_links]; + $color_order{$col}=$n; + $n++; + } + + my %pair; + my %pt; + $n=1; + open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; + + my %str=( "F"=>"+", "R"=>"-" ); + + while(<LINKS>){ + + my @t=split; + my $sample=($compare)? pop(@t):"none"; + + my $chr1=$t[0]; + my $chr2=$t[3]; + $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); + $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); + my $same_chr=($chr1 eq $chr2)? 1:0; + + my $count=$t[6]; + my $color=getColor($count,\%hcolor,"bed"); + + my @pairs=deleteBadOrderSensePairs(split(",",$t[7])); + my @strand1=deleteBadOrderSensePairs(split(",",$t[8])); + my @strand2=deleteBadOrderSensePairs(split(",",$t[9])); + my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10])); + my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11])); + my @position1=deleteBadOrderSensePairs(split(",",$t[14])); + my @position2=deleteBadOrderSensePairs(split(",",$t[15])); + my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample}); + my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample}); + + + for my $p (0..$#pairs){ + + if (!exists $pair{$pairs[$p]}){ + + if($same_chr){ + + $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]}, + $start1[$p]-1, $end2[$p], $color, + 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ]; + $pt{$n}=$pair{$pairs[$p]}->{0}; + $n++; + + }else{ + + $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]}, + $start1[$p]-1, $end1[$p], $color, + 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0]; + $pt{$n}=$pair{$pairs[$p]}->{1}; + $n++; + + + $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]}, + $start2[$p]-1, $end2[$p], $color, + 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0]; + $pt{$n}=$pair{$pairs[$p]}->{2}; + $n++; + } + }else{ + + if($same_chr){ + ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]}); + }else{ + ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]}); + ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]}); + } + } + } + } + close LINKS; + + my $nb_pairs=$n-1; + + open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n"; + print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ". + "visibility=2 itemRgb=\"On\"\n"; + + for my $i (1..$nb_pairs){ + print BED join("\t",@{$pt{$i}})."\n"; + } + + close BED; + + print LOG "-- output created: $bed_file\n"; + + undef %pair; + undef %pt; + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub links2SVfile{ + + my($links_file,$sv_file)=@_; + + print LOG "\# Converting to the sv output table...\n"; + open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; + open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; + + my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist + chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering + final_score breakpoint1_start1-end1 breakpoint2_start2-end2); + + my $nb_links=0; + + while (<LINKS>){ + + my @t=split; + my @sv=(); + my $sv_type="-"; + my $strand_ratio="-"; + my $eq_ratio="-"; + my $eq_type="-"; + my $insert_ratio="-"; + my $link="-"; + my ($bk1, $bk2)=("-","-"); + my $score="-"; + + my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); + my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]); + my $nb_pairs=$t[6]; + $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); + $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); + my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER"; + + #if strand filtering + if (defined $t[16]){ + #if inter-chr link + $sv_type=$t[16]; + if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){ + $strand_ratio=floor($1/$2*100)."%"; + $score=$t[18]; + } + if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){ + #if intra-chr link with insert size filtering + $strand_ratio=floor($1/$2*100)."%"; + $link=floor($t[17]); + if($sv_type!~/^INV/){ + $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); + $score=$t[20]; + }else{ + $score=$t[19]; + } + } + } + + if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){ + + #if strand and order filtering only and/or interchr link + $eq_type=$t[18]; + $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); + ($bk1, $bk2)=($t[20],$t[21]); + foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} + $score=$t[22]; + + }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){ + + #if all three filtering + $link=floor($t[17]); + $eq_type=$t[19]; + $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/); + + if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){ + $insert_ratio=floor($1/$2*100)."%"; + ($bk1, $bk2)=($t[22],$t[23]); + $score=$t[24]; + + }else{ + ($bk1, $bk2)=($t[21],$t[22]); + $score=$t[23]; + } + foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} + + } + + + push(@sv, $chr_type, $sv_type,$eq_type); + push(@sv,"$chr1\t$start1-$end1"); + push(@sv, $link); + push(@sv,"$chr2\t$start2-$end2", + $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2); + + + print SV join("\t",@sv)."\n"; + } + + close LINKS; + close SV; + + system "sort -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted"; + + open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n"; + my @links=<SV>; + close SV; + + open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; + + print SV join("\t",@header)."\n"; + print SV @links; + close SV; + + unlink($sv_file.".sorted"); + + print LOG "-- output created: $sv_file\n"; + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub deleteBadOrderSensePairs{ + + my (@tab)=@_; + my @tab2; + + foreach my $v (@tab){ + + $v=~s/[\(\)]//g; + push(@tab2,$v) if($v!~/[\$\*\@]$/); + } + return @tab2; +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#gets starts and ends Coords when start=leftmost given positions, directions and orders +sub getCoordswithLeftMost { + + my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_; + + for my $i (0..scalar(@{$positions})-1) { + + if($strand->[$i] eq 'F'){ + $starts->[$i]=$positions->[$i]; + $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1; + }else{ + $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1; + $ends->[$i]=$positions->[$i]; + } + } +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub floor { + my $nb = $_[0]; + $nb=~ s/\..*//; + return $nb; +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub decimal{ + + my $num=shift; + my $digs_to_cut=shift; + + $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/); + + return $num; +} + +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#Sort links according the concerned chromosomes and their coordinates +sub sortLinks{ + + my ($links_file,$sortedlinks_file,$unique)=@_; + + print LOG "# Sorting links...\n"; + + my $pipe=($unique)? "| sort -u":""; + system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file"; + +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +sub getColor{ + + my($count,$hcolor,$format)=@_; + for my $col ( keys % { $hcolor} ) { + return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]); + } + return "white" if($format eq "circos"); + return "255,255,255" if($format eq "bed"); +} +#------------------------------------------------------------------------------# +#------------------------------------------------------------------------------# +#check if the configuration file is correct +sub validateconfiguration{ + + my %conf=%{$_[0]}; + my $list_prgs="@ARGV"; + + my @circos_params=qw(organism_id colorcode); + my @bed_params=qw(colorcode); + my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file); + + unless (defined($conf{general}{output_dir})) { + $conf{general}{output_dir} = "."; + } + unless (-d $conf{general}{output_dir}){ + mkdir $conf{general}{output_dir} or die; + } + $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/); + + + if($list_prgs=~/links2compare/){ + foreach my $p (@compare_params) { + die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p}); + } + + unless (defined($conf{compare}{same_sv_type})) { + $conf{compare}{same_sv_type} = 0; + } + + unless (defined($conf{compare}{min_overlap})) { + $conf{compare}{min_overlap} = 1E-9; + } + + if($conf{compare}{circos_output}){ + foreach my $p (@circos_params) { + next if($list_prgs=~/^ratio/ && $p eq "colorcode"); + die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p}); + } + } + if($conf{compare}{bed_output}){ + foreach my $p (@bed_params) { + die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p}); + } + die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths}); + + my @samples=split(",",$conf{compare}{list_samples}); + my @read_lengths=split(",",$conf{compare}{list_read_lengths}); + for my $i (0..$#samples){ + my @l=split("-",$read_lengths[$i]); + $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]}; + } + } + } + + +} +#------------------------------------------------------------------------------# +#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#