annotate svdetect/SVDetect_run_parallel.pl @ 22:eb65c0fa5542 draft

Uploaded
author bzeitouni
date Thu, 12 Jul 2012 12:19:40 -0400
parents f090bf6ec765
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1 #!/usr/bin/perl -w
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3 =pod
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
4
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
5 =head1 NAME
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
6
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
7 SVDetect - Program designed for the detection of structural variations
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
8 from paired-end/mate-pair sequencing data, compatible with SOLiD and Illumina (>=1.3) reads
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
9
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
10 Version: 0.8 for Galaxy
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
11
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
12 =head1 SYNOPSIS
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
13
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
14 SVDetect <command> -conf <configuration_file> [-help] [-man]
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
15
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
16 Command:
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
17
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
18 linking detection and isolation of links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
19 filtering filtering of links according to different parameters
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
20 links2circos links conversion to circos format
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
21 links2bed paired-ends of links converted to bed format (UCSC)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
22 links2SV formatted output to show most significant SVs
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
23 cnv calculate copy-number profiles
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
24 ratio2circos ratio conversion to circos density format
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
25 ratio2bedgraph ratio conversion to bedGraph density format (UCSC)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
26
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
27 =head1 DESCRIPTION
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
28
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
29 This is a command-line interface to SVDetect.
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
30
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
31
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
32 =head1 AUTHORS
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
33
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
34 Bruno Zeitouni E<lt>bruno.zeitouni@curie.frE<gt>,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
35 Valentina Boeva E<lt>valentina.boeva@curie.frE<gt>
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
36
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
37 =cut
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
38
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
39 # -------------------------------------------------------------------
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
40
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
41 use strict;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
42 use warnings;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
43
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
44 use Pod::Usage;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
45 use Getopt::Long;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
46
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
47 use Config::General;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
48 use Tie::IxHash;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
49 use FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
50 use Parallel::ForkManager;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
51
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
52 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
53 #PARSE THE COMMAND LINE
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Command-line options are collected into %OPT. The options tagged #GALAXY
# were added for the Galaxy wrapper; -conf is the only mandatory option.
my %OPT;
GetOptions(\%OPT,
    'conf=s',
    'out1=s',    #GALAXY
    'out2=s',    #GALAXY
    'out3=s',    #GALAXY
    'out4=s',    #GALAXY
    'out5=s',    #GALAXY
    'l=s',       #GALAXY
    'N=s',       #GALAXY
    'help',      #GALAXY
    'man'
);

pod2usage() if $OPT{help};
pod2usage(-verbose => 2) if $OPT{man};

# Report what is actually wrong: $! carries no useful information here.
pod2usage(-message => "Missing required option: -conf <configuration_file>",
          -exitval => 2) if (!defined $OPT{conf});

# Whatever remains in @ARGV after option parsing is the list of commands.
pod2usage(-message => "No command given", -exitval => 2) if (@ARGV < 1);

# Dispatch table: command name => handler. Tie::IxHash keeps the commands in
# declaration order.
tie(my %func, 'Tie::IxHash',
    linking        => \&createlinks,
    filtering      => \&filterlinks,
    links2circos   => \&links2circos,
    links2bed      => \&links2bed,
    links2compare  => \&links2compare,
    links2SV       => \&links2SV,
    cnv            => \&cnv,
    ratio2circos   => \&ratio2circos,
    ratio2bedgraph => \&ratio2bedgraph);

# Reject unknown commands before any work is started.
foreach my $command (@ARGV) {
    pod2usage(-message => "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
87 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
88
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
89
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
91 #READ THE CONFIGURATION FILE
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Load the configuration file into %CONF (section and option names are
# lower-cased, yes/no style values are converted to booleans by -AutoTrue).
my $conf = Config::General->new(
    -ConfigFile        => $OPT{conf},
    -Tie               => "Tie::IxHash",
    -AllowMultiOptions => 1,
    -LowerCaseNames    => 1,
    -AutoTrue          => 1);
my %CONF = $conf->getall;
validateconfiguration(\%CONF);    #validation of the configuration parameters

my $SAMTOOLS_BIN_DIR = "/bioinfo/local/samtools";    #GALAXY

# Galaxy dataset paths. Each stays undef when the matching option is absent
# or empty. A plain conditional expression is used because
# "my $x = ... if COND" has undefined behaviour (see perlsyn).
my $pt_log_file    = $OPT{l};                               #GALAXY
my $pt_links_file  = $OPT{out1} ? $OPT{out1} : undef;       #GALAXY
my $pt_flinks_file = $OPT{out2} ? $OPT{out2} : undef;       #GALAXY
my $pt_sv_file     = $OPT{out3} ? $OPT{out3} : undef;       #GALAXY
my $pt_circos_file = $OPT{out4} ? $OPT{out4} : undef;       #GALAXY
my $pt_bed_file    = $OPT{out5} ? $OPT{out5} : undef;       #GALAXY

# Replace symbolic links by their targets. readlink() returns undef for
# anything that is not a symlink, so the configured path is kept in that case
# instead of being overwritten with undef.
foreach my $key (qw(mates_file cmap_file)) {    #GALAXY
    my $path = $CONF{general}{$key};
    next unless (defined $path && -l $path);
    my $target = readlink($path);
    $CONF{general}{$key} = $target if (defined $target);
}

# Run log: <output_dir><N>.svdetect_run.log. The bareword handle LOG is kept
# because the rest of the script prints to it.
my $log_file = $CONF{general}{output_dir}.(defined $OPT{N} ? $OPT{N} : '').".svdetect_run.log";    #GALAXY
open(LOG, '>', $log_file) or die "$0: can't open ".$log_file.":$!\n";    #GALAXY
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
114 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
115
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
116 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
117 #COMMAND EXECUTION
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Run every requested command in the order given on the command line.
foreach my $command (@ARGV) {
    $func{$command}->();
}
print LOG "-- end\n";    #GALAXY

close(LOG) or warn "$0: can't close ".$log_file.":$!\n";    #GALAXY

# Make the Galaxy log dataset a symbolic link to the run log. This is done
# with unlink/symlink rather than a shell command so that paths are never
# interpreted by a shell, and it is skipped when no -l option was given.
if (defined $pt_log_file && length $pt_log_file) {    #GALAXY
    unlink($pt_log_file);
    symlink($log_file, $pt_log_file)
        or warn "$0: can't link $pt_log_file to $log_file: $!\n";
}
exit(0);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
126 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
127
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
128
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
129 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
130 #FUNCTIONS
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
131 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
132 #MAIN FUNCTION number 1: Detection of links from mate-pairs data
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# createlinks()
#
# Handler of the "linking" command. Takes no arguments; reads its settings
# from %CONF and writes progress messages to LOG.
#
# Steps:
#   1. build the genomic fragment library (shearingChromosome),
#   2. split the mate file into per-chromosome/interchromosomal files, or
#      reuse the files already present in <tmp_dir>mates/,
#   3. run getlinks() on every split file, in parallel child processes,
#   4. concatenate the per-file links into <output_dir><basename>.links and,
#      when -out1 was given, point that Galaxy dataset at the result.
#
# Dies when no split mate file or no links file can be found.
sub createlinks{

    my %CHR;      #main hash table 1: fragments, links
    my %CHRID;
    my @MATEFILES;

    # All file names derive from "<basename of mates_file>.<sv_type>".
    my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
    my @path=split(/\//,$output_prefix);
    $output_prefix=$CONF{general}{output_dir}.$path[$#path];
    my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$path[$#path];
    my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];

    shearingChromosome(\%CHR, \%CHRID,    #making the genomic fragment library with the detection parameters
                       $CONF{detection}{window_size},
                       $CONF{detection}{step_length},
                       $CONF{general}{cmap_file});

    # bsd_glob() expands the pattern inside perl: no shell is involved and a
    # path containing spaces is treated as a single pattern.
    require File::Glob;

    if($CONF{detection}{split_mate_file}){

        splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
                      $CONF{general}{sv_type},
                      $CONF{general}{mates_file},
                      $CONF{general}{input_format},
                      $CONF{general}{read_lengths}
                     );
    }else{

        @MATEFILES=File::Glob::bsd_glob("${tmp_mates_prefix}*");
        die "# Error: No splitted mate files already created at $CONF{general}{tmp_dir}" unless (@MATEFILES);
        print LOG "# Splitted mate files already created.\n";
    }

    #Parallelization of the linking per chromosome for intra + interchrs
    my $pm = Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $matefile (@MATEFILES){

        $pm->start and next;    # parent: go on with the next file
        getlinks(\%CHR, \%CHRID, $matefile);
        $pm->finish;            # child: exit here

    }
    $pm->wait_all_children;

    #Merge the chromosome links file into only one
    my @LINKFILES=File::Glob::bsd_glob("${tmp_links_prefix}*links");
    die "# Error: No links files created at $CONF{general}{tmp_dir}" unless (@LINKFILES);
    catFiles( \@LINKFILES => "$output_prefix.links" );

    # Replace the Galaxy dataset by a symbolic link to the merged file.
    if (defined $pt_links_file) {    #GALAXY
        unlink($pt_links_file);
        symlink("$output_prefix.links", $pt_links_file)
            or warn "$0: can't link $pt_links_file to $output_prefix.links: $!\n";
    }
    print LOG "# Linking end procedure : output created: $output_prefix.links\n";
    #unlink(@LINKFILES);
    #unlink(@MATEFILES);

    return;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
192 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
193 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# getlinks($chr, $chrID, $tmp_mates_prefix)
#
# Runs the linking pipeline on one split mate file.
#   $chr, $chrID       - references to the chromosome hash tables, passed on
#                        unchanged to linking() and defineCoordsLinks()
#   $tmp_mates_prefix  - path of the split mate file to process
#
# The links path is the mate path with "/mates/" replaced by "/links/". Four
# intermediate files are produced and deleted at the end; the final result is
# left in "<links path>.links".
sub getlinks {

    my ($chr, $chrID, $tmp_mates_prefix) = @_;

    (my $tmp_links_prefix = $tmp_mates_prefix) =~ s/\/mates\//\/links\//;

    # Names of the intermediate files, one per pipeline stage.
    my $mapped_file  = "$tmp_links_prefix.links.mapped";
    my $unique_file  = "$tmp_links_prefix.links.unique";
    my $defined_file = "$tmp_links_prefix.links.unique_defined";
    my $sorted_file  = "$tmp_links_prefix.links.sorted";

    my %PAIR;    #main hash table 2: pairs

    # Stage 1: creation of all links from chromosome coordinates of pairs
    linking($chr, $chrID, \%PAIR,
            $CONF{general}{read_lengths},
            $CONF{detection}{window_size},
            $CONF{detection}{step_length},
            $tmp_mates_prefix,
            $CONF{general}{input_format},
            $CONF{general}{sv_type},
            $mapped_file);

    # Stage 2: remove the duplicated links
    getUniqueLinks($mapped_file, $unique_file);

    # Stage 3: definition of the precise coordinates of links
    defineCoordsLinks($chr, $chrID, \%PAIR,
                      $CONF{general}{input_format},
                      $CONF{general}{sv_type},
                      $CONF{general}{read_lengths},
                      $unique_file,
                      $defined_file);

    # Stage 4: sorting links from coordinates
    sortLinks($defined_file, $sorted_file);

    # Stage 5: remove redundant links and write the final output file
    removeFullyOverlappedLinks($sorted_file, "$tmp_links_prefix.links", 1);

    undef %PAIR;

    # Clean up the intermediate files.
    unlink($mapped_file, $unique_file, $defined_file, $sorted_file);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
237 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
238 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# splitMateFile($chr, $chrID, $files_list, $output_prefix, $sv_type,
#               $mates_file, $input_format, $tag_length)
#
# Splits the mate file into one file per chromosome (intrachromosomal pairs)
# plus one "interchrs" file (pairs on two different chromosomes), so that each
# file can be processed by a separate child process.
#
#   $chr           - hash ref; {nb_chrs} and {<k>}{name} are read here
#   $chrID         - hash ref; looked up with the chromosome names returned by
#                    readMateFile(), its values are used as the <k> keys above
#   $files_list    - array ref; the names of the created files are pushed onto it
#   $output_prefix - prefix of the created files
#   $sv_type       - "all", "inter" or "intra" select which files are written
#   $mates_file    - input file; "*.gz" is read through gunzip, "*.bam" through
#                    samtools view, anything else is read directly
#   $input_format, $tag_length - passed on to readMateFile()
#
# Dies when the input or an output file cannot be opened, or when an output
# file cannot be closed. Pairs rejected by readMateFile() or located on a
# chromosome unknown to $chrID are skipped.
sub splitMateFile{

    my ($chr,$chrID,$files_list,$output_prefix,$sv_type,$mates_file,$input_format,$tag_length)=@_;

    print LOG "# Splitting the mate file \"$mates_file\" for parallel processing...\n";

    my $want_inter = ($sv_type=~/^(all|inter)$/) ? 1 : 0;
    my $want_intra = ($sv_type=~/^(all|intra)$/) ? 1 : 0;

    my %filesHandle;

    #interchromosomal mate file
    if($want_inter){
        my $newFileName="$output_prefix.interchrs";
        push(@{$files_list},$newFileName);
        my $fh = FileHandle->new;
        $fh->open($newFileName, ">") or die "$0: can't open ".$newFileName.":$!\n";
        $filesHandle{inter}=$fh;
    }

    #intrachromosomal mate files, one per chromosome
    if($want_intra){
        foreach my $k (1..$chr->{nb_chrs}){
            my $newFileName=$output_prefix.".".$chr->{$k}->{name};
            push(@{$files_list},$newFileName);
            my $fh = FileHandle->new;
            $fh->open($newFileName, ">") or die "$0: can't open ".$newFileName.":$!\n";
            $filesHandle{$k}=$fh;
        }
    }

    # Open the input. The list form of the pipe open runs the helper program
    # directly, so the file name is never interpreted by a shell.
    my $mates_fh;
    if ($mates_file =~ /\.gz$/) {
        open($mates_fh, '-|', 'gunzip', '-c', $mates_file) or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
    }elsif($mates_file =~ /\.bam$/){
        open($mates_fh, '-|', "$SAMTOOLS_BIN_DIR/samtools", 'view', $mates_file) or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
    }else{
        open($mates_fh, '<', $mates_file) or die "$0: can't open ".$mates_file.":$!\n";
    }

    while(my $line = <$mates_fh>){

        my @t=split(' ',$line);
        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);

        next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));

        next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        # From here on the chromosomes are identified by their id.
        ($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});

        if($chr_read1 ne $chr_read2){
            print {$filesHandle{inter}} join("\t",@t)."\n" if ($want_inter);
        }else{
            print {$filesHandle{$chr_read1}} join("\t",@t)."\n" if ($want_intra);
        }
    }

    # Closing a pipe also waits for the helper program, so a failing
    # gunzip/samtools is reported instead of going unnoticed.
    close($mates_fh)
        or warn "$0: problem while reading ".$mates_file.": ".($! ? $! : "exit status $?")."\n";

    # Buffered write errors only show up when the file is closed.
    foreach my $name (keys %filesHandle){
        $filesHandle{$name}->close or die "$0: can't close split mate file ($name):$!\n";
    }

    print LOG "# Splitted mate files of \"$mates_file\" created.\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
306
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
307
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
308 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
309 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# filterlinks()
#
# Handler of the "filtering" command. Takes no arguments; reads its settings
# from %CONF and writes progress messages to LOG.
#
# Steps:
#   1. build the chromosome hash tables (createChrHashTables),
#   2. split <output_dir><basename>.links into per-chromosome link files, or
#      reuse the "*links" files already present in <tmp_dir>links/,
#   3. run getFilteredlinks() on every link file, in parallel child processes,
#   4. concatenate the "*filtered" files into
#      <output_dir><basename>.links.filtered and, when -out2 was given, point
#      that Galaxy dataset at the result.
#
# Dies when no split link file or no filtered file can be found.
sub filterlinks{

    my %CHR;
    my %CHRID;
    my @LINKFILES;
    my @FLINKFILES;

    # All file names derive from "<basename of mates_file>.<sv_type>".
    my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
    my @path=split(/\//,$output_prefix);
    $output_prefix=$CONF{general}{output_dir}.$path[$#path];
    my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];

    createChrHashTables(\%CHR,\%CHRID,
                        $CONF{general}{cmap_file});

    # bsd_glob() expands the pattern inside perl: no shell is involved and a
    # path containing spaces is treated as a single pattern.
    require File::Glob;

    if($CONF{filtering}{split_link_file}){

        splitLinkFile(\%CHR, \%CHRID, \@LINKFILES,
                      $tmp_links_prefix,
                      $CONF{general}{sv_type},
                      "$output_prefix.links",
                     );
    }else{

        @LINKFILES=File::Glob::bsd_glob("${tmp_links_prefix}*links");
        die "# Error: No splitted link files already created\n" unless (@LINKFILES);
        print LOG "# Splitted link files already created.\n";
    }

    #Parallelization of the filtering per chromosome for intra + interchrs
    my $pm = Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $linkfile (@LINKFILES){

        $pm->start and next;    # parent: go on with the next file
        getFilteredlinks(\%CHR, \%CHRID, $linkfile);
        $pm->finish;            # child: exit here

    }
    $pm->wait_all_children;

    #Merge the chromosome links file into only one
    @FLINKFILES=File::Glob::bsd_glob("${tmp_links_prefix}*filtered");
    die "# Error: No links files created\n" unless (@FLINKFILES);
    catFiles( \@FLINKFILES => "$output_prefix.links.filtered" );

    # Replace the Galaxy dataset by a symbolic link to the merged file.
    if (defined $pt_flinks_file) {    #GALAXY
        unlink($pt_flinks_file);
        symlink("$output_prefix.links.filtered", $pt_flinks_file)
            or warn "$0: can't link $pt_flinks_file to $output_prefix.links.filtered: $!\n";
    }
    print LOG "# Filtering end procedure : output created: $output_prefix.links.filtered\n";

    return;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
363 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
364 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Split a link file into several smaller files so that each one can be
# filtered by a separate process.
#
# Arguments:
#   $chr          - hash ref; {nb_chrs} and {$k}{name} (k in 1..nb_chrs) are read
#   $chrID        - hash ref mapping a chromosome name to its key in $chr
#   $files_list   - array ref; the name of every file created is pushed onto it
#   $input_prefix - path prefix of the files to create
#   $sv_type      - "all", "inter" or "intra": which kind of files to write
#   $link_file    - link file to read
#
# Files created:
#   "$input_prefix.interchrs.links"  for links joining two different chromosomes
#   "$input_prefix.<name>.links"     one per chromosome, for links inside it
#
# Columns 1 and 4 of every input line are used as the chromosome names of the
# two ends of the link. Lines with fewer than four columns, or naming a
# chromosome absent from $chrID, are skipped. Lines are re-written with their
# fields joined by tabs.
#
# Dies if the link file cannot be read or if an output file cannot be created
# or closed.
sub splitLinkFile{

    my ($chr,$chrID,$files_list,$input_prefix,$sv_type,$link_file)=@_;

    print LOG "# Splitting the link file for parallel processing...\n";

    my $want_inter=($sv_type=~/^(?:all|inter)$/) ? 1 : 0;
    my $want_intra=($sv_type=~/^(?:all|intra)$/) ? 1 : 0;

    my %filesHandle;

    # Register and open one output file; a failed open is fatal because every
    # later print to that handle would otherwise be silently lost.
    my $createOutput=sub {
        my ($key,$newFileName)=@_;
        push(@{$files_list},$newFileName);
        open(my $fh,'>',$newFileName) or die "$0: can't open ".$newFileName.":$!\n";
        $filesHandle{$key}=$fh;
    };

    # inter-chromosomal link file
    if($want_inter){
        $createOutput->('inter',"$input_prefix.interchrs.links");
    }

    # intra-chromosomal link files, one per chromosome
    if($want_intra){
        foreach my $k (1..$chr->{nb_chrs}){
            $createOutput->($k,$input_prefix.".".$chr->{$k}->{name}.".links");
        }
    }

    open(my $links,'<',$link_file) or die "$0: can't open ".$link_file.":$!\n";
    while(my $line=<$links>){

        my @t=split(' ',$line);
        next if (@t<4);                       # blank or truncated line

        my ($chr_read1,$chr_read2)=@t[0,3];
        next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        # from here on, work with chromosome keys instead of names
        ($chr_read1,$chr_read2)=($chrID->{$chr_read1},$chrID->{$chr_read2});

        if($chr_read1 ne $chr_read2){
            print {$filesHandle{inter}} join("\t",@t)."\n" if ($want_inter);
        }
        elsif($want_intra){
            print {$filesHandle{$chr_read1}} join("\t",@t)."\n";
        }
    }
    close($links);

    # Buffered write errors only surface at close time, so check it.
    foreach my $name (keys %filesHandle){
        close($filesHandle{$name}) or die "$0: can't close split link file (".$name."):$!\n";
    }

    print LOG "# Splitted link files created.\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
422
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
423
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
424 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
425 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
426 #MAIN FUNCTION number 2: Filtering processing
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Run the filtering pipeline on one link file.
#
# Arguments:
#   $chr, $chrID      - chromosome hash refs, handed unchanged to the filtering
#                       helpers
#   $tmp_links_prefix - path of the link file to filter; the result is written
#                       to "$tmp_links_prefix.filtered"
#
# Steps (all thresholds come from %CONF):
#   1. strandFiltering, always.
#   2. Only when $CONF{filtering}{strand_filtering} is set:
#        getUniqueLinks,
#        orderFiltering      (if $CONF{filtering}{order_filtering}),
#        addInsertionInfo    (if $CONF{filtering}{insert_size_filtering} and
#                             $CONF{general}{sv_type} is not 'inter'),
#        sortLinks, removeFullyOverlappedLinks, postFiltering.
#
# Between two steps the current ".filtered" file is renamed with a step
# suffix and used as the input of the next step, which writes a new
# ".filtered" file. The renamed intermediates are deleted at the end.
#
# Dies if an intermediate file cannot be renamed, since the following step
# would otherwise run on a missing or stale input.
sub getFilteredlinks {

    my ($chr,$chrID,$tmp_links_prefix)=@_;

    my $filtered="$tmp_links_prefix.filtered";

    strandFiltering($chr,$chrID,
                    $CONF{filtering}{nb_pairs_threshold},      #filtering of links
                    $CONF{filtering}{strand_filtering},
                    $CONF{filtering}{chromosomes},
                    $CONF{general}{input_format},
                    $CONF{general}{cmap_file},
                    $CONF{general}{mates_orientation},
                    $CONF{general}{read_lengths},
                    $tmp_links_prefix,
                    $filtered,
                    );

    return unless ($CONF{filtering}{strand_filtering});

    #re-definition of links coordinates with strand filtering

    my @tmpfiles;

    # Move the current ".filtered" file aside under "<filtered><suffix>",
    # remember it for the final clean-up and return its new name.
    my $setAside=sub {
        my ($suffix)=@_;
        my $aside=$filtered.$suffix;
        rename($filtered,$aside) or die "$0: can't rename ".$filtered." to ".$aside.":$!\n";
        push(@tmpfiles,$aside);
        return $aside;
    };

    getUniqueLinks($setAside->("_unique"),$filtered);

    if($CONF{filtering}{order_filtering}){                     #filtering using the order

        my $ordered_input=$setAside->("_ordered");

        orderFiltering($chr,$chrID,
                       $CONF{filtering}{nb_pairs_threshold},
                       $CONF{filtering}{nb_pairs_order_threshold},
                       $CONF{filtering}{mu_length},
                       $CONF{filtering}{sigma_length},
                       $CONF{general}{mates_orientation},
                       $CONF{general}{read_lengths},
                       $ordered_input,
                       $filtered,
                       );
    }

    if(($CONF{filtering}{insert_size_filtering}) &&
       ($CONF{general}{sv_type} ne 'inter')){

        my $indel_input=$setAside->("_withoutIndelSize");

        addInsertionInfo($chr,$chrID,
                         $CONF{filtering}{nb_pairs_threshold},
                         $CONF{filtering}{order_filtering},
                         $CONF{filtering}{indel_sigma_threshold},
                         $CONF{filtering}{dup_sigma_threshold},
                         $CONF{filtering}{singleton_sigma_threshold},
                         $CONF{filtering}{mu_length},
                         $CONF{filtering}{sigma_length},
                         $CONF{general}{mates_orientation},
                         $CONF{general}{read_lengths},
                         $indel_input,
                         $filtered
                         );
    }

    sortLinks($filtered,
              "$tmp_links_prefix.filtered_sorted");

    removeFullyOverlappedLinks("$tmp_links_prefix.filtered_sorted",
                               "$tmp_links_prefix.filtered_nodup",
                               );

    postFiltering("$tmp_links_prefix.filtered_nodup",
                  $filtered,
                  $CONF{filtering}{final_score_threshold});

    push(@tmpfiles,"$tmp_links_prefix.filtered_sorted","$tmp_links_prefix.filtered_nodup");

    unlink(@tmpfiles);

    return;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
515 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
516 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
517 #MAIN FUNCTION number 3: Circos format conversion for links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert the filtered link file to the Circos link format.
#
# The input name is built from $CONF{general}{mates_file} and
# $CONF{general}{sv_type} ("<mates_file>.<sv_type>.links.filtered"); only its
# last path component is kept and looked up under $CONF{general}{output_dir}.
# The result is written next to it with the ".segdup.txt" suffix by
# links2segdup.
#
# When $pt_circos_file is defined (Galaxy), that path is replaced by a
# symbolic link to the generated file. This is done with unlink/symlink rather
# than a shell command so that paths containing spaces or shell
# metacharacters are handled correctly.
sub links2circos{

    my $filtered_name=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
    my @path=split(/\//,$filtered_name);
    my $input_file=$CONF{general}{output_dir}.$path[$#path];

    my $output_file=$input_file.".segdup.txt";

    links2segdup($CONF{circos}{organism_id},
                 $CONF{circos}{colorcode},
                 $input_file,
                 $output_file);                                #circos file output

    if(defined $pt_circos_file){                               #GALAXY
        unlink($pt_circos_file);
        symlink($output_file,$pt_circos_file)
            or warn "$0: can't link ".$pt_circos_file." to ".$output_file.":$!\n";
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
533 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
534 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
535 #MAIN FUNCTION number 4: Bed format conversion for links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert the filtered link file to a BED file.
#
# The input name is built from $CONF{general}{mates_file} and
# $CONF{general}{sv_type} ("<mates_file>.<sv_type>.links.filtered"); only its
# last path component is kept and looked up under $CONF{general}{output_dir}.
# The result is written next to it with the ".bed.txt" suffix by
# links2bedfile.
#
# When $pt_bed_file is defined (Galaxy), that path is replaced by a symbolic
# link to the generated file. This is done with unlink/symlink rather than a
# shell command so that paths containing spaces or shell metacharacters are
# handled correctly.
sub links2bed{

    my $filtered_name=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
    my @path=split(/\//,$filtered_name);
    my $input_file=$CONF{general}{output_dir}.$path[$#path];

    my $output_file=$input_file.".bed.txt";

    links2bedfile($CONF{general}{read_lengths},
                  $CONF{bed}{colorcode},
                  $input_file,
                  $output_file);                               #bed file output

    if(defined $pt_bed_file){                                  #GALAXY
        unlink($pt_bed_file);
        symlink($output_file,$pt_bed_file)
            or warn "$0: can't link ".$pt_bed_file." to ".$output_file.":$!\n";
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
552 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
553 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
554 #MAIN FUNCTION number 6: SV file (.sv.txt) conversion for links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert the filtered link file to an SV file.
#
# The input name is built from $CONF{general}{mates_file} and
# $CONF{general}{sv_type} ("<mates_file>.<sv_type>.links.filtered"); only its
# last path component is kept and looked up under $CONF{general}{output_dir}.
# The result is written next to it with the ".sv.txt" suffix by links2SVfile.
#
# When $pt_sv_file is defined (Galaxy), that path is replaced by a symbolic
# link to the generated file. This is done with unlink/symlink rather than a
# shell command so that paths containing spaces or shell metacharacters are
# handled correctly.
sub links2SV{

    my $filtered_name=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
    my @path=split(/\//,$filtered_name);
    my $input_file=$CONF{general}{output_dir}.$path[$#path];

    my $output_file=$input_file.".sv.txt";

    links2SVfile($input_file,
                 $output_file);

    if(defined $pt_sv_file){                                   #GALAXY
        unlink($pt_sv_file);
        symlink($output_file,$pt_sv_file)
            or warn "$0: can't link ".$pt_sv_file." to ".$output_file.":$!\n";
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
570 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
571 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
572 #MAIN FUNCTION number 7: copy number variations, coverage ratio calculation
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Copy-number workflow: build the fragment library, obtain the per-chromosome
# sample and reference mate files (split them now, or list those already
# present in the temporary directory), run densityCalculation on each sample
# file in a forked worker, then concatenate the resulting density files into
# "<output_dir><mates file name>.density".
sub cnv{

    my (%CHR,%CHRID);
    my (@MATEFILES,@MATEFILES_REF);

    # Only the last path component of each mate file name is used to build
    # the output and temporary prefixes.
    my @sample_path=split(/\//,$CONF{general}{mates_file});
    my @ref_path=split(/\//,$CONF{detection}{mates_file_ref});
    my $sample_name=$sample_path[$#sample_path];
    my $ref_name=$ref_path[$#ref_path];

    my $output_prefix=$CONF{general}{output_dir}.$sample_name;
    my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$sample_name;
    my $tmp_mates_prefix_ref=$CONF{general}{tmp_dir}."mates/".$ref_name;
    my $tmp_density_prefix=$CONF{general}{tmp_dir}."density/".$sample_name;

    #making the genomic fragment library with the detection parameters
    shearingChromosome(\%CHR,\%CHRID,
                       $CONF{detection}{window_size},
                       $CONF{detection}{step_length},
                       $CONF{general}{cmap_file});

    if($CONF{detection}{split_mate_file}){

        # Split the sample first, then the reference, with identical settings.
        my @to_split=(
            [\@MATEFILES,     $tmp_mates_prefix,     $CONF{general}{mates_file}],
            [\@MATEFILES_REF, $tmp_mates_prefix_ref, $CONF{detection}{mates_file_ref}],
        );
        foreach my $job (@to_split){
            my ($list,$prefix,$mates_file)=@{$job};
            splitMateFile(\%CHR,\%CHRID,$list,$prefix,
                          "intra",
                          $mates_file,
                          $CONF{general}{input_format},
                          $CONF{general}{read_lengths}
                          );
        }
    }
    else{

        # Reuse the split files of a previous run.
        @MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted sample mate files of \"$CONF{general}{mates_file}\" already created at $CONF{general}{tmp_dir} :$!";
        chomp(@MATEFILES);
        @MATEFILES_REF=qx{ls $tmp_mates_prefix_ref*} or die "# Error: No splitted reference mate files of \"$CONF{detection}{mates_file_ref}\" already created at $CONF{general}{tmp_dir} :$!";
        chomp(@MATEFILES_REF);
        print LOG "# Splitted sample and reference mate files already created.\n";
    }

    #Parallelization of the cnv per chromosome
    my $pm=Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $file (0..$#MATEFILES){

        # start() is true in the parent, which moves straight on to the next
        # file; the rest of the loop body runs in the forked child only.
        next if $pm->start;

        densityCalculation(\%CHR,\%CHRID,$file,
                           $CONF{general}{read_lengths},
                           $CONF{detection}{window_size},
                           $CONF{detection}{step_length},
                           \@MATEFILES,
                           \@MATEFILES_REF,
                           $MATEFILES[$file].".density",
                           $CONF{general}{input_format});

        $pm->finish;
    }
    $pm->wait_all_children;

    #Merge the per-file density files into only one
    my @DENSITYFILES=qx{ls $tmp_density_prefix*density} or die "# Error: No density files created at $CONF{general}{tmp_dir} :$!";
    chomp(@DENSITYFILES);
    catFiles( \@DENSITYFILES => "$output_prefix.density" );

    print LOG "# cnv end procedure : output created: $output_prefix.density\n";

    undef %CHR;
    undef %CHRID;

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
654 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
655 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
656 #MAIN FUNCTION number 8: Circos format conversion for cnv ratios
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert the density file to the Circos format.
#
# The input is "<output_dir><last path component of mates_file>.density"; the
# output carries the extra ".segdup.txt" suffix and is produced by
# ratio2segdup.
sub ratio2circos{

    my @path=split(/\//,$CONF{general}{mates_file}.".density");
    my $input_file=$CONF{general}{output_dir}.$path[$#path];
    my $output_file=$input_file.".segdup.txt";

    ratio2segdup($CONF{circos}{organism_id},$input_file,$output_file);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
670 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
671 #MAIN FUNCTION number 9: BedGraph format conversion for cnv ratios
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert the density file to the BedGraph format.
#
# The input is "<output_dir><last path component of mates_file>.density"; the
# output carries the extra ".bedgraph.txt" suffix and is produced by
# ratio2bedfile.
sub ratio2bedgraph{

    my @path=split(/\//,$CONF{general}{mates_file}.".density");
    my $input_file=$CONF{general}{output_dir}.$path[$#path];
    my $output_file=$input_file.".bedgraph.txt";

    ratio2bedfile($input_file,$output_file);                   #bed file output
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
684 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
685 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
686 #Creation of the fragment library
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Build the fragment library: cut every chromosome into windows.
#
# Arguments:
#   $chr       - hash ref, filled by createChrHashTables and then completed
#                here with $chr->{$k}{$frag} = [start, end] (0-based, inclusive,
#                fragments numbered from 1) and $chr->{$k}{nb_frag}
#   $chrID     - hash ref, filled by createChrHashTables
#   $window    - window size
#   $step      - distance between the starts of two consecutive windows
#   $cmap_file - chromosome map file, read by createChrHashTables
#
# A window that would run past the end of the chromosome is shortened so that
# it stops on the last position, and no further window is created after it.
#
# {nb_frag} is always set to the number of fragments actually created, even
# when no window reaches the end of the chromosome (possible when $step is
# larger than $window) or when the chromosome length is 0.
#
# Dies if $step is not a positive number, because the window start would
# never advance.
sub shearingChromosome{

    print LOG "# Making the fragments library...\n";

    my ($chr,$chrID,$window,$step,$cmap_file)=@_;                #window and step sizes parameters

    unless(defined $step && $step>0){
        die "$0: the step length must be a positive number\n";
    }

    createChrHashTables($chr,$chrID,$cmap_file);                 #hash tables: chromosome ID <=> chromosome name

    foreach my $k (1..$chr->{nb_chrs}){

        print LOG"-- $chr->{$k}->{name}\n";

        my $length=$chr->{$k}->{length};
        my $last_pos=$length-1;
        my $frag=0;                                              #number of fragments created so far

        for(my $start=0; $start<$length; $start+=$step){

            my $end=($start<$length-$window) ? $start+$window-1 : $last_pos;
            $chr->{$k}->{++$frag}=[$start,$end];                 #creation of fragments, coordinates storage

            last if ($end==$last_pos);                           #the end of the chromosome is covered
        }

        $chr->{$k}->{nb_frag}=$frag;                             #nb of fragments per chromosome
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
713 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
714 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
#Creation of chromosome hash tables from the cmap file
#
# Reads the chromosome map file line by line. Every line that is not made of
# whitespace only is split on whitespace and its first three fields are taken
# as: chromosome ID, chromosome name, chromosome length.
#
# Arguments:
#   $chr       - hash ref, filled in place:
#                  $chr->{<ID>}->{name}   chromosome name
#                  $chr->{<ID>}->{length} chromosome length
#                  $chr->{nb_chrs}        number of lines recorded (reset to 0 first)
#   $chrID     - hash ref, filled in place: chromosome name => chromosome ID
#   $cmap_file - path of the chromosome map file
#
# Dies when the file cannot be opened.
sub createChrHashTables{

    my ($chr,$chrID,$cmap_file)=@_;
    $chr->{nb_chrs}=0;

    # Three-argument open on a lexical handle: the path is used exactly as
    # given (no mode characters or surrounding blanks interpreted inside the
    # name) and no package-wide bareword handle is left behind.
    open(my $cmap_fh,"<",$cmap_file) or die "$0: can't open ".$cmap_file.":$!\n";
    while(my $line=<$cmap_fh>){

        next if($line=~/^\s+$/);                   #whitespace-only line
        my ($k,$name,$length) = split(' ',$line);  #same splitting as a bare split on $_
        $chr->{$k}->{name}=$name;
        $chr->{$k}->{length}=$length;
        $chrID->{$name}=$k;
        $chr->{nb_chrs}++;

    }
    close($cmap_fh);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
734 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
735 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
#Read the mate file according to the input format (solid, eland or sam)
#
# Extracts, from the already split fields of one input line, the chromosome
# names, positions and end orders of the two reads of a pair, and writes them
# through the scalar references given by the caller.
#
# Arguments:
#   $chr1,$chr2     - scalar refs receiving the chromosome name of each read
#   $pos1,$pos2     - scalar refs receiving the position of each read
#   $order1,$order2 - scalar refs receiving the end order (1 or 2) of each read
#   $t              - array ref holding the fields of the current line
#   $file_type      - "solid", "eland" or "sam"
#   $tag_length     - hash ref, end order => read length (only read for "sam")
#
# Returns 0 when the line is to be ignored (sam line whose first field starts
# with "@", or sam chromosome field equal to "*"), 1 otherwise.
# For eland and sam, a read on strand "R" gets its position replaced by
# -(position + read length).
sub readMateFile{

    my ($chr1,$chr2,$pos1,$pos2,$order1,$order2,$t,$file_type,$tag_length)=@_;

    #solid: positions are shifted by one (0-based input), no strand handling
    if($file_type eq "solid"){
        $$chr1=$t->[6];
        $$chr2=$t->[7];
        $$pos1=$t->[8]+1;
        $$pos2=$t->[9]+1;
        $$order1=1;
        $$order2=2;
        return 1;
    }

    my ($strand1,$strand2);
    my ($tag_length1,$tag_length2);

    if($file_type eq "eland"){                        #1-based

        ($$chr1,$$pos1,$strand1)=@{$t}[11,7,8];
        ($$chr2,$$pos2,$strand2)=@{$t}[12,9,10];
        ($$order1,$$order2)=(1,2);
        $tag_length1=length($t->[1]);                 #read lengths come from the sequences
        $tag_length2=length($t->[2]);

    }elsif($file_type eq "sam"){

        return 0 if($t->[0]=~/^@/);                   #header sam filtered out

        ($$chr1,$$pos1)=@{$t}[2,3];
        ($$chr2,$$pos2)=@{$t}[6,7];

        return 0 if($$chr1 eq "*" || $$chr2 eq "*");

        $$chr2=$$chr1 if($$chr2 eq "=");              #"=" : same chromosome as read 1

        my $flag=$t->[1];                             #bitwise flag field
        $strand1=($flag&0x0010)? 'R':'F';
        $strand2=($flag&0x0020)? 'R':'F';

        $$order1=($flag&0x0040)? '1':'2';
        $$order2=($flag&0x0080)? '1':'2';
        $tag_length1=$tag_length->{$$order1};
        $tag_length2=$tag_length->{$$order2};
    }

    #get sequencing starts
    $$pos1=-($$pos1+$tag_length1) if($strand1 eq "R");
    $$pos2=-($$pos2+$tag_length2) if($strand2 eq "R");

    return 1;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
775 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
776 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
#Parsing of the mates files and creation of links between 2 chromosomal fragments
#
# Reads the mate-pair file, keeps the pairs matching the requested type of
# structural variation, records them in the caller's pair table, and links
# every fragment overlapped by read 1 with every fragment overlapped by
# read 2. The links are then written, sorted, to the links file.
#
# Arguments:
#   $chr          - hash ref of chromosomes; read here as
#                     $chr->{<ID>}->{name}, $chr->{<ID>}->{nb_frag} and
#                     $chr->{<ID>}->{<fragment number>} = [start,end]
#   $chrID        - hash ref, chromosome name => chromosome ID
#   $pair         - hash ref, filled in place: mate ID =>
#                     [chrID read1, chrID read2, first base read1,
#                      first base read2, end order read1, end order read2]
#   $tag_length   - hash ref, end order (1 or 2) => read length
#   $window_dist  - window size
#   $step         - step length
#   $mates_file   - input file; a name ending in ".gz" is read through
#                   "gunzip -c", a name ending in ".bam" through
#                   "samtools view", anything else is read directly
#   $input_format - input format, handed to readMateFile and recupCoords
#   $sv_type      - "all", "inter" (reads on different chromosomes only) or
#                   "intra" (reads on the same chromosome only)
#   $links_file   - output file, one link per line, tab-separated:
#                     name1, start1+1, end1+1, name2, start2+1, end2+1,
#                     number of mates, comma-separated mate list
#
# Progress is printed on the LOG filehandle, and the samtools location is
# taken from $SAMTOOLS_BIN_DIR; both are defined outside this sub.
# Dies when the input cannot be opened or the output cannot be written.
sub linking{

    my ($chr,$chrID,$pair,$tag_length,$window_dist,$step,$mates_file,$input_format,$sv_type,$links_file)=@_;
    my %link;

    my $record=0;
    my $nb_links=0;
    my $warn=10000;

    #label used in the log: last dot-separated part of the input file name
    my @sfile=split(/\./,$mates_file);
    my $fchr=$sfile[$#sfile];

    print LOG "# $fchr : Linking procedure...\n";
    print LOG "-- file=$mates_file\n".
              "-- chromosome=$fchr\n".
              "-- input format=$input_format\n".
              "-- type=$sv_type\n".
              "-- read1 length=$tag_length->{1}, read2 length=$tag_length->{2}\n".
              "-- window size=$window_dist, step length=$step\n";

    # The extension tests use an escaped dot so that only real ".gz"/".bam"
    # suffixes match. External commands are started with the list form of a
    # pipe open: the file name reaches the program as a single argument, with
    # no shell interpretation of blanks or metacharacters.
    my $fh;
    if($mates_file =~ /\.gz$/){
        open($fh,"-|","gunzip","-c",$mates_file) or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
    }elsif($mates_file =~ /\.bam$/){
        open($fh,"-|","$SAMTOOLS_BIN_DIR/samtools","view",$mates_file) or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
    }else{
        open($fh,"<",$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
    }

    while(my $line=<$fh>){

        my @t=split(' ',$line);                       #for each mate-pair
        next unless(@t);                              #blank line: nothing to parse
        my $mate=$t[0];
        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);

        next if(exists $$pair{$mate});                #mate ID already recorded

        next if(!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));

        next unless(exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        #from here on chromosomes are handled by ID, not by name
        ($chr_read1, $chr_read2)=($chrID->{$chr_read1},$chrID->{$chr_read2});

        if($sv_type ne "all"){
            my $same_chr=($chr_read1 eq $chr_read2);
            next unless( ($sv_type eq "inter" && !$same_chr) ||
                         ($sv_type eq "intra" &&  $same_chr) );
        }

        $$pair{$mate}=[$chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2 ]; #fill out the hash pair table (ready for the defineCoordsLinks function)

        $record++;

        my ($coord_start_read1,$coord_end_read1,$coord_start_read2,$coord_end_read2); #get the coordinates of each read

        recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
        recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);

        my $frags1=$chr->{$chr_read1};                #fragments of the chromosome of read 1
        my $frags2=$chr->{$chr_read2};                #fragments of the chromosome of read 2

        #fast genome parsing for link creation: the loop indexes are moved
        #forward by getNextFrag when the current fragment starts too far from the read
        for(my $i=1;$i<=$frags1->{nb_frag};$i++){

            if(abs($coord_start_read1-$frags1->{$i}->[0]) > $window_dist){
                $i=getNextFrag($coord_start_read1,$i,$frags1->{$i}->[0],$frags1->{nb_frag},$window_dist,$step);
                next;
            }

            next unless(overlap($coord_start_read1,$coord_end_read1,$frags1->{$i}->[0],$frags1->{$i}->[1]));

            for(my $j=1;$j<=$frags2->{nb_frag};$j++){

                if(abs($coord_start_read2-$frags2->{$j}->[0]) > $window_dist){
                    $j=getNextFrag($coord_start_read2,$j,$frags2->{$j}->[0],$frags2->{nb_frag},$window_dist,$step);
                    next;
                }

                if(overlap($coord_start_read2,$coord_end_read2,$frags2->{$j}->[0],$frags2->{$j}->[1])){
                    makeLink(\%link,$chr_read1,$i,$chr_read2,$j,$mate,\$nb_links); #make the link
                }
            }
        }

        if($record>=$warn){
            print LOG "-- $fchr : $warn mate-pairs analysed - $nb_links links done\n";
            $warn+=10000;
        }
    }
    close($fh);   #status of the input (or of the external command) is not checked, as before

    if(!$nb_links){
        print LOG "-- $fchr : No mate-pairs !\n".
                  "-- $fchr : No links have been found with the selected type of structural variations \($sv_type\)\n";
    }

    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_links links done\n";

    print LOG "-- $fchr : writing...\n";

    open(my $out,">",$links_file) or die "$0: can't write in the output ".$links_file." :$!\n";

    foreach my $chr1 ( sort { $a <=> $b} keys %link){ #Sorted links output

        foreach my $chr2 ( sort { $a <=> $b} keys %{$link{$chr1}}){

            foreach my $frag1 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}}){

                foreach my $frag2 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}{$frag1}}){

                    my $mates_list=$link{$chr1}{$chr2}{$frag1}{$frag2};
                    my @count=split(",",$mates_list);

                    print {$out} join("\t",
                                      $chr->{$chr1}->{name},
                                      $chr->{$chr1}->{$frag1}->[0]+1,
                                      $chr->{$chr1}->{$frag1}->[1]+1,
                                      $chr->{$chr2}->{name},
                                      $chr->{$chr2}->{$frag2}->[0]+1,
                                      $chr->{$chr2}->{$frag2}->[1]+1,
                                      scalar(@count),          #nb of read
                                      $mates_list)."\n";       #mate list
                }
            }
        }
    }

    #buffered write errors (e.g. disk full) only show up when the handle is closed
    close($out) or die "$0: can't write in the output ".$links_file." :$!\n";

    undef %link;

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
911 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
912 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
#remove exact duplicates of links according to the mate list
#
# Reads a links file and keeps a single line per distinct mate list (8th
# whitespace-separated column). When the same mate list shows up again, the
# coordinates kept for it are narrowed: columns 2 and 5 (starts) take the
# maximum value seen, columns 3 and 6 (ends) the minimum one. The kept lines
# are written to the output file in order of first appearance, columns joined
# by tabs.
#
# Arguments:
#   $links_file   - input links file
#   $nrlinks_file - output file receiving the unique links
#
# Progress is printed on the LOG filehandle, defined outside this sub.
# Dies when the input cannot be opened or the output cannot be written.
sub getUniqueLinks{

    my ($links_file,$nrlinks_file)=@_;
    my %links;     #mate list => array ref of the columns kept for that list
    my @unique;    #the same array refs, in order of first appearance

    my $record=0;
    my $warn=300000;

    #label used in the log: third dot-separated part of the file name, counted from the end
    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];

    print LOG "# $fchr : Getting unique links...\n";

    # Three-argument opens: the file names are used exactly as given.
    open(my $in,"<",$links_file) or die "$0: can't open $links_file :$!\n";

    while(my $line=<$in>){

        my @t=split(' ',$line);
        my $mates=$t[7];
        $record++;

        if(!exists $links{$mates}){                   #Unique links selection

            $links{$mates}=[@t];
            push(@unique,$links{$mates});

        }else{                                        #get the link coordinates from the mate-pairs list

            my $kept=$links{$mates};                  #get the shortest regions
            foreach my $i (1,4){                      #maximum start
                $kept->[$i]=$t[$i] if($t[$i]>$kept->[$i]);
            }
            foreach my $i (2,5){                      #minimum end
                $kept->[$i]=$t[$i] if($t[$i]<$kept->[$i]);
            }
        }
        if($record>=$warn){
            print LOG "-- $fchr : $warn links analysed - ".scalar(@unique)." unique links done\n";
            $warn+=300000;
        }
    }
    close($in);

    my $nb_links=scalar(@unique);
    print LOG "-- $fchr : Total : $record links analysed - $nb_links unique links done\n";

    open(my $out,">",$nrlinks_file) or die "$0: can't write in the output: $nrlinks_file :$!\n";
    print LOG "-- $fchr : writing...\n";
    foreach my $kept (@unique){

        print {$out} join("\t",@{$kept})."\n";        #all links output
    }

    #buffered write errors only show up when the handle is closed
    close($out) or die "$0: can't write in the output: $nrlinks_file :$!\n";

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
980 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
981 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
982 #get the new coordinates of each link from the mate list
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
983 sub defineCoordsLinks{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
984
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
985 my ($chr,$chrID,$pair,$input_format,$sv_type,$tag_length,$links_file,$clinks_file)=@_;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
986
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
987 my @sfile=split(/\./,$links_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
988 my $fchr=$sfile[$#sfile-2];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
989
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
990 my $fh = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
991 my $fh2 = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
992
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
993 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
994 $fh2->open(">$clinks_file") or die "$0: can't write in the output: $clinks_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
995
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
996 print LOG "# $fchr : Defining precise link coordinates...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
997
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
998 my $record=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
999 my $warn=100000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1000
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1001 my %coords;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1002 my %strands;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1003 my %order;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1004 my %ends_order;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1005
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1006 while(<$fh>){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1007
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1008
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1009 my ($col1,$col2)=(1,2); #for an intrachromosomal link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1010 my $diffchr=0; #difference between chr1 and chr2
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1011 my ($chr1,$chr2,$mates_list,$npairs)=(split)[0,3,7,8];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1012 ($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1013 if ($chr1 != $chr2){ #for an interchromosomal link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1014 $col1=$col2=0; #no distinction
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1015 $diffchr=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1016 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1017
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1018 my @pairs=split(",",$mates_list);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1019
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1020 $coords{$col1}{$chr1}->{start}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1021 $coords{$col1}{$chr1}->{end}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1022 $coords{$col2}{$chr2}->{start}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1023 $coords{$col2}{$chr2}->{end}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1024 $strands{$col1}{$chr1}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1025 $strands{$col2}{$chr2}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1026 $ends_order{$col1}{$chr1}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1027 $ends_order{$col2}{$chr2}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1028
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1029
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1030 $order{$col1}{$chr1}->{index}->{1}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1031 $order{$col1}{$chr1}->{index}->{2}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1032 $order{$col2}{$chr2}->{index}->{1}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1033 $order{$col2}{$chr2}->{index}->{2}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1034 $order{$col1}{$chr1}->{order}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1035 $order{$col2}{$chr2}->{order}=undef;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1036
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1037 $record++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1038
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1039 for my $p (0..$#pairs){ #for each pair
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1040
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1041 my ($coord_start_read1,$coord_end_read1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1042 my ($coord_start_read2,$coord_end_read2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1043 my $strand_read1=recupCoords(${$$pair{$pairs[$p]}}[2],\$coord_start_read1,\$coord_end_read1,$tag_length->{${$$pair{$pairs[$p]}}[4]},$input_format);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1044 my $strand_read2=recupCoords(${$$pair{$pairs[$p]}}[3],\$coord_start_read2,\$coord_end_read2,$tag_length->{${$$pair{$pairs[$p]}}[5]},$input_format);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1045
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1046 if(!$diffchr){ #for a intrachromosomal link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1047 if($coord_start_read2<$coord_start_read1){ #get the closer start coordinate for each column
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1048 ($col1,$col2)=(2,1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1049 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1050 ($col1,$col2)=(1,2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1051 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1052 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1053
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1054 push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{start}},$coord_start_read1); #get coords and strands of f3 and r3 reads
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1055 push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{end}},$coord_end_read1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1056 push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{start}},$coord_start_read2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1057 push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{end}},$coord_end_read2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1058 push(@{$strands{$col1}{${$$pair{$pairs[$p]}}[0]}},$strand_read1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1059 push(@{$strands{$col2}{${$$pair{$pairs[$p]}}[1]}},$strand_read2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1060 push(@{$ends_order{$col1}{${$$pair{$pairs[$p]}}[0]}},${$$pair{$pairs[$p]}}[4]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1061 push(@{$ends_order{$col2}{${$$pair{$pairs[$p]}}[1]}},${$$pair{$pairs[$p]}}[5]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1062 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1063
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1064 ($col1,$col2)=(1,2) if(!$diffchr);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1065
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1066 my $coord_start_chr1=min(min(@{$coords{$col1}{$chr1}->{start}}),min(@{$coords{$col1}{$chr1}->{end}})); #get the biggest region
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1067 my $coord_end_chr1=max(max(@{$coords{$col1}{$chr1}->{start}}),max(@{$coords{$col1}{$chr1}->{end}}));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1068 my $coord_start_chr2=min(min(@{$coords{$col2}{$chr2}->{start}}),min(@{$coords{$col2}{$chr2}->{end}}));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1069 my $coord_end_chr2=max(max(@{$coords{$col2}{$chr2}->{start}}),max(@{$coords{$col2}{$chr2}->{end}}));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1070
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1071 @{$order{$col1}{$chr1}->{index}->{1}}= sort {${$coords{$col1}{$chr1}->{start}}[$a] <=> ${$coords{$col1}{$chr1}->{start}}[$b]} 0 .. $#{$coords{$col1}{$chr1}->{start}};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1072 @{$order{$col2}{$chr2}->{index}->{1}}= sort {${$coords{$col2}{$chr2}->{start}}[$a] <=> ${$coords{$col2}{$chr2}->{start}}[$b]} 0 .. $#{$coords{$col2}{$chr2}->{start}};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1073
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1074 foreach my $i (@{$order{$col1}{$chr1}->{index}->{1}}){ #get the rank of the chr2 reads according to the sorted chr1 reads (start coordinate sorting)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1075 foreach my $j (@{$order{$col2}{$chr2}->{index}->{1}}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1076
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1077 if(${$order{$col1}{$chr1}->{index}->{1}}[$i] == ${$order{$col2}{$chr2}->{index}->{1}}[$j]){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1078 ${$order{$col1}{$chr1}->{index}->{2}}[$i]=$i;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1079 ${$order{$col2}{$chr2}->{index}->{2}}[$i]=$j;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1080 last;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1081 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1082 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1083 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1084
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1085 foreach my $i (@{$order{$col1}{$chr1}->{index}->{2}}){ #use rank chr1 as an ID
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1086 foreach my $j (@{$order{$col2}{$chr2}->{index}->{2}}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1087
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1088 if(${$order{$col1}{$chr1}->{index}->{2}}[$i] == ${$order{$col2}{$chr2}->{index}->{2}}[$j]){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1089 ${$order{$col1}{$chr1}->{order}}[$i]=$i+1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1090 ${$order{$col2}{$chr2}->{order}}[$i]=$j+1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1091 last;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1092 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1093 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1094 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1095
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1096 @pairs=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},\@pairs);#sorting of the pairs, strands, and start coords from the sorted chr2 reads
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1097 @{$strands{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col1}{$chr1});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1098 @{$strands{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col2}{$chr2});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1099 @{$ends_order{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col1}{$chr1});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1100 @{$ends_order{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col2}{$chr2});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1101 @{$coords{$col1}{$chr1}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col1}{$chr1}->{start});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1102 @{$coords{$col2}{$chr2}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col2}{$chr2}->{start});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1103
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1104
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1105 my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1, #all information output
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1106 $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1107 scalar @pairs,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1108 join(",",@pairs),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1109 join(",",@{$strands{$col1}{$chr1}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1110 join(",",@{$strands{$col2}{$chr2}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1111 join(",",@{$ends_order{$col1}{$chr1}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1112 join(",",@{$ends_order{$col2}{$chr2}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1113 join(",",@{$order{$col1}{$chr1}->{order}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1114 join(",",@{$order{$col2}{$chr2}->{order}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1115 join(",",@{$coords{$col1}{$chr1}->{start}}),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1116 join(",",@{$coords{$col2}{$chr2}->{start}}));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1117
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1118 print $fh2 join("\t",@link)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1119
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1120 if($record>=$warn){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1121 print LOG "-- $fchr : $warn links processed\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1122 $warn+=100000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1123 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1124 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1125 $fh->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1126 $fh2->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1127
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1128 print LOG "-- $fchr : Total : $record links processed\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1129
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1130 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1131 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1132 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1133 #Sort links according to the chromosomes involved and their coordinates
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub sortLinks{
    # Sort a tab-separated links file with the external "sort" program.
    #
    # Arguments:
    #   $links_file       - path of the links file to sort
    #   $sortedlinks_file - path of the sorted file to create (overwritten)
    #   $unique           - when true, the sorted stream is additionally piped
    #                       through "sort -u" to drop duplicated lines
    #
    # Sort keys: column 1 and column 4 as text, then columns 2, 5 and 8 as
    # numbers. Judging from the link records written elsewhere in this file
    # these are chr1, chr2, start1, start2 and the pairs column -- TODO confirm.
    #
    # Returns the status of system() (0 on success); dies when the command fails.
    # NOTE: with $unique set, the status is the one of the last command of the
    # pipeline only ("sort -u").

    my ($links_file,$sortedlinks_file,$unique)=@_;

    #The chromosome tag used in the log is the third dot-separated field from the end
    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];

    print LOG "# $fchr : Sorting links...\n";

    #Single-quote the file names so that spaces or shell metacharacters in a
    #path are passed literally to sort instead of being interpreted by the shell
    my $shell_quote=sub{
        my ($arg)=@_;
        $arg=~s/'/'\\''/g;
        return "'".$arg."'";
    };

    my $pipe=($unique)? "| sort -u":"";
    my $cmd="sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n ".
            $shell_quote->($links_file)." $pipe > ".$shell_quote->($sortedlinks_file);

    #A failed sort leaves an empty/partial output: stop here rather than
    #letting the following steps work on a truncated file
    my $status=system($cmd);
    die "$0: sorting of $links_file into $sortedlinks_file failed (status $status)\n" if($status!=0);

    return $status;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1148 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1149 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1150 #removal of fully overlapped links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub removeFullyOverlappedLinks{
    # Remove from a links file every link whose pairs are all contained in
    # another link located in overlapping regions.
    #
    # Arguments:
    #   $links_file   - input links file, tab-separated, one link per line.
    #                   Columns used: 0-5 (chr1,start1,end1,chr2,start2,end2)
    #                   and 7 (comma-separated list of pairs).
    #                   The scan stops at the first following link that is on
    #                   other chromosomes or does not overlap on chr1, so the
    #                   file is presumably expected to be sorted (see sortLinks)
    #                   -- TODO confirm with the callers.
    #   $nrlinks_file - output file receiving the kept links, in input order
    #   $warn_out     - when true, the creation of the output is logged
    #
    # For each link (link1) the following links (link2) are scanned while they
    # are on the same chromosomes and overlap link1 on chr1. When link2 also
    # overlaps on chr2:
    #   - if all pairs of link1 are found in link2, link1 is dropped;
    #   - else if all pairs of link2 are found in link1, link2 is dropped.
    # The pairs compared are those returned by deleteBadOrderSensePairs()
    # (defined elsewhere in this file).
    #
    # Dies if the input cannot be read or the output cannot be written.

    my ($links_file,$nrlinks_file,$warn_out)=@_;

    #The chromosome tag used in the log is the third dot-separated field from the end
    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];

    #Load all the links; a slot is set to undef once its link is dropped
    my @links;
    my $fh = FileHandle->new;
    #explicit mode argument: the file name is never parsed for a mode or trimmed
    $fh->open($links_file,"<") or die "$0: can't open $links_file :$!\n";
    while(my $line=<$fh>){
        push(@links,[split("\t",$line)]);           #the last field keeps its newline
    }
    $fh->close;

    my $nb_links=scalar @links;
    my @kept;                                       #non-overlapped links, in input order
    my $record=0;
    my $warn=10000;

    #index of the first link not yet dropped at or after $k ($nb_links if none)
    my $next_live=sub{
        my ($k)=@_;
        $k++ while($k<$nb_links && !defined $links[$k]);
        return $k;
    };

    print LOG "# $fchr : Removing fully overlapped links...\n";

    LINK:
    for my $i (0..$nb_links-1){

        $record++;
        if($record>=$warn){
            print LOG "-- $fchr : $warn unique links analysed - ".scalar(@kept)." non-overlapped links done\n";
            $warn+=10000;
        }

        my $link=$links[$i];                        #link1
        next LINK unless(defined $link);            #already dropped as a link2

        my ($chr1,$start1,$end1,$chr2,$start2,$end2)=@{$link}[0..5];
        my @mates=deleteBadOrderSensePairs(split(",",$link->[7]));
        my %is_mate=map{$_ =>1} @mates;             #loop-invariant lookup of link1 pairs

        my $j=$next_live->($i+1);                   #link2 = next link still present

        while($j<$nb_links){

            my $next_link=$links[$j];
            my ($chr3,$start3,$end3,$chr4,$start4,$end4)=@{$next_link}[0..5];
            my @next_mates=deleteBadOrderSensePairs(split(",",$next_link->[7]));

            #stop at the first link2 on other chromosomes or without chr1 overlap
            last unless(($chr1 eq $chr3 && $chr2 eq $chr4) && overlap($start1,$end1,$start3,$end3));

            if(overlap($start2,$end2,$start4,$end4)){   #pairs are compared only if chr2 regions overlap too

                my $nb_mates=grep {$is_mate{$_}} @next_mates;   #pairs of link2 also in link1

                if($nb_mates == scalar @mates){
                    $links[$i]=undef;               #pairs of link1 all included in link2 -> drop link1
                    next LINK;
                }
                $links[$j]=undef if($nb_mates == scalar @next_mates); #pairs of link2 all included in link1 -> drop link2
            }

            $j=$next_live->($j+1);                  #check the next link2
        }

        push(@kept,$link);                          #no (more) candidate link2 -> save link1
    }

    print LOG "-- $fchr : Total : $nb_links unique links analysed - ".scalar(@kept)." non-overlapped links done\n";

    #OUTPUT

    my $out = FileHandle->new;
    $out->open($nrlinks_file,">") or die "$0: can't write in the output: $nrlinks_file :$!\n";
    print LOG "-- $fchr : writing...\n";
    for my $kept_link (@kept){
        print $out join("\t",@{$kept_link});       #all links output
    }
    #buffered write errors (e.g. disk full) only show up when closing
    $out->close or die "$0: can't write in the output: $nrlinks_file :$!\n";

    print LOG "-- $fchr : output created: $nrlinks_file\n" if($warn_out);

    return;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1285 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub postFiltering {
    # Keep only the links whose final score reaches a threshold.
    #
    # Arguments:
    #   $links_file       - input links file, tab-separated, one link per line;
    #                       the score is read from the second-to-last column
    #   $pflinks_file     - output file receiving the kept lines
    #   $finalScore_thres - minimal score (inclusive) for a link to be kept
    #
    # The number of links read and kept is written to LOG.
    # Dies if the input cannot be read or the output cannot be written.

    my ($links_file,$pflinks_file, $finalScore_thres)=@_;

    #The chromosome tag used in the log is the third dot-separated field from the end
    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];

    my ($nb,$nb2)=(0,0);                            #links read, links kept

    print LOG "# $fchr : Post-filtering links...\n";
    print LOG "-- $fchr : final score threshold = $finalScore_thres\n";

    my $fh = FileHandle->new;
    my $fh2 = FileHandle->new;

    #explicit mode argument: the file names are never parsed for a mode or trimmed
    $fh->open($links_file,"<") or die "$0: can't open $links_file :$!\n";
    $fh2->open($pflinks_file,">") or die "$0: can't write in the output: $pflinks_file :$!\n";

    while(my $line=<$fh>){

        my @t=split("\t",$line);                    #the last field keeps its newline
        my $score=$t[$#t-1];

        if($score >= $finalScore_thres){
            print $fh2 join("\t", @t);
            $nb2++;
        }
        $nb++;
    }
    $fh->close;
    #buffered write errors (e.g. disk full) only show up when closing
    $fh2->close or die "$0: can't write in the output: $pflinks_file :$!\n";

    print LOG "-- $fchr : Total : $nb unique links analysed - $nb2 links kept\n";
    print LOG "-- $fchr : output created: $pflinks_file\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1323
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1324
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1325
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1326 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1327 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1328 #Filtering of the links
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1329 sub strandFiltering{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1330
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1331 my($chr,$chrID,$pairs_threshold,$strand_filtering,$chromosomes,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1332 $input_format,$cmap_file,$mate_sense, $tag_length,$links_file,$flinks_file)=@_;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1333
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1334 my @sfile=split(/\./,$links_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1335 my $fchr=$sfile[$#sfile-1];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1336
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1337
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1338 my %chrs;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1339 my %chrs1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1340 my %chrs2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1341 my $nb_chrs;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1342 my $exclude;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1343
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1344 if($chromosomes){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1345 my @chrs=split(",",$chromosomes);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1346 $nb_chrs=scalar @chrs;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1347 $exclude=($chrs[0]=~/^\-/)? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1348 for my $chrName (@chrs){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1349 $chrName=~s/^(\-)//;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1350 my $col=($chrName=~s/_(1|2)$//);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1351
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1352 if(!$col){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1353 $chrs{$chrID->{$chrName}}=undef
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1354 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1355 $chrs1{$chrID->{$chrName}}=undef if($1==1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1356 $chrs2{$chrID->{$chrName}}=undef if($1==2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1357 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1358 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1359 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1360
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1361 my $record=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1362 my $nb_links=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1363 my $warn=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1364
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1365 my $sens_ratio_threshold=0.6;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1366
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1367 print LOG "\# Filtering procedure...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1368 print LOG "\# Number of pairs and strand filtering...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1369 print LOG "-- file=$links_file\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1370 print LOG "-- nb_pairs_threshold=$pairs_threshold, strand_filtering=".(($strand_filtering)? "yes":"no").
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1371 ", chromosomes=".(($chromosomes)? "$chromosomes":"all")."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1372
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1373
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1374
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1375 my $fh = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1376 my $fh2 = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1377
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1378 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1379 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1380
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1381 while(<$fh>){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1382
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1383 my @t=split; #for each link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1384 my $is_good=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1385 $record++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1386
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1387
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1388 if($chromosomes){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1389
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1390 my ($chr1,$chr2)=($chrID->{$t[0]},$chrID->{$t[3]});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1391
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1392 if(!$exclude){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1393 $is_good=(exists $chrs{$chr1} && exists $chrs{$chr2})? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1394 $is_good=(exists $chrs1{$chr1} && exists $chrs2{$chr2})? 1:0 if(!$is_good);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1395 $is_good=($nb_chrs==1 && (exists $chrs1{$chr1} || exists $chrs2{$chr2}))? 1:0 if(!$is_good);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1396 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1397 $is_good=(exists $chrs{$chr1} || exists $chrs{$chr2})? 0:1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1398 $is_good=(exists $chrs1{$chr1} || exists $chrs2{$chr2})? 0:1 if($is_good);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1399 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1400 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1401
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1402 $is_good = ($is_good && $t[6] >= $pairs_threshold)? 1 :0; #filtering according the number of pairs
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1403 if($is_good && $strand_filtering){ #if filtering according the strand sense
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1404
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1405 my @mates=split(/,/,$t[7]); #get the concordant pairs in the strand sense
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1406 my @strands1=split(/,/,$t[8]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1407 my @strands2=split(/,/,$t[9]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1408
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1409 my %mate_class=( 'FF' => 0, 'RR' => 0, 'FR' => 0, 'RF' => 0);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1410
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1411 my %mate_reverse=( 'FF' => 'RR', 'RR' => 'FF', #group1: FF,RR
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1412 'FR' => 'RF', 'RF' => 'FR'); #group2: FR,RF
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1413
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1414 my %mate_class2=( $mate_sense=>"NORMAL_SENSE", inverseSense($mate_sense)=>"NORMAL_SENSE",
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1415 substr($mate_sense,0,1).inverseSense(substr($mate_sense,1,1))=>"REVERSE_SENSE",
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1416 inverseSense(substr($mate_sense,0,1)).substr($mate_sense,1,1)=>"REVERSE_SENSE");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1417
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1418 if($t[6] == 1){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1419
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1420 push(@t,$mate_class2{$strands1[0].$strands2[0]},"1/1",1,1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1421
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1422 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1423
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1424 tie (my %class,'Tie::IxHash');
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1425 my $split;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1426
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1427 foreach my $i (0..$#mates){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1428 $mate_class{$strands1[$i].$strands2[$i]}++; #get the over-represented group
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1429 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1430
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1431 my $nb_same_sens_class=$mate_class{FF}+$mate_class{RR};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1432 my $nb_diff_sens_class=$mate_class{FR}+$mate_class{RF};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1433 my $sens_ratio=max($nb_same_sens_class,$nb_diff_sens_class)/($nb_same_sens_class+$nb_diff_sens_class);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1434
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1435 if($sens_ratio < $sens_ratio_threshold){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1436 %class=(1=>'FF', 2=>'FR');
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1437 $split=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1438 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1439 $class{1}=($nb_same_sens_class > $nb_diff_sens_class)? 'FF':'FR'; #if yes get the concerned class
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1440 $split=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1441 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1442
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1443 $is_good=getConsistentSenseLinks(\@t,\@mates,\@strands1,\@strands2,$tag_length,$mate_sense,\%mate_reverse,\%mate_class2,\%class,$split,$pairs_threshold);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1444 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1445 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1446
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1447 if($is_good){ #PRINT
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1448
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1449 my $nb=scalar @t;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1450 if($nb > 20){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1451 my @t2=splice(@t,0,20);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1452 print $fh2 join("\t",@t2)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1453 $nb_links++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1454 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1455 $nb_links++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1456 print $fh2 join("\t",@t)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1457 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1458
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1459 if($record>=$warn){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1460 print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1461 $warn+=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1462 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1463 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1464 $fh->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1465 $fh2->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1466
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1467 print LOG "-- $fchr : No links have been found with the selected filtering parameters\n" if(!$nb_links);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1468
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1469 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1470
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1471
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1472 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1473 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1474 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Rebuild a link record from the pairs whose strand orientation belongs to the
# class(es) requested by the caller.
#
# Arguments:
#   $t            array ref holding the fields of the link record; fields 0, 3
#                 and 10..15 are read, and the whole array is overwritten when
#                 at least one class is kept
#   $mates        array ref of the pair identifiers of the link
#   $strands1     array ref, strand letter of the first end of each pair
#   $strands2     array ref, strand letter of the second end of each pair
#   $tag_length   forwarded unchanged to getEnds()
#   $mate_sense   not used in this sub (kept so existing callers still work)
#   $mate_reverse hash ref giving, for a strand pair, the reversed pair that
#                 belongs to the same group
#   $mate_class2  hash ref giving the label stored as field 16 of the new record
#   $class        hash ref (index => strand pair) of the classes to extract;
#                 one candidate record is built per key
#   $split        not used in this sub (kept so existing callers still work)
#   $thres        minimum number of pairs a class must keep to be reported
#
# Returns 1 after replacing @$t with the new record(s), or 0 (leaving @$t
# untouched) when no class reaches $thres pairs. Each kept class contributes
# 20 consecutive fields, so @$t may hold several records back to back.
sub getConsistentSenseLinks{

    my ($t,$mates,$strands1,$strands2,$tag_length,$mate_sense, $mate_reverse,$mate_class2, $class, $split,$thres)=@_;

    my $npairs=scalar @{$mates};

    my @ends_order1 = split (/,/,$$t[10]);
    my @ends_order2 = split (/,/,$$t[11]);
    my @order1      = split (/,/,$$t[12]);
    my @order2      = split (/,/,$$t[13]);
    my @positions1  = split (/,/,$$t[14]);
    my @positions2  = split (/,/,$$t[15]);

    my @newlink;

    foreach my $ind (keys %{$class}){

        my $wanted  = $$class{$ind};
        my $reverse = $$mate_reverse{$wanted};

        # Every column starts as an empty list, so a class that matches no pair
        # yields an empty group instead of an undefined array reference (which
        # cannot be dereferenced in rvalue context under "strict refs").
        my %flink = map { $_ => [] } qw(mates strands1 strands2
                                        ends_order1 ends_order2
                                        order1 order2
                                        positions1 positions2);
        my @orders2remove;

        foreach my $i (0..$#{$mates}){          #get the pairs belonging to the requested class

            my $sense = $$strands1[$i].$$strands2[$i];

            if(($sense eq $wanted) || ($sense eq $reverse)){
                push(@{$flink{mates}},       $$mates[$i]);
                push(@{$flink{strands1}},    $$strands1[$i]);
                push(@{$flink{strands2}},    $$strands2[$i]);
                push(@{$flink{ends_order1}}, $ends_order1[$i]);
                push(@{$flink{ends_order2}}, $ends_order2[$i]);
                push(@{$flink{positions1}},  $positions1[$i]);
                push(@{$flink{positions2}},  $positions2[$i]);
            }else{
                push(@orders2remove,$order1[$i]);       #order of a rejected pair
            }
        }

        if(scalar @orders2remove > 0){
            #some pairs were rejected: renumber the remaining orders
            getNewOrders(\@order1,\@order2,\@orders2remove,$flink{order1},$flink{order2});
        }else{
            @{$flink{order1}}=@order1;
            @{$flink{order2}}=@order2;
        }

        my @ends1; getEnds(\@ends1,$flink{positions1},$flink{strands1},$flink{ends_order1},$tag_length);
        my @ends2; getEnds(\@ends2,$flink{positions2},$flink{strands2},$flink{ends_order2},$tag_length);

        my $fnpairs=scalar @{$flink{mates}};
        my $strand_filtering_ratio=$fnpairs."/".$npairs;
        my $real_ratio=$fnpairs/$npairs;

        next if($fnpairs < $thres);             #filtering according the number of pairs

        push(@newlink,
             $$t[0],
             min(min(@{$flink{positions1}}),min(@ends1)),
             max(max(@{$flink{positions1}}),max(@ends1)),
             $$t[3],
             min(min(@{$flink{positions2}}),min(@ends2)),
             max(max(@{$flink{positions2}}),max(@ends2)),
             $fnpairs,
             join(",",@{$flink{mates}}),
             join(",",@{$flink{strands1}}),
             join(",",@{$flink{strands2}}),
             join(",",@{$flink{ends_order1}}),
             join(",",@{$flink{ends_order2}}),
             join(",",@{$flink{order1}}),
             join(",",@{$flink{order2}}),
             join(",",@{$flink{positions1}}),
             join(",",@{$flink{positions2}}),
             $$mate_class2{${$flink{strands1}}[0].${$flink{strands2}}[0]},
             $strand_filtering_ratio,
             $real_ratio,
             $npairs
        );
    }

    if (grep {defined($_)} @newlink) {
        @$t=@newlink;
        return 1;
    }
    return 0;

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1561 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1562 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Renumber a list of orders after some of them have been removed.
#
# Arguments:
#   $tab1     array ref, not read by this sub
#   $tab2     array ref of numeric orders to filter and renumber
#   $list     array ref of the orders to remove
#   $newtab1  array ref, receives 1,2,3,... (one value per kept element)
#   $newtab2  array ref, receives each kept element of $tab2 decreased by the
#             number of removed orders that are smaller than it
#
# Results are appended to @$newtab1 / @$newtab2; callers do not use the
# return value.
sub getNewOrders{

    my ($tab1, $tab2, $list, $newtab1, $newtab2) = @_;

    my $rank = 1;

    ORDER:
    foreach my $current (@{$tab2}) {

        my $nb_smaller = 0;

        foreach my $removed (@{$list}) {
            $nb_smaller++ if ($removed < $current);
            next ORDER    if ($removed == $current);    #this order is itself removed
        }

        push(@{$newtab2}, $current - $nb_smaller);
        push(@{$newtab1}, $rank++);
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1583
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1584 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1585 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1586 #Filtering of the links using their order
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1587 sub orderFiltering {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1588
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1589 my ($chr,$chrID,$nb_pairs_threshold,$nb_pairs_order_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1590
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1591 my @sfile=split(/\./,$links_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1592 my $fchr=$sfile[$#sfile-2];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1593
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1594
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1595 my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1596
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1597 my $record=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1598 my $warn=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1599 my $nb_links=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1600
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1601 my $quant05 = 1.644854;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1602 my $quant001 = 3.090232;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1603 my $alphaDist = $quant05 * 2 * $sigma;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1604 my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1605
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1606 print LOG "\# Filtering by order...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1607 print LOG "-- mu length=$mu, sigma length=$sigma, nb pairs order threshold=$nb_pairs_order_threshold\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1608 print LOG "-- distance between comparable pairs was set to $alphaDist\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1609 print LOG "-- maximal fragment length was set to $maxFragmentLength\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1610
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1611
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1612 my $fh = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1613 my $fh2 = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1614
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1615 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1616 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1617
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1618 while(<$fh>){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1619
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1620 $record++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1621 my @t = split;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1622 my ($chr1,$chr2,$mates_list)=@t[0,3,7];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1623 my @pairs=split(",",$mates_list);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1624 ($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1625 my ($coord_start_chr1,$coord_end_chr1,$coord_start_chr2,$coord_end_chr2) = @t[1,2,4,5];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1626 my $numberOfPairs = $t[6];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1627 my @strand1 = split (/,/,$t[8]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1628 my @strand2 = split (/,/,$t[9]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1629 my @ends_order1 = split (/,/,$t[10]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1630 my @ends_order2 = split (/,/,$t[11]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1631 my @order1 = split (/,/,$t[12]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1632 my @order2 = split (/,/,$t[13]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1633 my @positions1 = split (/,/,$t[14]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1634 my @positions2 = split (/,/,$t[15]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1635 my @ends1; getEnds(\@ends1,\@positions1,\@strand1,\@ends_order1,$tag_length);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1636 my @ends2; getEnds(\@ends2,\@positions2,\@strand2,\@ends_order2,$tag_length);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1637 my $clusterCoordinates_chr1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1638 my $clusterCoordinates_chr2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1639 my $reads_left = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1640
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1641 my $ifRenv = $t[16];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1642 my $strand_ratio_filtering=$t[17];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1643
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1644 #kind of strand filtering. For example, will keep only FFF-RRR from a link FFRF-RRRF if <F-R> orientation is correct
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1645 my ($singleBreakpoint, %badInFRSense) = findBadInFRSenseSOLiDSolexa(\@strand1,\@strand2,\@ends_order1,\@ends_order2,\@order1,\@order2,$mate_sense);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1646 #find pairs type F-RRRR or FFFF-R in the case if <R-F> orientation is correct
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1647 #These pairs are annotated as BED pairs forever! They won't be recycled!
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1648 my $table;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1649 for my $i (0..$numberOfPairs-1) { #fill the table with non adequate pairs: pairID numberOfNonAdPairs nonAdPairIDs
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1650 my $nonAdeq = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1651 for my $j (0..$i-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1652 if (exists($table->{$j}->{$i})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1653 $nonAdeq++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1654 $table->{$i}->{$j} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1655 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1656 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1657 for my $j ($i+1..$numberOfPairs-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1658 if ($positions1[$j]-$positions1[$i]>$alphaDist) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1659 if (&reversed ($i,$j,$ifRenv,\@positions2)) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1660 $nonAdeq++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1661 $table->{$i}->{$j} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1662 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1663 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1664 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1665 $table->{$i}->{nonAdeq} = $nonAdeq;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1666 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1667
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1668 for my $bad (keys %badInFRSense) { #remove pairs type F-RRRR or FFFF-R in the case of <R-F> orientation
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1669 &remove($bad,$table);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1670 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1671
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1672 my @falseReads;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1673 #RRRR-F -> RRRR or R-FFFF -> FFFF
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1674 @falseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense, keys %{$table});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1675 #these pairs will be recycled later as $secondTable
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1676 for my $bad (@falseReads) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1677 &remove($bad,$table);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1678 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1679
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1680 my $bad = &check($table);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1681 while ($bad ne "OK") { #clear the table to reject non adequate pairs in the sense of ORDER
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1682 # push (@falseReads, $bad); remove completely!!!
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1683 &remove($bad,$table);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1684 $bad = &check($table);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1685 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1686
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1687 $reads_left = scalar keys %{$table};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1688 my $coord_start_chr1_cluster1 = min(min(@positions1[sort {$a<=>$b} keys %{$table}]),min(@ends1[sort {$a<=>$b} keys %{$table}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1689 my $coord_end_chr1_cluster1 = max(max(@positions1[sort {$a<=>$b} keys %{$table}]),max(@ends1[sort {$a<=>$b} keys %{$table}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1690 my $coord_start_chr2_cluster1 = min(min(@positions2[sort {$a<=>$b} keys %{$table}]),min(@ends2[sort {$a<=>$b} keys %{$table}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1691 my $coord_end_chr2_cluster1 = max(max(@positions2[sort {$a<=>$b} keys %{$table}]),max(@ends2[sort {$a<=>$b} keys %{$table}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1692
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1693 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1694 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1695
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1696 my $ifBalanced = 'UNBAL';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1697 my $secondTable;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1698 my $clusterCoordinates;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1699 my ($break_pont_chr1,$break_pont_chr2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1700
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1701 my $signatureType="";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1702
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1703 my $maxCoord1 =$chr->{$chr1}->{length};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1704 my $maxCoord2 =$chr->{$chr2}->{length};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1705
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1706 if (scalar @falseReads) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1707 @falseReads = sort @falseReads;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1708 #now delete FRFR choosing the majority
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1709 my @newfalseReads; #find and remove pairs type RRRR-F or R-FFFF
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1710 @newfalseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense,@falseReads); #these @newfalseReads won't be recycled
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1711 my %hashTmp;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1712 for my $count1 (0..scalar(@falseReads)-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1713 my $i = $falseReads[$count1];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1714 $hashTmp{$i} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1715 for my $bad (@newfalseReads) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1716 if ($bad == $i) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1717 delete $hashTmp{$i};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1718 next;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1719 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1720 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1721 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1722 @falseReads = sort keys %hashTmp; #what is left
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1723 for my $count1 (0..scalar(@falseReads)-1) { #fill the table for reads which were previously rejected
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1724 my $nonAdeq = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1725 my $i = $falseReads[$count1];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1726
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1727 for my $count2 (0..$count1-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1728 my $j = $falseReads[$count2];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1729 if (exists($secondTable->{$j}->{$i})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1730 $nonAdeq++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1731 $secondTable->{$i}->{$j} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1732 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1733 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1734 for my $count2 ($count1+1..scalar(@falseReads)-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1735 my $j = $falseReads[$count2];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1736 if ($positions1[$j]-$positions1[$i]>$alphaDist) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1737 if (&reversed ($i,$j,$ifRenv,\@positions2)) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1738 $nonAdeq++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1739 $secondTable->{$i}->{$j} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1740 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1741 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1742 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1743 $secondTable->{$i}->{nonAdeq} = $nonAdeq;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1744 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1745
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1746 my @falseReads2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1747 my $bad = &check($secondTable);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1748 while ($bad ne "OK") { #clear the table to reject non adequate pairs
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1749 push (@falseReads2, $bad);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1750 &remove($bad,$secondTable);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1751 $bad = &check($secondTable);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1752 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1753 if (scalar keys %{$secondTable} >= $nb_pairs_order_threshold) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1754 my $coord_start_chr1_cluster2 = min(min(@positions1[sort {$a<=>$b} keys %{$secondTable}]),min(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1755 my $coord_end_chr1_cluster2 = max(max(@positions1[sort {$a<=>$b} keys %{$secondTable}]),max(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1756 my $coord_start_chr2_cluster2 = min(min(@positions2[sort {$a<=>$b} keys %{$secondTable}]),min(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1757 my $coord_end_chr2_cluster2 = max(max(@positions2[sort {$a<=>$b} keys %{$secondTable}]),max(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1758
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1759 $ifBalanced = 'BAL';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1760
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1761 if ($ifBalanced eq 'BAL') {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1762
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1763 if (scalar keys %{$table} < $nb_pairs_order_threshold) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1764 $ifBalanced = 'UNBAL'; #kill cluster 1!
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1765 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1766 $reads_left = scalar keys %{$table};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1767 $coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1768 $coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1769 $coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1770 $coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1771 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1772 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1773
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1774 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1775
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1776 $reads_left += scalar keys %{$secondTable};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1777 next if ($reads_left < $nb_pairs_threshold);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1778
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1779 if ($coord_end_chr1_cluster2 < $coord_start_chr1_cluster1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1780 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1781
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1782 ($coord_start_chr1_cluster1,$coord_start_chr1_cluster2) = ($coord_start_chr1_cluster2,$coord_start_chr1_cluster1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1783 ($coord_end_chr1_cluster1,$coord_end_chr1_cluster2)=($coord_end_chr1_cluster2,$coord_end_chr1_cluster1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1784 ($coord_start_chr2_cluster1,$coord_start_chr2_cluster2)=($coord_start_chr2_cluster2,$coord_start_chr2_cluster1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1785 ($coord_end_chr2_cluster1 , $coord_end_chr2_cluster2)=($coord_end_chr2_cluster2 , $coord_end_chr2_cluster1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1786
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1787 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.'),'.$clusterCoordinates_chr1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1788 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.'),'.$clusterCoordinates_chr2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1789 }else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1790 $clusterCoordinates_chr1 .= ',('.$coord_start_chr1_cluster2.','.$coord_end_chr1_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1791 $clusterCoordinates_chr2 .= ',('.$coord_start_chr2_cluster2.','.$coord_end_chr2_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1792 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1793 $coord_start_chr1 = min($coord_start_chr1_cluster1,$coord_start_chr1_cluster2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1794 $coord_end_chr1 = max($coord_end_chr1_cluster1,$coord_end_chr1_cluster2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1795 $coord_start_chr2 = min($coord_start_chr2_cluster1,$coord_start_chr2_cluster2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1796 $coord_end_chr2 = max($coord_end_chr2_cluster1,$coord_end_chr2_cluster2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1797 #to calculate breakpoints one need to take into account read orientation in claster..
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1798 my $leftLetterOk = substr($mate_sense, 0, 1); #R
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1799 my $rightLetterOk = substr($mate_sense, 1, 1); #F
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1800
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1801
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1802 my @index1 = keys %{$table};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1803 my @index2 = keys %{$secondTable};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1804
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1805 my (@generalStrand1,@generalStrand2) = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1806
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1807 if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1808 $leftLetterOk = 'R';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1809 $rightLetterOk = 'F';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1810 @generalStrand1 = translateSolidToRF(\@strand1,\@ends_order1);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1811 @generalStrand2 = translateSolidToRF(\@strand2,\@ends_order2);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1812 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1813 @generalStrand1 = @strand1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1814 @generalStrand2 = @strand2; # TODO check if it is correct
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1815 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1816 if ($generalStrand1[$index1[0]] eq $leftLetterOk && $generalStrand1[$index2[0]] eq $rightLetterOk) { #(R,F)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1817 $break_pont_chr1 = '('.$coord_end_chr1_cluster1.','.$coord_start_chr1_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1818
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1819 if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1820 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1821 $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1822 $signatureType = "TRANSLOC";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1823 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1824 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1825 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1826 $signatureType = "INS_FRAGMT";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1827 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1828
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1829 } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1830 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1831 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1832 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1833 $signatureType = "INV_INS_FRAGMT";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1834 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1835 $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1836 $signatureType = "INV_TRANSLOC";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1837 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1838 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1839 #should not occur
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1840 print STDERR "\nError in orderFiltering\n\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1841 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1842 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1843
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1844 elsif ($generalStrand1[$index1[0]] eq $rightLetterOk && $generalStrand1[$index2[0]] eq $leftLetterOk) { #(F,R)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1845 $break_pont_chr1 = '('.max(($coord_end_chr1_cluster1-$maxFragmentLength),1).','.$coord_start_chr1_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1846 $break_pont_chr1 .= ',('.$coord_end_chr1_cluster2.','.min(($coord_start_chr1_cluster2+$maxFragmentLength),$maxCoord1).')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1847 if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1848 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1849 $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1850 $signatureType = "INV_INS_FRAGMT";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1851 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1852 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1853 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1854 $signatureType = "INV_COAMPLICON";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1855 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1856
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1857 } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1858 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1859 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1860 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1861 $signatureType = "COAMPLICON";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1862 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1863 $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1864 $signatureType = "INS_FRAGMT";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1865 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1866 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1867 #should not occur
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1868 $signatureType = "UNDEFINED";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1869 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1870 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1871 else { # (F,F) or (R,R) something strange. We will discard the smallest cluster
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1872 $ifBalanced = 'UNBAL';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1873 if (scalar keys %{$secondTable} > scalar keys %{$table}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1874 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1875
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1876 $coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1877 $coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1878 $coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1879 $coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1880 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1881 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1882 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1883 $reads_left = scalar keys %{$table};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1884 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1885 if ($ifBalanced eq 'BAL') {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1886 $ifRenv = $signatureType;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1887 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1888 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1889 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1890 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1891 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1892 if ($ifBalanced ne 'BAL') {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1893 #define possible break point
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1894 $coord_start_chr1 = $coord_start_chr1_cluster1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1895 $coord_end_chr1 = $coord_end_chr1_cluster1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1896 $coord_start_chr2 = $coord_start_chr2_cluster1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1897 $coord_end_chr2 = $coord_end_chr2_cluster1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1898
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1899 my $region_length_chr1 = $coord_end_chr1-$coord_start_chr1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1900 my $region_length_chr2 = $coord_end_chr2-$coord_start_chr2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1901
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1902 my $leftLetterOk = substr($mate_sense, 0, 1); #R
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1903 my $rightLetterOk = substr($mate_sense, 1, 1); #F
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1904
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1905 my @index = keys %{$table};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1906 unless ($diff_sense_ends) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1907 my $firstEndOrder1 = $ends_order1[$index[0]];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1908 my $firstEndOrder2 = $ends_order2[$index[0]];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1909 $break_pont_chr1 = (($strand1[$index[0]] eq 'R' && $firstEndOrder1 == 2) || ($strand1[$index[0]] eq 'F' && $firstEndOrder1 == 1))?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1910 $break_pont_chr2 = (($strand2[$index[0]] eq 'R' && $firstEndOrder2 == 2) || ($strand2[$index[0]] eq 'F' && $firstEndOrder2 == 1))?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1911 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1912 $break_pont_chr1 = ($strand1[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1913 $break_pont_chr2 = ($strand2[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1914 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1915
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1916 if ($chr1 ne $chr2){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1917 $ifRenv="INV_TRANSLOC" if($ifRenv eq "REVERSE_SENSE");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1918 $ifRenv="TRANSLOC" if($ifRenv eq "NORMAL_SENSE");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1919 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1920 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1921
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1922 if (($ifBalanced eq 'BAL')&&( (scalar keys %{$table}) + (scalar keys %{$secondTable}) < $nb_pairs_threshold)) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1923 next; #discard the link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1924 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1925 if (($ifBalanced eq 'UNBAL')&&(scalar keys %{$table} < $nb_pairs_threshold)) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1926 next; #discard the link
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1927 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1928 my $ratioTxt = "$reads_left/".(scalar @pairs);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1929 my ($n1,$nTot) = split ("/",$strand_ratio_filtering);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1930 my $ratioReal = $reads_left/$nTot;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1931
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1932 if ($coord_start_chr1<=0) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1933 $coord_start_chr1=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1934 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1935 if ($coord_start_chr2<=0) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1936 $coord_start_chr2=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1937 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1938 #create output
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1939 my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1, #all information output
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1940 $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1941 $reads_left,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1942 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@pairs),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1943 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand1),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1944 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand2),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1945 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order1),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1946 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order2),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1947 &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order1),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1948 &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order2),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1949 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions1),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1950 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions2),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1951 $ifRenv,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1952 $strand_ratio_filtering,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1953 $ifBalanced, $ratioTxt, $break_pont_chr1, $break_pont_chr2,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1954 $ratioReal, $nTot);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1955
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1956 $nb_links++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1957 print $fh2 join("\t",@link)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1958
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1959 if($record>=$warn){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1960 print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1961 $warn+=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1962 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1963
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1964 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1965 $fh->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1966 $fh2->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1967
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1968 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1969
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1970 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1971 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1972 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1973 #gets information about ends positions given start, direction and order
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Fill the array referenced by $ends, in place, with one end coordinate per
# entry of the array referenced by $starts.
#
#   $ends       - array ref that receives the results (written by index)
#   $starts     - array ref of start coordinates; its length drives the loop
#   $strand     - array ref, read at the same index as $starts
#   $end_order  - array ref, read at the same index as $starts
#   $tag_length - passed through unchanged to getEnd() for every element
#
# Each result is whatever getEnd() returns for the matching start, strand
# and end order. Nothing meaningful is returned; callers use @$ends.
sub getEnds {
    my ($ends, $starts, $strand, $end_order, $tag_length) = @_;

    my $pair_count = @{$starts};
    for my $idx (0 .. $pair_count - 1) {
        $ends->[$idx] = getEnd(
            $starts->[$idx],
            $strand->[$idx],
            $end_order->[$idx],
            $tag_length,
        );
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Compute the far end of a single tag from its start coordinate.
#
#   $start      - start coordinate of the tag
#   $strand     - 'F' extends to the right; any other value extends left
#   $end_order  - key used to look up this tag's length in $tag_length
#   $tag_length - hash ref mapping an end-order key to a tag length
#
# Returns $start + length - 1 for strand 'F', otherwise $start - length + 1.
sub getEnd {
    my ($start, $strand, $end_order, $tag_length) = @_;

    my $length = $tag_length->{$end_order};

    if ($strand eq 'F') {
        return $start + $length - 1;
    }

    return $start - $length + 1;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1984 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1985 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
1986 #gets starts and ends Coords when start=leftmost given positions, directions and orders
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Derive, for every tag, a (start, end) pair from its position, strand and
# tag length, writing the results in place.
#
#   $starts     - array ref that receives the start coordinates
#   $ends       - array ref that receives the end coordinates
#   $positions  - array ref of tag positions; its length drives the loop
#   $strand     - array ref of strands, read at the same index
#   $end_order  - array ref of keys into $tag_length, read at the same index
#   $tag_length - hash ref mapping an end-order key to a tag length
#
# For strand 'F' the position is the start and the end is
# position + length - 1. For any other strand the position is the end and
# the start is position - length + 1.
sub getCoordswithLeftMost {
    my ($starts, $ends, $positions, $strand, $end_order, $tag_length) = @_;

    my $tag_count = @{$positions};
    for my $idx (0 .. $tag_count - 1) {

        if ($strand->[$idx] ne 'F') {
            # Non-forward tag: the reported position is the right edge.
            $starts->[$idx] = $positions->[$idx] - $tag_length->{ $end_order->[$idx] } + 1;
            $ends->[$idx]   = $positions->[$idx];
            next;
        }

        # Forward tag: the reported position is the left edge.
        $starts->[$idx] = $positions->[$idx];
        $ends->[$idx]   = $positions->[$idx] + $tag_length->{ $end_order->[$idx] } - 1;
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2002 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2003 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2004 sub addInsertionInfo { #add field with INS,DEL,NA and distance between clusters and performs filtering
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2005
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2006 my ($chr,$chrID,$nb_pairs_threshold,$order_filtering,$indel_sigma_threshold,$dup_sigma_threshold,$singleton_sigma_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2007
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2008 my @sfile=split(/\./,$links_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2009 my $fchr=$sfile[$#sfile-2];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2010
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2011
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2012 my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2013
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2014 my $record=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2015 my $nb_links=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2016 my $warn=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2017
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2018 print LOG "\# Filtering out normal pairs using insert size...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2019 print LOG "-- mu length=$mu, sigma length=$sigma, indel sigma threshold=$indel_sigma_threshold, dup sigma threshold=$dup_sigma_threshold\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2020 print LOG "-- using ".($mu-$indel_sigma_threshold*$sigma)."-".
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2021 ($mu+$indel_sigma_threshold*$sigma)." as normal range of insert size for indels\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2022 print LOG "-- using ".($mu-$dup_sigma_threshold*$sigma)."-".
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2023 ($mu+$dup_sigma_threshold*$sigma)." as normal range of insert size for duplications\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2024 print LOG "-- using ".($mu-$singleton_sigma_threshold*$sigma)." as the upper limit of insert size for singletons\n" if($mate_sense eq "RF");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2025
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2026 my $fh = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2027 my $fh2 = new FileHandle;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2028
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2029 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2030 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2031
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2032 while(<$fh>){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2033
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2034 $record++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2035 my @t = split;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2036 my ($chr1,$chr2,$mates_list)=@t[0,3,7];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2037
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2038 if($chrID->{$chr1} ne $chrID->{$chr2}) { #if inter-chromosomal link here (because sv_type=all),
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2039 $nb_links++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2040
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2041 $t[16]="INV_TRANSLOC" if($t[16] eq "REVERSE_SENSE");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2042 $t[16]="TRANSLOC" if($t[16] eq "NORMAL_SENSE");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2043
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2044 $t[16].= "\t";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2045 $t[19].= "\t";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2046
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2047 print $fh2 join("\t",@t)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2048
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2049 if($record>=$warn){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2050 print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2051 $warn+=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2052 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2053 next;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2054 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2055
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2056 my $ifRenv = $t[16];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2057 my $ifBalanced = "UNBAL";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2058 $ifBalanced = $t[18] if ($order_filtering);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2059
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2060 my $numberOfPairs = $t[6];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2061 my @positions1 = deleteBadOrderSensePairs(split (/,/,$t[14]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2062 my @positions2 = deleteBadOrderSensePairs(split (/,/,$t[15]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2063
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2064 if ($ifBalanced eq "BAL") {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2065
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2066 if ($ifRenv eq "INV_TRANSLOC") {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2067 $ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2068 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2069 if ($ifRenv eq "NORMAL_SENSE") {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2070 $ifRenv = "TRANSLOC";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2071 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2072 if ($ifRenv eq "REVERSE_SENSE") {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2073 $ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2074 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2075 $t[19].= "\t";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2076
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2077 my $meanDistance = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2078
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2079 for my $i (0..$numberOfPairs-1) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2080 $meanDistance += $positions2[$i]-$positions1[$i];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2081 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2082 $meanDistance /= $numberOfPairs;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2083
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2084 $t[16] = $ifRenv."\t".$meanDistance;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2085 #dont touch the annotation. It should be already OK.
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2086
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2087 } else {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2088 #only for unbalanced
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2089
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2090 my $ifoverlap=overlap($t[1],$t[2],$t[4],$t[5]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2091
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2092 my $ends_sense_class = (deleteBadOrderSensePairs(split (/,/,$t[8])))[0].
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2093 (deleteBadOrderSensePairs(split (/,/,$t[9])))[0];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2094 my $ends_order_class = (deleteBadOrderSensePairs(split (/,/,$t[10])))[0].
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2095 (deleteBadOrderSensePairs(split (/,/,$t[11])))[0];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2096
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2097 my $indel_type = $ifRenv;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2098
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2099 my $meanDistance = "N/A";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2100
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2101 ($meanDistance, $indel_type) = checkIndel ($numberOfPairs, #identify insertion type for rearrangments without inversion, calculates distance between cluster
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2102 \@positions1, #assign N/A to $indel_type if unknown
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2103 \@positions2,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2104 $ifRenv,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2105 $ifoverlap,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2106 $indel_sigma_threshold,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2107 $dup_sigma_threshold,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2108 $singleton_sigma_threshold,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2109 $mu,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2110 $sigma,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2111 $ifBalanced,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2112 $ends_sense_class,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2113 $ends_order_class,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2114 $mate_sense,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2115 $diff_sense_ends,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2116 );
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2117
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2118 #filtering of pairs with distance inconsistant with the SV
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2119 if ($ifRenv ne "REVERSE_SENSE") {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2120 my $maxCoord1 =$chr->{$chrID->{$chr1}}->{length};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2121 my $maxCoord2 =$chr->{$chrID->{$chr2}}->{length};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2122 $meanDistance = recalc_t_usingInsertSizeInfo(\@t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2123 $maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2124 next if ($t[6] < $nb_pairs_threshold);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2125 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2126 $t[19].= "\t";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2127 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2128 $t[16] = $indel_type."\t".$meanDistance;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2129 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2130
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2131 $nb_links++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2132
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2133 print $fh2 join("\t",@t)."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2134 if($record>=$warn){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2135 print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2136 $warn+=10000;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2137 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2138 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2139 $fh->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2140 $fh2->close;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2141
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2142 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2143
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2144 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2145 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2146 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Compute the mean distance between paired positions of a link and derive a
# rearrangement label from it.
#
# Arguments (positional):
#   $numberOfPairs              number of leading pairs averaged
#   $positions1, $positions2    array refs of positions; the distance of pair i
#                               is $positions2->[i] - $positions1->[i]
#   $ifRenv                     incoming type; only "REVERSE_SENSE" is tested
#   $ifoverlap                  true value enables the SMALL_DUPLI label
#   $indel_sigma_threshold, $dup_sigma_threshold, $singleton_sigma_threshold
#                               multipliers of $sigma added to / subtracted
#                               from $mu to build the decision thresholds
#   $mu, $sigma                 presumably mean and standard deviation of the
#                               library insert size -- confirm against caller
#   $ifBalanced                 accepted but not used here
#   $ends_sense_class, $ends_order_class, $mate_sense, $diff_sense_ends
#                               orientation/order descriptors compared below
#
# Returns the two-element list ($meanDistance, $label) where $label is one of
# INV_DUPLI, INVERSION, LARGE_DUPLI, SINGLETON, SMALL_DUPLI, DUPLICATION,
# INSERTION, DELETION or UNDEFINED. The first matching rule wins.
sub checkIndel {
    my (
        $numberOfPairs, $positions1, $positions2, $ifRenv, $ifoverlap,
        $indel_sigma_threshold, $dup_sigma_threshold, $singleton_sigma_threshold,
        $mu, $sigma, $ifBalanced, $ends_sense_class, $ends_order_class,
        $mate_sense, $diff_sense_ends,
    ) = @_;

    my $meanDistance = 0;
    foreach my $pair (0 .. $numberOfPairs - 1) {
        $meanDistance += $positions2->[$pair] - $positions1->[$pair];
    }
    $meanDistance /= $numberOfPairs;

    # Reverse-sense links are either an inverted duplication or an inversion.
    if ($ifRenv eq "REVERSE_SENSE") {
        if ($meanDistance < $mu + $dup_sigma_threshold * $sigma) {
            return ($meanDistance, "INV_DUPLI");
        }
        return ($meanDistance, "INVERSION");
    }

    if ($diff_sense_ends) {
        if (   $ends_sense_class ne $mate_sense
            && $meanDistance > $mu + $dup_sigma_threshold * $sigma)
        {
            return ($meanDistance, "LARGE_DUPLI");
        }
        if (   $meanDistance < $mu - $singleton_sigma_threshold * $sigma
            && $mate_sense eq "RF"
            && $ends_sense_class eq inverseSense($mate_sense))
        {
            return ($meanDistance, "SINGLETON");
        }
    }
    else {
        my $dupli_orientation =
               ($ends_sense_class eq $mate_sense && $ends_order_class eq "12")
            || ($ends_sense_class eq inverseSense($mate_sense) && $ends_order_class eq "21");
        if ($dupli_orientation && $meanDistance > $mu + $dup_sigma_threshold * $sigma) {
            return ($meanDistance, "LARGE_DUPLI");
        }
    }

    if ($meanDistance < $mu - $dup_sigma_threshold * $sigma && $ifoverlap) {
        return ($meanDistance, "SMALL_DUPLI");
    }

    if (   $diff_sense_ends
        && $ends_sense_class ne $mate_sense
        && $meanDistance < $mu - $dup_sigma_threshold * $sigma)
    {
        return ($meanDistance, "DUPLICATION");
    }

    if ($meanDistance < $mu - $indel_sigma_threshold * $sigma) {
        return ($meanDistance, "INSERTION");
    }
    if ($meanDistance > $mu + $indel_sigma_threshold * $sigma) {
        return ($meanDistance, "DELETION");
    }

    return ($meanDistance, "UNDEFINED");
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2180 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2181 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2182 #sub recalculates @t so as to get rid of inconsistent pairs (inconsistent insert size)
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Flag the pairs of a link whose individual distance contradicts the link's
# mean distance, then refresh the fields of the record @$t that depend on the
# pair list.
#
#   $t              array ref of the link fields; modified in place
#   $mu, $sigma     reference distance and spread (presumably insert-size mean
#                   and standard deviation -- confirm against caller)
#   $meanDistance   mean distance previously computed for the link
#   remaining args  forwarded to the helper subs called below
#
# A pair is "bad" when the link mean is below $mu but the pair distance is
# >= $mu, or when the link mean is >= $mu but the pair distance is <= $mu.
# Entries of field 15 already ending in '$' or '*' are not examined.
#
# Returns the (possibly recomputed) mean distance. Returns nothing when the
# number of remaining pairs (field 6) falls below $nb_pairs_threshold; in that
# case only field 6 has been updated.
sub recalc_t_usingInsertSizeInfo {
    my ($t, $mu, $sigma, $meanDistance, $tag_length, $diff_sense_ends, $mate_sense,
        $maxCoord1, $maxCoord2, $ends_sense_class, $ends_order_class,
        $nb_pairs_threshold, $order_filtering) = @_;

    my @positions1 = getAllEntries($t->[14]);
    my @positions2 = getAllEntries($t->[15]);

    # Collect the indexes of the pairs on the wrong side of $mu.
    my $mean_below_mu = ($meanDistance < $mu);
    my @badPairs;
    for my $k (0 .. $#positions1) {
        my $flag = substr($positions2[$k], -1, 1);
        next if $flag eq '$' || $flag eq '*';
        my $span = $positions2[$k] - $positions1[$k];
        push(@badPairs, $k) if ($mean_below_mu ? $span >= $mu : $span <= $mu);
    }

    # The ratio lives in field 19 with order filtering, in field 17 without.
    my $ratio_col = $order_filtering ? 19 : 17;

    # Nothing has been filtered: only the ratio field gets its second value.
    unless (@badPairs) {
        $t->[$ratio_col] = recalculate_ratio((split(/\//, $t->[$ratio_col]))[0], $t->[$ratio_col]);
        return $meanDistance;
    }

    # Remove these inconsistent pairs from the count (numberOfPairs).
    $t->[6] -= scalar(@badPairs);
    return if ($t->[6] < $nb_pairs_threshold);

    for my $col (7 .. 11, 14, 15) {
        $t->[$col] = mark_values(\@badPairs, $t->[$col]);
    }
    for my $col (12, 13) {
        $t->[$col] = mark_indexes(\@badPairs, $t->[$col]);
    }

    $t->[$ratio_col] = recalculate_ratio($t->[6], $t->[$ratio_col]);    # add the second ratio
    ($t->[1], $t->[2]) = recalculate_boundaries($t->[14], $t->[8], $t->[10], $tag_length);
    ($t->[4], $t->[5]) = recalculate_boundaries($t->[15], $t->[9], $t->[11], $tag_length);

    # Recalculate the breakpoints and the total ratio.
    my $quant001          = 3.090232;
    my $maxFragmentLength = floor($quant001 * $sigma + $mu);
    if ($order_filtering) {
        $t->[20] = recalc_breakpoints($mate_sense, $maxCoord1, $t->[14],
            substr($ends_sense_class, 0, 1), substr($ends_order_class, 0, 1),
            $t->[1], $t->[2], $maxFragmentLength, $diff_sense_ends);
        $t->[21] = recalc_breakpoints($mate_sense, $maxCoord2, $t->[15],
            substr($ends_sense_class, 1, 1), substr($ends_order_class, 1, 1),
            $t->[4], $t->[5], $maxFragmentLength, $diff_sense_ends);
        $t->[22] = $t->[6] / $t->[23];
    }
    else {
        $t->[18] = $t->[6] / $t->[19];
    }

    # Mean distance over the pairs that survive deleteBadOrderSensePairs.
    @positions1 = deleteBadOrderSensePairs(split(/,/, $t->[14]));
    @positions2 = deleteBadOrderSensePairs(split(/,/, $t->[15]));

    $meanDistance = 0;
    for my $k (0 .. $#positions1) {
        $meanDistance += $positions2[$k] - $positions1[$k];
    }
    $meanDistance /= scalar(@positions1);

    return $meanDistance;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2253
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Append a second "a/b" ratio to an existing ratio string.
#
#   $left   new first component
#   $ratio  existing ratio text, components separated by '/'
#
# The new ratio is built from the old one by moving its first component to
# second place and putting $left first. Returns the original text, a tab, and
# the new ratio, e.g. (3, "5/10") gives "5/10\t3/5".
sub recalculate_ratio {
    my ($left, $ratio) = @_;

    my @parts    = split(/\//, $ratio);
    my $previous = $parts[0];
    @parts[0, 1] = ($left, $previous);

    return join("\t", $ratio, join("/", @parts));
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2261
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Build the "(start,end)" breakpoint interval text for one side of a link.
#
#   $mate_sense         two-letter orientation string; only its first letter
#                       is used, and only when $diff_sense_ends is true
#   $maxCoord           upper bound applied to the right-hand interval
#   $startString        accepted for interface compatibility; not used
#   $strand             'F' or 'R'
#   $firstEndOrder      1 or 2; only used when $diff_sense_ends is false
#   $coord_start_chr, $coord_end_chr
#                       cluster boundaries
#   $maxFragmentLength  width added to / subtracted from the boundaries
#   $diff_sense_ends    selects which orientation rule is applied
#
# Returns "(end,min(start+maxFragmentLength,maxCoord))" when the interval lies
# to the right of the cluster, and "(max(end-maxFragmentLength,1),start)"
# otherwise.
sub recalc_breakpoints {
    my ($mate_sense, $maxCoord, $startString, $strand, $firstEndOrder,
        $coord_start_chr, $coord_end_chr, $maxFragmentLength, $diff_sense_ends) = @_;

    # The earlier version also parsed $startString into a list and extracted
    # the second letter of $mate_sense; neither result was ever read, so that
    # work has been dropped.
    my $interval_on_right;
    if ($diff_sense_ends) {
        $interval_on_right = ($strand eq substr($mate_sense, 0, 1));
    }
    else {
        $interval_on_right = ($strand eq 'R' && $firstEndOrder == 2)
                          || ($strand eq 'F' && $firstEndOrder == 1);
    }

    if ($interval_on_right) {
        return '(' . $coord_end_chr . ','
             . min(($coord_start_chr + $maxFragmentLength), $maxCoord) . ')';
    }
    return '(' . max(($coord_end_chr - $maxFragmentLength), 1) . ','
         . $coord_start_chr . ')';
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Recompute the start and end coordinates of a cluster.
#
#   $startString, $senseString, $endsOrderString
#                 comma-separated lists; each is split on ',' and passed
#                 through deleteBadOrderSensePairs
#   $tag_length   forwarded to getEnds
#
# getEnds fills a list of end coordinates from the three filtered lists.
# Returns (lowest, highest) taken over both the start positions and those ends.
sub recalculate_boundaries {
    my ($startString, $senseString, $endsOrderString, $tag_length) = @_;

    my @kept;
    for my $field ($startString, $senseString, $endsOrderString) {
        push @kept, [ deleteBadOrderSensePairs(split(/,/, $field)) ];
    }
    my ($positions, $strands, $ends_orders) = @kept;

    my @ends;
    getEnds(\@ends, $positions, $strands, $ends_orders, $tag_length);

    my $lowest  = min(min(@{$positions}), min(@ends));
    my $highest = max(max(@{$positions}), max(@ends));
    return ($lowest, $highest);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2289
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Return a comma-separated list, wrapped in parentheses, with the entries at
# the given positions removed.
#
#   $bads    positions to drop (0-based, counted after filtering): either a
#            hash ref whose keys are the positions, or an array ref of
#            positions
#   $string  comma-separated list; it is split on ',' and passed through
#            deleteBadOrderSensePairs before positions are applied
#
# Positions outside the list are ignored and duplicates are applied once.
sub remove_indexes {
    my ($bads, $string) = @_;

    my @elements = deleteBadOrderSensePairs(split(/,/, $string));

    # Only the keys of a hash are positions; flattening the whole hash would
    # also treat its values as positions.
    my @positions = ref($bads) eq 'HASH' ? keys %{$bads} : @{$bads};

    # Remove from the highest position down, compared numerically, so earlier
    # removals do not shift the positions still to be processed. splice()
    # really removes the entry, whereas delete() on an array element only
    # leaves an undefined hole.
    my %seen;
    for my $i (sort { $b <=> $a } grep { !$seen{$_}++ } @positions) {
        next if $i < 0 || $i > $#elements;
        splice(@elements, $i, 1);
    }

    return "(" . join(",", @elements) . ")";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2298 ##add @ to elements
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Append "@" to the entries found at the given positions of a list.
#
#   $bads    array ref of 0-based positions to flag
#   $string  list text, turned into entries by getAllEntries
#
# Returns the entries joined with ',' and wrapped in parentheses.
sub mark_values {
    my ($bads, $string) = @_;

    my @entries = getAllEntries($string);
    foreach my $position (@{$bads}) {
        $entries[$position] .= "@";
    }

    return sprintf("(%s)", join(",", @entries));
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2307 ##add @ to indexes
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Append "@" to every entry whose text equals a flagged position plus one.
#
#   $bads    array ref of 0-based positions; an entry matches when it is
#            string-equal to position + 1
#   $string  list text, turned into entries by getAllEntries
#
# Entries that already carry a suffix do not match and are left untouched.
# Returns the entries joined with ',' and wrapped in parentheses.
sub mark_indexes {
    my ($bads, $string) = @_;

    # Lookup of the 1-based ranks to flag.
    my %flagged_rank = map { ($_ + 1, 1) } @{$bads};

    my @entries = getAllEntries($string);
    foreach my $entry (@entries) {
        $entry .= "@" if exists $flagged_rank{$entry};
    }

    return sprintf("(%s)", join(",", @entries));
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2319
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2320 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2321 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Render the entries of @$arr as list text, tagging the entries that belong to
# no kept set.
#
#   $type          1: the lookup key of an entry is its position in @$arr;
#                  any other value: the key is the entry itself minus one
#   $table         hash ref; entries whose key exists here go, unchanged, to
#                  the first group
#   $secondTable   hash ref; only consulted when $ifBalanced eq 'BAL'; entries
#                  whose key exists here go, unchanged, to the second group
#   $badInFRSense  hash ref; an entry found in neither table gets the suffix
#                  '$' when its key exists here and '*' otherwise
#   $ifBalanced    'BAL' selects the two-group output
#   $arr           array ref of the entries to render
#
# A tagged entry is appended to the group that most recently received an
# untagged entry (the first group until then).
#
# Returns "(first)" or, for 'BAL', "(first),(second)", each group joined
# with ','.
sub redraw {
    my ($type, $table, $secondTable, $badInFRSense, $ifBalanced, $arr) = @_;

    my $balanced      = ($ifBalanced eq 'BAL');
    my $key_is_offset = ($type == 1);

    my @groups  = ([], []);
    my $current = 0;    # group that receives the tagged entries

    my $offset = -1;
    for my $entry (@{$arr}) {
        $offset++;
        my $key = $key_is_offset ? $offset : $entry - 1;

        if (exists $table->{$key}) {
            push @{ $groups[0] }, $entry;
            $current = 0;
        }
        elsif ($balanced && exists $secondTable->{$key}) {
            push @{ $groups[1] }, $entry;
            $current = 1;
        }
        else {
            my $suffix = exists $badInFRSense->{$key} ? '$' : '*';
            push @{ $groups[$current] }, $entry . $suffix;
        }
    }

    my $out = '(' . join(",", @{ $groups[0] }) . ')';
    $out .= ',(' . join(",", @{ $groups[1] }) . ')' if $balanced;
    return $out;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2408 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2409 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Find the key with the largest non-zero "nonAdeq" count.
#
#   $_[0]  hash ref mapping numeric keys to hash refs that hold a "nonAdeq"
#          count
#
# Keys are visited in ascending numeric order and only a strictly larger count
# replaces the current best, so ties go to the smallest key. Returns that key,
# or the string 'OK' when no count is above zero.
sub check {
    my ($table) = @_;

    my ($worst_key, $worst_count) = ('OK', 0);
    foreach my $key (sort { $a <=> $b } keys %{$table}) {
        my $count = $table->{$key}->{nonAdeq};
        next if $count == 0;
        next unless $worst_count < $count;
        ($worst_key, $worst_count) = ($key, $count);
    }

    return $worst_key;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2425 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2426 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub reversed {
    # Tell whether the positions at indices $i and $j are in the "wrong"
    # order for the given sense.
    #
    # $i, $j     : indices into @$positions
    # $ifRenv    : 'REVERSE_SENSE' or any other string
    # $positions : array ref of numeric positions
    #
    # Returns 1 when
    #   - $ifRenv is 'REVERSE_SENSE' and position $i is below position $j, or
    #   - $ifRenv is anything else   and position $i is above position $j;
    # returns 0 otherwise (equal positions always give 0).
    my ($i,$j,$ifRenv,$positions) = @_;

    my ($pos_i, $pos_j) = ($positions->[$i], $positions->[$j]);

    if ($ifRenv eq 'REVERSE_SENSE') {
        return ($pos_i < $pos_j) ? 1 : 0;
    }
    return ($pos_i > $pos_j) ? 1 : 0;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2435 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2436 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub remove {
    # Drop entry $bad from $table (modified in place) and erase every
    # reference to it held by the remaining entries.
    #
    # $bad   : numeric key to eliminate
    # $table : hash ref, key => { <other key> => ..., nonAdeq => <count> }
    #
    # For each surviving entry that has a sub-key equal to $bad, that
    # sub-key is deleted and the entry's 'nonAdeq' counter is decremented.
    my ($bad,$table) = @_;

    foreach my $key (sort { $a <=> $b } keys %{$table}) {
        if ($key == $bad) {
            delete $table->{$key};
            next;
        }
        next unless exists $table->{$key}->{$bad};
        delete $table->{$key}->{$bad};
        $table->{$key}->{nonAdeq}--;
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2451 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2452 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub findBadInRFSenseSOLiDSolexa { #choose maximum: FFFFs or RRRRs
    # Among the indices in @keysLeft, split the reads by strand letter and
    # return the indices of the minority group.
    #
    # $strand     : array ref of strand letters
    # $ends_order : array ref handed to translateSolidToRF() together with
    #               $strand when both letters of $mate_sense are equal
    # $mate_sense : two-letter string; its first/second letters are the
    #               letters to look for (replaced by 'R'/'F' when equal)
    # @keysLeft   : indices to examine
    #
    # Returns the list of indices carrying the second letter when the
    # first-letter group is at least as large, otherwise the list of
    # indices carrying the first letter.
    my ($strand,$ends_order,$mate_sense,@keysLeft) = @_;

    my $leftLetterOk  = substr($mate_sense, 0, 1);
    my $rightLetterOk = substr($mate_sense, 1, 1);

    my @letters;
    if ($leftLetterOk eq $rightLetterOk) {
        # Same letter on both sides (SOLiD mate-pairs): work on the R/F
        # translation instead of the raw strands.
        ($leftLetterOk, $rightLetterOk) = ('R', 'F');
        @letters = translateSolidToRF($strand,$ends_order);
    }
    else {
        @letters = @{$strand};
    }

    my @Rs = grep { $letters[$_] eq $leftLetterOk }  @keysLeft;
    my @Fs = grep { $letters[$_] eq $rightLetterOk } @keysLeft;

    if (scalar(@Rs) >= scalar(@Fs)) {
        return @Fs;
    }
    return @Rs;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2495
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2496 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2497 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub findBadInFRSenseSOLiDSolexa { #should work both for SOLiD and Solexa
    # Try four ways of removing "right letter followed by left letter"
    # neighbours (two scan directions for each end of the link) and return
    # the one that discards the fewest reads.
    #
    # Arguments: $strand1, $strand2, $ends_order1, $ends_order2, $order1,
    # $order2 (array refs; $order1 is not used here) and $mate_sense, a
    # two-letter string giving the left/right letters.
    #
    # Returns (END, KEY => 1, ...): END is 1 or 2 for the end whose scan was
    # selected, followed by the flattened hash of positions marked as bad.
    my ($strand1,$strand2,$ends_order1,$ends_order2,$order1,$order2,$mate_sense) = @_;

    my $leftLetterOk  = substr($mate_sense, 0, 1); #R
    my $rightLetterOk = substr($mate_sense, 1, 1); #F

    my (@letters1, @letters2);
    if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
        ($leftLetterOk, $rightLetterOk) = ('R', 'F');
        @letters1 = translateSolidToRF($strand1,$ends_order1);
        my @orderedStrands = getOrderedStrands($strand2,$order2);
        my @orderedEnds    = getOrderedStrands($ends_order2,$order2);
        @letters2 = translateSolidToRF(\@orderedStrands,\@orderedEnds);
    }
    else {
        @letters1 = @{$strand1};
        @letters2 = getOrderedStrands($strand2,$order2);
    }

    # Left-to-right scan on a private copy: whenever a right letter is
    # followed by a left letter, the second one is recorded as bad (under
    # the key computed by $keyFor) and overwritten with the right letter.
    # e.g. FRFRFFFF -> FFFFFF and RRFRFRFFFF -> RRFFFFFF
    my $scanForward = sub {
        my ($letters, $keyFor) = @_;
        my @work = @{$letters};
        my %bad;
        for my $pos (1 .. $#work) {
            next unless $work[$pos-1] eq $rightLetterOk && $work[$pos] eq $leftLetterOk;
            $bad{ $keyFor->($pos) } = 1;
            $work[$pos] = $rightLetterOk;
        }
        return (scalar(@work) - scalar(keys %bad), \%bad);
    };

    # Right-to-left scan on a private copy: whenever a right letter is
    # followed by a left letter, the first one is recorded as bad (key is
    # its index) and overwritten with the left letter.
    # e.g. FRFRFFFFRR -> FFFFFFRR
    my $scanBackward = sub {
        my ($letters) = @_;
        my @work = @{$letters};
        my %bad;
        for my $pos (reverse(1 .. $#work)) {
            next unless $work[$pos-1] eq $rightLetterOk && $work[$pos] eq $leftLetterOk;
            $bad{$pos-1} = 1;
            $work[$pos-1] = $leftLetterOk;
        }
        return (scalar(@work) - scalar(keys %bad), \%bad);
    };

    #we will try 4 possibilities, 2 for each end of the link

    #for the first end:
    my ($numberRRRFFF_or_FFF_1, $badInFRSense1) = $scanForward->(\@letters1, sub { $_[0] });
    my ($numberRRF1,            $badInFRSense0) = $scanBackward->(\@letters1);

    #for the second end:
    # NOTE(review): the forward scan keys its hash by $order2->[$pos] while
    # the backward scan keys it by the plain index $pos-1 -- this asymmetry
    # is reproduced from the original code; confirm it is what callers expect.
    my ($numberRRRFFF_or_FFF_2, $badInFRSense3) = $scanForward->(\@letters2, sub { $order2->[$_[0]] });
    my ($numberRRF2,            $badInFRSense5) = $scanBackward->(\@letters2);

    # Candidates in priority order; the first one whose number of kept
    # reads is >= every candidate's wins.
    my @candidates = (
        [$numberRRF1,            1, $badInFRSense0],
        [$numberRRRFFF_or_FFF_1, 1, $badInFRSense1],
        [$numberRRRFFF_or_FFF_2, 2, $badInFRSense3],
        [$numberRRF2,            2, $badInFRSense5],
    );
    for my $candidate (@candidates) {
        my ($kept, $end, $bad) = @{$candidate};
        next if grep { !($kept >= $_->[0]) } @candidates;
        return ($end, %{$bad});
    }

    #should not get here:
    print STDERR "Error in findBadInFRSenseSOLiDSolexa()!\n";
    return (1,%{$badInFRSense1});
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2587
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub getOrderedStrands {
    # Rearrange the elements of @$strand according to @$order.
    #
    # $strand : array ref of values
    # $order  : array ref of 1-based indices into @$strand
    #
    # Returns a list with as many elements as @$strand, whose element i is
    # $strand->[ $order->[i] - 1 ].
    my ($strand,$order) = @_;

    my $last = scalar(@{$strand}) - 1;
    my @ordered = map { $strand->[ $order->[$_] - 1 ] } 0 .. $last;
    return @ordered;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2596 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2597 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub checkClusters {
    # Check that two clusters are ordered consistently on both chromosomes.
    #
    # $ifRenv : 'REVERSE_SENSE' or any other string (treated as normal)
    # the four remaining arguments are the start coordinates of cluster 1
    # and cluster 2 on chromosome 1, then on chromosome 2.
    #
    # REVERSE_SENSE: the chr2 starts must be ordered like the chr1 starts.
    # Otherwise    : the chr2 starts must be ordered the opposite way.
    # Equal chr1 starts count as "cluster 1 first"; equal chr2 starts
    # satisfy either ordering. Returns 1 when the check passes, else 0.
    my ($ifRenv,$coord_start_chr1_cluster1,$coord_start_chr1_cluster2,$coord_start_chr2_cluster1,$coord_start_chr2_cluster2) = @_;

    my $is_reverse     = ($ifRenv eq 'REVERSE_SENSE') ? 1 : 0;
    my $chr1_ascending = ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) ? 1 : 0;

    if ($is_reverse == $chr1_ascending) {
        # reverse sense + ascending chr1, or normal sense + descending chr1
        return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2) ? 1 : 0;
    }
    return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2) ? 1 : 0;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2613
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2614 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2615 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub translateSolidToRF {
    # Translate per-read strands into R/F letters using the end number:
    # a read from end 1 keeps its strand letter, a read from end 2 gets
    # the opposite letter.
    #
    # $strandArr     : array ref of 'F' / 'R'
    # $ends_orderArr : array ref of 1 / 2, parallel to $strandArr
    #
    # Returns the list of translated letters. A position whose end number
    # is neither 1 nor 2, or whose strand is neither 'F' nor 'R',
    # contributes nothing to the result.
    my ($strandArr,$ends_orderArr)=@_;

    my %opposite = ('F' => 'R', 'R' => 'F');
    my @translated;
    my $last = scalar(@{$strandArr}) - 1;
    for my $idx (0 .. $last) {
        my $end    = $ends_orderArr->[$idx];
        my $letter = $strandArr->[$idx];
        if ($end == 1) {
            push(@translated, $letter) if ($letter eq 'F' || $letter eq 'R');
        }
        elsif ($end == 2) {
            push(@translated, $opposite{$letter}) if ($letter eq 'F' || $letter eq 'R');
        }
    }
    return @translated;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2635
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2636 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2637 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2638 #convert the links file to the circos format
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub links2segdup{
    # Convert a links file to the circos format.
    #
    # $id          : prefix glued in front of each chromosome name
    # $color_code  : hash ref, color => "min_links,max_links"
    # $links_file  : path of the links file to read
    # $segdup_file : path of the circos file to write
    #
    # Every input line yields two output lines sharing the same running
    # index (starting at 1), one per end of the link, tagged with the color
    # returned by getColor() for the link's 7th column.
    #
    # Progress messages go to the LOG filehandle, which is not opened here
    # (presumably by the caller -- TODO confirm). Dies when a file cannot be
    # opened or when the output cannot be flushed on close.
    my($id,$color_code,$links_file,$segdup_file)=@_;

    print LOG "# Converting to the circos format...\n";

    tie (my %hcolor,'Tie::IxHash');			#color-code hash table
    foreach my $col (keys %{$color_code}){
        my ($min_links,$max_links)=split(",",$color_code->{$col});
        $hcolor{$col}=[$min_links,$max_links];
    }

    # Three-argument open with lexical handles: the file name can no longer
    # be interpreted as part of the open mode, and no global handle leaks.
    open(my $links_fh, '<', $links_file) or die "$0: can't open $links_file :$!\n";
    open(my $segdup_fh, '>', $segdup_file) or die "$0: can't write in the output: $segdup_file :$!\n";

    my $index=1;
    # A named line variable keeps the caller's $_ untouched.
    while (my $line = <$links_fh>) {

        my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split(' ',$line))[0,1,2,3,4,5,6];

        my $color=getColor($count,\%hcolor,"circos");	#get the color-code according the number of links

        print {$segdup_fh} "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".	#circos output
                           "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
        $index++;
    }

    close $links_fh;
    # Buffered write errors only surface at close time.
    close $segdup_fh or die "$0: can't close the output: $segdup_file :$!\n";
    print LOG "-- output created: $segdup_file\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2670 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2671 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2672 #convert the links file to the bedPE format for BEDTools usage
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub links2bedPElinksfile{
    # Convert a links file to the bedPE format.
    #
    # $sample     : sample name, used to build the name column
    #               "<sample>_link<N>" (N is the 1-based line number)
    # $links_file : path of the tab-separated links file to read
    # $bedpe_file : path of the bedPE file to write
    #
    # Output columns: the six coordinate columns (both starts decreased by
    # one), the name, a type ("INTRA" when both chromosomes are equal,
    # otherwise "INTER", followed by "_" and the 17th input column), two
    # "." placeholders, and all remaining input columns joined with "|".
    #
    # Dies when a file cannot be opened or when the output cannot be
    # flushed on close.
    my ($sample,$links_file,$bedpe_file)=@_;

    # Three-argument open with lexical handles: the file name can no longer
    # be interpreted as part of the open mode, and no global handle leaks.
    open(my $links_fh, '<', $links_file) or die "$0: can't open $links_file :$!\n";
    open(my $bedpe_fh, '>', $bedpe_file) or die "$0: can't write in the output: $bedpe_file :$!\n";

    my $nb_links=1;

    # A named line variable keeps the caller's $_ untouched.
    while (my $line = <$links_fh>) {

        chomp $line;
        my @t=split("\t",$line);
        my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
        my $type=($chr1 eq $chr2)? "INTRA":"INTER";
        $type.="_".$t[10];	# index counted after the six coordinates were removed

        $start1--; $start2--;

        print {$bedpe_fh} "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
                          "\t${sample}_link$nb_links\t$type\t.\t.".
                          "\t".join("|",@t)."\n";

        $nb_links++;
    }

    close $links_fh;
    # Buffered write errors only surface at close time.
    close $bedpe_fh or die "$0: can't close the output: $bedpe_file :$!\n";

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2703 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2704 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
sub bedPElinks2linksfile{
    # Convert a bedPE file (tab-separated, at least 11 columns) to a links
    # file.
    #
    # $bedpe_file : path of the bedPE file to read
    # $links_file : path of the links file to write
    #
    # Each output line holds the first six bedPE columns unchanged, then
    # the "|"-separated items of the 11th column as individual columns,
    # then the sample name recovered from the 7th (name) column.
    #
    # The sample name is the name column without its trailing "_link<N>"
    # suffix, so samples that themselves contain "_" survive intact. Names
    # that do not end in "_link<N>" fall back to the text before the first
    # "_", as before.
    #
    # NOTE(review): the start coordinates are copied as they are; nothing
    # here adds back the 1 that links2bedPElinksfile subtracts -- confirm
    # this is intended.
    #
    # Dies when a file cannot be opened or when the output cannot be
    # flushed on close.
    my ($bedpe_file,$links_file)=@_;

    # Three-argument open with lexical handles: the file name can no longer
    # be interpreted as part of the open mode, and no global handle leaks.
    open(my $bedpe_fh, '<', $bedpe_file) or die "$0: can't open: $bedpe_file :$!\n";
    open(my $links_fh, '>', $links_file) or die "$0: can't write in the output $links_file :$!\n";

    # A named line variable keeps the caller's $_ untouched.
    while (my $line = <$bedpe_fh>) {

        chomp $line;
        my @fields=split("\t",$line);	# split once instead of three times

        my $name=$fields[6];
        my ($sample)=($name =~ /^(.*)_link\d+$/);
        $sample=(split("_",$name))[0] unless defined $sample;

        my @t1=(@fields)[0,1,2,3,4,5];
        my @t2=split(/\|/,$fields[10]);
        push(@t2,$sample);

        print {$links_fh} join("\t",@t1)."\t".join("\t",@t2)."\n";

    }
    close $bedpe_fh;
    # Buffered write errors only surface at close time.
    close $links_fh or die "$0: can't close the output $links_file :$!\n";

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2727 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2728 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2729 #convert the links file to the bed format
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2730 sub links2bedfile{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2731
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2732 my ($tag_length,$color_code,$links_file,$bed_file)=@_;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2733
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2734 print LOG "# Converting to the bed format...\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2735
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2736 my $compare=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2737 if($links_file!~/compared$/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2738 $compare=0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2739 $tag_length->{none}->{1}=$tag_length->{1};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2740 $tag_length->{none}->{2}=$tag_length->{2};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2741 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2742
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2743 #color-code hash table
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2744 tie (my %hcolor,'Tie::IxHash');
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2745 my %color_order;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2746 my $n=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2747 foreach my $col (keys %{$color_code}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2748 my ($min_links,$max_links)=split(",",$color_code->{$col});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2749 $hcolor{$col}=[$min_links,$max_links];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2750 $color_order{$col}=$n;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2751 $n++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2752 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2753
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2754 my %pair;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2755 my %pt;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2756 $n=1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2757 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2758
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2759 my %str=( "F"=>"+", "R"=>"-" );
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2760
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2761 while(<LINKS>){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2762
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2763 my @t=split;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2764 my $sample=($compare)? pop(@t):"none";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2765
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2766 my $chr1=$t[0];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2767 my $chr2=$t[3];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2768 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2769 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2770 my $same_chr=($chr1 eq $chr2)? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2771
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2772 my $count=$t[6];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2773 my $color=getColor($count,\%hcolor,"bed");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2774
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2775 my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2776 my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2777 my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2778 my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2779 my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2780 my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2781 my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2782 my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2783 my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2784
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2785
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2786 for my $p (0..$#pairs){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2787
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2788 if (!exists $pair{$pairs[$p]}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2789
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2790 if($same_chr){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2791
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2792 $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2793 $start1[$p]-1, $end2[$p], $color,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2794 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2795 $pt{$n}=$pair{$pairs[$p]}->{0};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2796 $n++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2797
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2798 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2799
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2800 $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2801 $start1[$p]-1, $end1[$p], $color,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2802 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2803 $pt{$n}=$pair{$pairs[$p]}->{1};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2804 $n++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2805
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2806
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2807 $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2808 $start2[$p]-1, $end2[$p], $color,
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2809 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2810 $pt{$n}=$pair{$pairs[$p]}->{2};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2811 $n++;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2812 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2813 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2814
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2815 if($same_chr){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2816 ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2817 }else{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2818 ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2819 ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2820 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2821 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2822 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2823 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2824 close LINKS;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2825
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2826 my $nb_pairs=$n-1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2827
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2828 open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2829 print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2830 "visibility=2 itemRgb=\"On\"\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2831
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2832 for my $i (1..$nb_pairs){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2833 print BED join("\t",@{$pt{$i}})."\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2834 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2835
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2836 close BED;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2837
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2838 print LOG "-- output created: $bed_file\n";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2839
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2840 undef %pair;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2841 undef %pt;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2842
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2843 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2844 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2845 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Cleans and filters a list of values.
# Every "(" and ")" is removed from each value of the argument list; the
# cleaned values that do not end with "$", "*" or "@" are returned, in their
# original order. The caller's values are never modified (copies are edited).
sub deleteBadOrderSensePairs{

    my @kept;

    for my $raw (@_){

        #work on a copy: @_ aliases the caller's variables
        (my $clean=$raw)=~s/[\(\)]//g;

        #a trailing $, * or @ marks a value to be discarded
        next if($clean=~/[\$\*\@]$/);

        push(@kept,$clean);
    }
    return @kept;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2858 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2859 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Splits the comma-separated string given as first argument and returns all
# of its fields, each with every "(" and ")" removed.
sub getAllEntries{

    my @entries=split(/,/,$_[0]);

    #strip the parentheses in place, the fields being private copies
    s/[\(\)]//g for @entries;

    return @entries;
}
#------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2871 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Splits the comma-separated string given as first argument and returns all
# of its fields, each with every "(", ")", "$", "*" and "@" removed.
sub getAllEntriesWOspecialChar{

    my @entries=split(/,/,$_[0]);

    #strip parentheses and marker characters in place (fields are private copies)
    s/[\(\)\$\*\@]//g for @entries;

    return @entries;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2883 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
2884 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Converts a links file into the tab-separated "sv" output table.
#
# Arguments:
#   $links_file : path of the links file to read (whitespace-separated fields)
#   $sv_file    : path of the table to create; "$sv_file.sorted" is used as a
#                 temporary file and removed at the end
#
# One output row is built per input line, with the columns listed in @header.
# The rows are then sorted with the external "sort" utility on column 9
# (nb_pairs) then column 13 (final_score), both numeric and descending, and
# the header line is written on top of the sorted rows.
#
# Dies if a file cannot be opened, if the unsorted table cannot be flushed to
# disk, or if the external sort fails. Progress messages go to the LOG handle.
sub links2SVfile{

    my($links_file,$sv_file)=@_;

    print LOG "# Converting to the sv output table...\n";
    open(my $links_fh,"<",$links_file) or die "$0: can't open $links_file:$!\n";
    open(my $sv_fh,">",$sv_file) or die "$0: can't write in the output: $sv_file :$!\n";

    my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
    chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
    final_score breakpoint1_start1-end1 breakpoint2_start2-end2);

    while (my $line=<$links_fh>){

        my @t=split(' ',$line);
        my @sv=();

        #every optional column defaults to "-" when the matching filter was not applied
        my $sv_type="-";
        my $strand_ratio="-";
        my $eq_ratio="-";
        my $eq_type="-";
        my $insert_ratio="-";
        my $link="-";
        my ($bk1, $bk2)=("-","-");
        my $score="-";

        my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]);
        my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
        my $nb_pairs=$t[6];
        $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
        $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
        my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";

        #if strand filtering
        if (defined $t[16]){
            #if inter-chr link
            $sv_type=$t[16];
            if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
                $strand_ratio=floor($1/$2*100)."%";
                $score=$t[18];
            }
            if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
                #if intra-chr link with insert size filtering
                $strand_ratio=floor($1/$2*100)."%";
                $link=floor($t[17]);
                if($sv_type!~/^INV/){
                    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
                    $score=$t[20];
                }else{
                    $score=$t[19];
                }
            }
        }

        if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){

            #if strand and order filtering only and/or interchr link
            $eq_type=$t[18];
            $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
            ($bk1, $bk2)=($t[20],$t[21]);
            #"(a,b),(c,d)" becomes "a-b c-d"; $bk aliases $bk1/$bk2, edited in place
            foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
            $score=$t[22];

        }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){

            #if all three filtering
            $link=floor($t[17]);
            $eq_type=$t[19];
            $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);

            if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
                $insert_ratio=floor($1/$2*100)."%";
                ($bk1, $bk2)=($t[22],$t[23]);
                $score=$t[24];

            }else{
                ($bk1, $bk2)=($t[21],$t[22]);
                $score=$t[23];
            }
            #same breakpoint reformatting as above
            foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}

        }

        #the two "chr\tstart-end" items each fill two columns of the table
        push(@sv, $chr_type, $sv_type,$eq_type);
        push(@sv,"$chr1\t$start1-$end1");
        push(@sv, $link);
        push(@sv,"$chr2\t$start2-$end2",
            $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);

        print $sv_fh join("\t",@sv)."\n";
    }

    close($links_fh);
    #buffered write errors only surface at close: never sort a truncated table
    close($sv_fh) or die "$0: can't write in the output: $sv_file :$!\n";

    #list form: no shell involved, so file names are passed verbatim to sort;
    #a failing sort is fatal instead of silently producing a header-only table
    my $sorted_file=$sv_file.".sorted";
    system("sort","-k","9,9nr","-k","13,13nr","-o",$sorted_file,"--",$sv_file)==0
        or die "$0: can't sort $sv_file into $sorted_file (sort status: $?)\n";

    open(my $sorted_fh,"<",$sorted_file) or die "$0: can't open in the output: $sorted_file :$!\n";
    my @links=<$sorted_fh>;
    close($sorted_fh);

    #rewrite the final table: header first, then the sorted rows
    open(my $out_fh,">",$sv_file) or die "$0: can't write in the output: $sv_file :$!\n";

    print $out_fh join("\t",@header)."\n";
    print $out_fh @links;
    close($out_fh);

    unlink($sorted_file);

    print LOG "-- output created: $sv_file\n";

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3000 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Writes the density table comparing a sample mates file with its reference.
#
# Arguments:
#   $chr            : hash ref read here as $chr->{nb_chrs}, $chr->{$k}->{name},
#                     $chr->{$k}->{nb_frag} and $chr->{$k}->{$frag} (array ref)
#   $chrID          : passed through to FreqCalculation
#   $file           : index into @$mates_file and @$mates_file_ref
#   $tag_length, $window_dist, $step, $input_format : passed through to
#                     FreqCalculation
#   $mates_file     : array ref of sample mates file names
#   $mates_file_ref : array ref of reference mates file names
#   $density_file   : output path; its first "/mates/" becomes "/density/"
#
# For every fragment counted in at least one of the two files, one
# tab-separated row is written: chromosome name, the first two elements of
# the fragment array each incremented by one (presumably start and end
# coordinates - to be confirmed), sample count, reference count and a
# natural-log ratio.
sub densityCalculation{

    my ($chr,$chrID,$file,$tag_length,$window_dist,$step,$mates_file,$mates_file_ref,$density_file,$input_format)=@_;

    #label used in the log: last dot-separated field of the sample file name
    my $fchr=(split(/\./,$mates_file->[$file]))[-1];

    my (%density,%density_ref);

    #FREQUENCY CALCULATION PROCEDURE
    print LOG "# $fchr : Frequency calculation procedure...\n";
    FreqCalculation(\%density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file->[$file],$input_format);
    FreqCalculation(\%density_ref,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file_ref->[$file],$input_format);

    #MAKING RATIO AND OUTPUT
    print LOG "\# Ratio calculation procedure...\n";
    $density_file=~s/\/mates\//\/density\//;

    my $fh=FileHandle->new;
    $fh->open(">".$density_file) or die "$0: can't write in the output ".$density_file." :$!\n";

    for my $k (1..$chr->{nb_chrs}){
        for my $frag (1..$chr->{$k}->{nb_frag}){

            my @row=($chr->{$k}->{name},
                $chr->{$k}->{$frag}->[0]+1,
                $chr->{$k}->{$frag}->[1]+1);

            my $cov=0;
            $cov=$density{$k}{$frag}->{count} if(exists $density{$k}{$frag}->{count});
            my $cov_ref=0;
            $cov_ref=$density_ref{$k}{$frag}->{count} if(exists $density_ref{$k}{$frag}->{count});

            #nothing to report for a fragment absent from both files
            next unless($cov || $cov_ref);

            #a missing count is replaced by a +1 pseudo-count on the other side
            my $log_ratio;
            if($cov && $cov_ref){
                $log_ratio=log($cov/$cov_ref);
            }elsif($cov_ref){
                $log_ratio=-log($cov_ref+1);
            }else{
                $log_ratio=log($cov+1);
            }

            print $fh join("\t",@row,$cov,$cov_ref,$log_ratio)."\n";
        }
    }

    $fh->close;
    print LOG "-- output created: $density_file\n";

    undef %density;
    undef %density_ref;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3051 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3052 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Counts, fragment by fragment, the mate-pairs of one mates file.
#
# Arguments:
#   $density      : hash ref filled through addToDensity()
#   $chr          : hash ref read here as $chr->{$id}->{nb_frag} and
#                   $chr->{$id}->{$i} (array ref, elements 0 and 1 used)
#   $chrID        : hash ref mapping chromosome names to the keys of $chr
#   $tag_length   : hash ref indexed by the end order of a read
#   $window_dist, $step : window parameters, used for the distance test and
#                   passed to getNextFrag()
#   $mates_file   : input path; names matching /.gz$/ are read through
#                   "gunzip -c", names matching /.bam$/ through
#                   "samtools view", anything else as a plain file
#   $input_format : passed to readMateFile() and recupCoords()
#
# Only the first occurrence of each mate identifier (first field of a line)
# is analysed. Progress, totals and the mean / standard deviation of the
# distance between the two read starts are printed to the LOG handle.
# Dies if the input cannot be opened.
sub FreqCalculation{

    my ($density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file,$input_format) = @_;

    my @sfile=split(/\./,$mates_file);
    my $fchr=$sfile[$#sfile];       #label used in the log: last dot-separated field
    my $fh = FileHandle->new;

    my $nb_windows=0;
    my $warn=100000;                #next record count triggering a progress message
    my $record=0;
    my %pair;                       #mate identifiers already analysed

    my ($sumX,$sumX2) = (0,0);

    print LOG "\# Frequency calculation for $mates_file...\n";

    #NOTE(review): the dot is unescaped in both patterns, so any character
    #before "gz"/"bam" matches; kept as is, confirm against the other readers
    #of mates files before tightening it
    if ($mates_file =~ /.gz$/) {
        $fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
    }elsif($mates_file =~ /.bam$/){
        #a stray "o" in front of this statement used to break BAM input
        $fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
    }else{
        $fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
    }

    while(<$fh>){

        my @t=split;
        my $mate=$t[0];

        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2,);

        next if(exists $pair{$mate});

        #readMateFile() fills the six variables above through the references
        next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2,\$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));

        next unless (exists $chrID->{$chr_read1} || exists $chrID->{$chr_read2});
        ($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});

        $pair{$mate}=undef;
        $record++;

        my ($coord_start_read1,$coord_end_read1, $coord_start_read2,$coord_end_read2);

        recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
        recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);

        my $length = abs($coord_start_read1-$coord_start_read2);
        $sumX += $length;                           #add to sum and sum^2 for mean and variance calculation
        $sumX2 += $length*$length;

        #C-style loop on purpose: getNextFrag() moves the index itself
        for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){

            if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){

                addToDensity($density,$chr_read1,$i,\$nb_windows)
                    if(overlap($coord_start_read1,$coord_end_read2,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1]));

            }else{

                $i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
            }
        }

        if($record>=$warn){
            print LOG "-- $warn mate-pairs analysed - $nb_windows points created\n";
            $warn+=100000;
        }
    }
    $fh->close;

    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_windows points created\n";

    if($record>0){

        my $mu = $sumX/$record;
        #rounding can push a near-zero variance slightly below 0 and sqrt()
        #of a negative number is fatal: clamp it
        my $variance = $sumX2/$record - $mu*$mu;
        $variance = 0 if($variance<0);
        my $sigma = sqrt($variance);
        print LOG "-- $fchr : mu length = $mu, sigma length = $sigma\n";
    }

}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3134 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3135 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert a tab-separated density/ratio file into the track file announced
# in the log as "circos format".
#
# Arguments:
#   $id           - string written immediately before each chromosome name
#   $density_file - path of the tab-separated input file
#   $segdup_file  - path of the output file (truncated and rewritten)
#
# For every input line, columns 0, 1, 2 and 5 are written as
# "<id><chr>\t<start>\t<end>\t<ratio>\n".
# Progress messages go to the LOG filehandle, which is opened elsewhere.
# Dies if a file cannot be opened or the output cannot be flushed on close.
sub ratio2segdup {

    my ($id, $density_file, $segdup_file) = @_;

    print LOG "# Converting to circos format...\n";

    # Three-argument open with lexical handles: the path is taken literally,
    # so names with leading/trailing blanks or mode characters ("<", ">", "|")
    # cannot be misread as part of the open mode.
    open(my $ratio_fh, '<', $density_file)
        or die "$0: can't open $density_file :$!\n";
    open(my $segdup_fh, '>', $segdup_file)
        or die "$0: can't write in the output: $segdup_file :$!\n";

    while (my $line = <$ratio_fh>) {
        chomp $line;
        my ($chr1, $start1, $end1, $ratio) = (split /\t/, $line)[0, 1, 2, 5];
        print {$segdup_fh} "$id$chr1\t$start1\t$end1\t$ratio\n";
    }

    close $ratio_fh;

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close $segdup_fh
        or die "$0: can't write in the output: $segdup_file :$!\n";

    print LOG "-- output created: $segdup_file\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3155 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3156 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Convert a tab-separated density/ratio file into a bedGraph track.
#
# Arguments:
#   $density_file - path of the tab-separated input file
#   $bed_file     - path of the output file (truncated and rewritten); the
#                   same string is used as the track name in the header
#
# The output starts with one "track type=bedGraph ..." header line. For every
# input line, columns 0, 1, 2 and 5 are written as
# "<chr>\t<start-1>\t<end>\t<ratio>\n"; "chr" is prepended to the chromosome
# name unless the name already contains "chr".
# Progress messages go to the LOG filehandle, which is opened elsewhere.
# Dies if a file cannot be opened or the output cannot be flushed on close.
sub ratio2bedfile {

    my ($density_file, $bed_file) = @_;

    print LOG "# Converting to bedGraph format...\n";

    # Three-argument open with lexical handles: the path is taken literally,
    # so names with leading/trailing blanks or mode characters ("<", ">", "|")
    # cannot be misread as part of the open mode.
    open(my $ratio_fh, '<', $density_file)
        or die "$0: can't open $density_file :$!\n";
    open(my $bed_fh, '>', $bed_file)
        or die "$0: can't write in the output: $bed_file :$!\n";

    print {$bed_fh} "track type=bedGraph name=\"$bed_file\" description=\"log ratios for cnv detection\" ".
                    "visibility=2 color=255,0,0 alwaysZero=\"On\"\n";

    while (my $line = <$ratio_fh>) {
        chomp $line;
        my ($chr1, $start1, $end1, $ratio) = (split /\t/, $line)[0, 1, 2, 5];
        $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/);

        # The start coordinate is shifted down by one on output.
        print {$bed_fh} "$chr1\t".($start1-1)."\t$end1\t$ratio\n";
    }

    close $ratio_fh;

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close $bed_fh
        or die "$0: can't write in the output: $bed_file :$!\n";

    print LOG "-- output created: $bed_file\n";
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3179 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3180 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Return the orientation code with every F and R exchanged.
#
# Recognised codes are F, R, FF, RR, FR and RF; any other argument yields
# undef because it has no entry in the lookup table.
sub inverseSense {

    my ($orientation) = @_;

    my %flipped;
    @flipped{qw(F R FF RR FR RF)} = qw(R F RR FF RF FR);

    return $flipped{$orientation};
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3189
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3190 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3191 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Compute the window (fragment) index to jump to for a read that is not
# within reach of the current window.
#
# Arguments:
#   $read_start  - start coordinate of the read
#   $frag_num    - index of the current window
#   $frag_start  - start coordinate of the current window
#   $frag_last   - index returned when the read does not start after the
#                  current window start
#   $window_dist - distance subtracted from the gap before counting steps
#   $step        - distance between two consecutive window starts
#
# Returns $frag_num plus the whole number of steps that fit into
# ($read_start - $frag_start - $window_dist), never less than $frag_num,
# or $frag_last when $read_start <= $frag_start.
sub getNextFrag {

    my ($read_start, $frag_num, $frag_start, $frag_last, $window_dist, $step) = @_;

    my $how_far = $read_start - $frag_start;
    return $frag_last if $how_far <= 0;

    # Truncate numerically with int(). Stripping everything after the "." in
    # the string form breaks for values printed in exponent notation (1e-05
    # has no "." and stayed fractional), which produced non-integer indexes.
    # A single division also avoids the rounding noise of subtracting two
    # separately rounded quotients.
    my $nb_windows_toskip = int(($how_far - $window_dist) / $step);
    $nb_windows_toskip = 0 if $nb_windows_toskip < 0;

    return $frag_num + $nb_windows_toskip;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3208 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3209 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Pick the colour whose [min, max] range contains $count.
#
# Arguments:
#   $count  - value to classify
#   $hcolor - hash reference: colour name => array reference whose first two
#             elements are the inclusive lower and upper bounds
#   $format - "circos" or "bed"; selects the fallback colour
#
# Returns the first colour (in hash iteration order) whose range holds
# $count. When none does, returns "white" for the circos format and
# "255,255,255" for the bed format.
sub getColor {

    my ($count, $hcolor, $format) = @_;

    foreach my $name (keys %{$hcolor}) {
        next unless $count >= $hcolor->{$name}[0];
        next unless $count <= $hcolor->{$name}[1];
        return $name;
    }

    # No range matched: fall back to white in the requested notation.
    if ($format eq "circos") {
        return "white";
    }
    return "255,255,255" if ($format eq "bed");
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3219 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3220 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Derive the strand and the start/end coordinates of a hit from its signed
# position.
#
# Arguments:
#   $c_hit        - hit position; a leading "-" marks the reverse strand
#   $cs_hit       - scalar reference that receives the start coordinate
#   $ce_hit       - scalar reference that receives the end coordinate
#   $tag_length   - length of the tag
#   $input_format - accepted but not used here
#
# The start is always the unsigned position. The end lies ($tag_length - 1)
# below the start on the reverse strand and ($tag_length - 1) above it on
# the forward strand. Returns 'R' or 'F'.
sub recupCoords {

    my ($position, $start_ref, $end_ref, $tag_length, $input_format) = @_;

    # The substitution both detects and removes the reverse-strand marker.
    my $is_reverse = ($position =~ s/^\-//);

    ${$start_ref} = $position;
    ${$end_ref}   = $is_reverse
                  ? $position - ($tag_length - 1)
                  : $position + ($tag_length - 1);

    return $is_reverse ? 'R' : 'F';
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3237 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3238 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Tell whether a hit touches a region.
#
# Arguments: hit start, hit end, region start, region end.
# Returns 0 when both hit coordinates lie below the region start or both lie
# above the region end, and 1 in every other case.
sub overlap {

    my ($hit_start, $hit_end, $region_start, $region_end) = @_;

    # Hit entirely before the region.
    return 0 if $hit_start < $region_start && $hit_end < $region_start;

    # Hit entirely after the region.
    return 0 if $hit_start > $region_end && $hit_end > $region_end;

    return 1;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3246 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3247 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Record that a mate id connects two windows.
#
# Arguments:
#   $link   - hash reference indexed as {chr1}{chr2}{frag1}{frag2}; each leaf
#             is a comma-separated list of ids
#   $chr1, $frag1 - chromosome and window of the first end
#   $chr2, $frag2 - chromosome and window of the second end
#   $mt     - id to store for this pair of windows
#   $nb     - scalar reference incremented each time a new leaf is created
#
# The two ends are put in a canonical order first (smaller chromosome first,
# compared numerically; on the same chromosome, smaller window first) so a
# pair is stored under one key whichever end is given first. An id is
# appended to an existing leaf only if it is not already listed.
sub makeLink {

    my ($link, $chr1, $frag1, $chr2, $frag2, $mt, $nb) = @_;

    if ($chr1 > $chr2) {
        ($chr1, $chr2)   = ($chr2, $chr1);
        ($frag1, $frag2) = ($frag2, $frag1);
    }
    elsif ($chr1 == $chr2 && $frag1 > $frag2) {
        ($frag1, $frag2) = ($frag2, $frag1);
    }

    my $partners = ($link->{$chr1}->{$chr2}->{$frag1} ||= {});

    if (!exists $partners->{$frag2}) {
        $partners->{$frag2} = $mt;
        $$nb++;
    }
    # \Q...\E makes the id match literally: without it an id containing
    # regex metacharacters ("+", ".", "(" ...) was either appended twice or
    # mistaken for a different id that happened to match the pattern.
    elsif ($partners->{$frag2} !~ /(^|,)\Q$mt\E(,|$)/) {
        $partners->{$frag2} .= ",$mt";
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3270 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3271 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3272 # Function that adds the read to the density profile
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Count one more read in a window of the density profile.
#
# Arguments:
#   $density - hash reference indexed as {chromosome}{window}{count}
#   $chr1    - chromosome key
#   $frag1   - window key
#   $nb      - scalar reference incremented the first time a window is
#              counted, i.e. the number of distinct windows seen
sub addToDensity {

    my ($density, $chr1, $frag1, $nb) = @_;

    if (exists $density->{$chr1}{$frag1}{count}) {
        # Window already known: just bump its read count.
        $density->{$chr1}{$frag1}{count}++;
    }
    else {
        # First read in this window.
        $density->{$chr1}{$frag1}{count} = 1;
        ${$nb}++;
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3284 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3285 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Drop the fractional part of a number.
#
# Despite the name, this truncates toward zero (floor(-2.5) is -2), as the
# original text-based implementation did.
#
# Argument: the number to truncate.
# Returns:  its integer part.
sub floor {

    my $nb = $_[0];

    # Deleting everything after the "." in the printed form is wrong for
    # numbers Perl prints in exponent notation: 1.5e-07 became 1 and 1e-05
    # was returned unchanged. int() truncates the value itself. The number
    # is still taken through its default string form first so ordinary
    # decimals give exactly the result they gave before.
    return int("$nb");
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3291 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Shorten the decimal part of a number when it is long enough.
#
# Arguments:
#   number       - value to format
#   digit count  - if the number shows at least this many digits after the
#                  decimal point, it is reformatted with one digit fewer
#                  than this count (sprintf "%.<count-1>f")
#
# Returns the reformatted string, or the number untouched when its decimal
# part is shorter than the given count.
sub decimal {

    my ($value, $min_digits) = @_;

    # Nothing to do unless the decimal part has at least $min_digits digits.
    return $value unless $value =~ /\d+\.(\d){$min_digits,}/;

    my $template = "%." . ($min_digits - 1) . "f";
    return sprintf($template, $value);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3301
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3302 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Return the numerically largest of the arguments (undef for an empty list).
sub max {

    my ($largest, @others) = @_;

    for my $candidate (@others) {
        next unless $candidate > $largest;
        $largest = $candidate;
    }

    return ($largest);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3311 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3312 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Return the numerically smallest of the arguments (undef for an empty list).
sub min {

    my ($smallest, @others) = @_;

    for my $candidate (@others) {
        if ($candidate < $smallest) {
            $smallest = $candidate;
        }
    }

    return ($smallest);
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3321 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3322 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Reorder the elements of one array according to an array of indexes.
#
# Arguments:
#   $tab1 - array reference holding indexes
#   $tab2 - array reference holding the values to reorder
#
# For every value $i stored in @$tab1, slot $i of the result receives
# $tab2->[ $tab1->[$i] ]. Returns the resulting list.
# NOTE(review): the loop walks the values of @$tab1, not its positions, so
# every slot is filled only when @$tab1 is a permutation of 0..n-1 -
# presumably always the case for callers; confirm.
sub sortTablebyIndex {

    my ($tab1, $tab2) = @_;
    my @reordered;

    $reordered[$_] = $tab2->[ $tab1->[$_] ] for @{$tab1};

    return @reordered;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3332 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3333 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Round a number half away from zero.
#
# Arguments:
#   number - value to round (a missing or false value counts as 0)
#   places - number of decimal places to keep (a missing or false value
#            counts as 0)
#
# Returns the rounded value.
sub round {

    my ($value, $places) = @_;
    $value  ||= 0;
    $places ||= 0;

    my $scale = 10 ** $places;

    # +0.5 for positive values, -0.5 for negative ones, 0 for zero, so that
    # int() ends up rounding away from zero.
    my $nudge = .5 * ($value <=> 0);

    return int( $scale * $value + $nudge ) / $scale;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3339 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3340 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Return the distinct arguments, sorted with Perl's default (string) order.
sub getUniqueTable {

    my @items = @_;

    # Keep only the first occurrence of every value.
    my %seen;
    my @distinct = grep { !$seen{$_}++ } @items;

    return sort @distinct;
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3348 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3349 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
# Concatenate several files into one.
#
# Arguments:
#   first  - array reference listing the files to concatenate, in order
#   second - path of the destination file
#
# Any existing destination is removed first, then each listed file is
# appended to it. With an empty list the destination is only removed.
# A listed file that cannot be read is reported with a warning and skipped.
# Dies if the destination cannot be opened or written.
sub catFiles {

    my ($sources, $target) = @_;

    unlink $target if defined $target && -e $target;

    # The copy is done in Perl rather than through "cat" in a shell: file
    # names are then taken literally, whereas characters such as "$", quotes
    # or backticks in a name were interpreted by the shell.
    foreach my $source (@{$sources}) {

        open(my $out, '>>', $target)
            or die "$0: can't write in the output: $target :$!\n";
        binmode $out;

        if (open(my $in, '<', $source)) {
            binmode $in;
            my $buffer;
            while (read($in, $buffer, 65536)) {
                print {$out} $buffer;
            }
            close $in;
        }
        else {
            warn "$0: can't open $source :$!\n";
        }

        # Buffered write errors (e.g. a full disk) only surface at close time.
        close $out
            or die "$0: can't write in the output: $target :$!\n";
    }
}
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3355 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3356 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3357 #check if the configuration file is correct
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3358 sub validateconfiguration{
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3359
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3360 my %conf=%{$_[0]};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3361 my $list_prgs="@ARGV";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3362
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3363 my @general_params=qw(input_format mates_orientation read1_length read2_length mates_file cmap_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3364 my @detection_params=qw(split_mate_file window_size step_length split_mate_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3365 my @filtering_params=qw(split_link_file nb_pairs_threshold strand_filtering split_link_file);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3366 my @circos_params=qw(organism_id colorcode);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3367 my @bed_params=qw(colorcode);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3368 my @compare_params=qw(list_samples file_suffix);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3369
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3370 foreach my $dir ($conf{general}{output_dir},$conf{general}{tmp_dir}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3371
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3372 unless (defined($dir)) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3373 $dir = ".";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3374 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3375 unless (-d $dir){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3376 mkdir $dir or die;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3377 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3378 $dir.="/" if($dir!~/\/$/);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3379 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3380
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3381 unless (defined($conf{general}{num_threads})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3382 $conf{general}{num_threads} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3383 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3384 $conf{general}{num_threads}=24 if($conf{general}{num_threads}>24);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3385
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3386 if($list_prgs!~/links2compare/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3387
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3388 foreach my $p (@general_params){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3389 die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{general}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3390 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3391
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3392 $conf{general}{input_format}="sam" if($conf{general}{input_format} eq "bam");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3393
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3394 unless (defined($conf{general}{sv_type})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3395 $conf{general}{sv_type} = "all";
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3396 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3397
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3398 $conf{general}{read_lengths}={ 1=> $conf{general}{read1_length}, 2=> $conf{general}{read2_length}};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3399 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3400
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3401 if($list_prgs=~/(linking|cnv)/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3402
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3403 foreach my $p (@detection_params){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3404 die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{detection}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3405 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3406
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3407 die("Error Config : The parameter \"mates_file_ref\" is not defined\n") if($list_prgs=~/cnv/ && !defined $conf{detection}{mates_file_ref});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3408
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3409 if($conf{detection}{step_length}>$conf{detection}{window_size}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3410 die("Error Config : Parameter \"step_length\" should not exceed \"window size\"\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3411 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3412
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3413 unless (-d $conf{general}{tmp_dir}."/mates"){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3414 mkdir $conf{general}{tmp_dir}."/mates" or die;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3415 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3416
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3417 if($list_prgs=~/linking/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3418 unless (-d $conf{general}{tmp_dir}."/links"){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3419 mkdir $conf{general}{tmp_dir}."/links" or die;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3420 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3421 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3422 if($list_prgs=~/cnv/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3423 unless (-d $conf{general}{tmp_dir}."/density"){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3424 mkdir $conf{general}{tmp_dir}."/density" or die;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3425 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3426 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3427
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3428 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3429
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3430 if($list_prgs=~/filtering/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3431
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3432 foreach my $p (@filtering_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3433 die("Error Config : The filtering parameter \"$p\" is not defined\n") if (!defined $conf{filtering}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3434
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3435 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3436
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3437 if(defined($conf{filtering}{chromosomes})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3438 my @chrs=split(",",$conf{filtering}{chromosomes});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3439 my $exclude=($chrs[0]=~/^\-/)? 1:0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3440 for my $chrName (@chrs){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3441
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3442 die("Error Config : The filtering parameter \"chromosomes\" is not valid\n")
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3443 if(($chrName!~/^\-/ && $exclude) || ($chrName=~/^\-/ && !$exclude));
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3444
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3445 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3446 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3447
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3448 if (( $conf{filtering}{order_filtering} )&& !$conf{filtering}{strand_filtering}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3449 die("Error Config : The parameter strand_filtering is set to \"0\" while order_filtering is selected".
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3450 "\nChange strand_filtering to \"1\" if you want to use the order filtering\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3451 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3452 if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{order_filtering}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3453 die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use order filtering\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3454 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3455 if (( $conf{filtering}{insert_size_filtering} )&& !$conf{filtering}{strand_filtering}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3456 die("Error Config : The parameter strand_filtering is set to \"0\" while insert_size_filtering is selected".
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3457 "\nChange strand_filtering to \"1\" if you want to use the insert size filtering\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3458 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3459 if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{insert_size_filtering}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3460 die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use discriminate insertions from deletions\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3461 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3462
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3463 if (!defined($conf{filtering}{indel_sigma_threshold})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3464 $conf{filtering}{indel_sigma_threshold} = 2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3465 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3466 if (!defined($conf{filtering}{dup_sigma_threshold})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3467 $conf{filtering}{dup_sigma_threshold} = 2;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3468 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3469 if (!defined($conf{filtering}{singleton_sigma_threshold})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3470 $conf{filtering}{singleton_sigma_threshold} = 4;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3471 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3472
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3473 if (!defined($conf{filtering}{nb_pairs_order_threshold})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3474 $conf{filtering}{nb_pairs_order_threshold} = 1;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3475 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3476
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3477 if (!defined($conf{filtering}{final_score_threshold})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3478 $conf{filtering}{final_score_threshold} = 0.8;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3479 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3480
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3481 if ($conf{filtering}{nb_pairs_order_threshold}>$conf{filtering}{nb_pairs_threshold}) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3482 die("Error Config : Parameter \"nb_pairs_order_threshold\" should not exceed \"nb_pairs_threshold\"\n");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3483 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3484
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3485 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3486
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3487 if($list_prgs=~/2circos$/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3488 foreach my $p (@circos_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3489 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3490 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3491 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3492 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3493
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3494 if($list_prgs=~/2bed$/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3495 foreach my $p (@bed_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3496 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3497 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3498 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3499
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3500 if($list_prgs=~/links2compare/){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3501 foreach my $p (@compare_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3502 die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3503 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3504
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3505 unless (defined($conf{compare}{same_sv_type})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3506 $conf{compare}{same_sv_type} = 0;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3507 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3508
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3509 unless (defined($conf{compare}{min_overlap})) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3510 $conf{compare}{min_overlap} = 1E-9;
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3511 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3512
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3513 if($conf{compare}{circos_output}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3514 foreach my $p (@circos_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3515 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3516 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3517 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3518 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3519 if($conf{compare}{bed_output}){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3520 foreach my $p (@bed_params) {
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3521 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3522 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3523 die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3524
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3525 my @samples=split(",",$conf{compare}{list_samples});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3526 my @read_lengths=split(",",$conf{compare}{list_read_lengths});
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3527 for my $i (0..$#samples){
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3528 my @l=split("-",$read_lengths[$i]);
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3529 $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3530 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3531 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3532 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3533
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3534
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3535 }
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3536 #------------------------------------------------------------------------------#
f090bf6ec765 Uploaded
bzeitouni
parents:
diff changeset
3537 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#