annotate nibls.pl @ 5:f466394ee1fd draft

Uploaded
author big-tiandm
date Thu, 23 Oct 2014 22:08:09 -0400
parents 07745c0958dd
children e0884a4b996b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
2 #####################################################################################################
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
3 #LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it.
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
4 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
5 # Dan MacLean
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
6 # dan.maclean@sainsbury-laboratory.ac.uk
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
7 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
8 # This program is free academic software; academic and non-profit
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
9 # users may redistribute it freely.
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
10 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
14 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
15 # This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
16 # see included file GPL3.txt
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
17 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
18 #
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
19
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
20
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
21 ###Dont forget you will need ...
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
22 #####################################################################################################
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
23 # Boost::Graph
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
24 #Copyright 2005 by David Burdick
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
25 # Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
26 #Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
27 #####################################################################################################
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
28
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
29
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
30
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
31 use strict;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
32 use warnings;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
33 use Boost::Graph;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
34 use Getopt::Long;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
35
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
36
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
37 my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
38
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
39 my $gff_file ;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
40 my $min_inc = 5;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
41 my $clus = 0.6;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
42 my $buff = 0;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
43 my $output_file;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
44 my $temp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
45 my $mark;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
46
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
47 GetOptions(
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
48
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
49 'c=f' => \$clus,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
50 'm=i' => \$min_inc,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
51 'f|file=s' => \$gff_file,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
52 'b=i' => \$buff,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
53 'o=s' => \$output_file,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
54 't=s' => \$temp,
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
55 'k=s' => \$mark
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
56 ) ;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
57
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
58
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
59 die $usage unless $gff_file;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
60
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
61
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
62 my $starttime = time;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
63 warn "started $starttime\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
64
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
65 ## load in data
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
66 my %molecules; # stores starts and ends of srnas
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
67 open GFF, "<$gff_file";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
68
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
69 while (my $entry = <GFF>){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
70
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
71 chomp $entry;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
72 next if($entry=~/^\#/);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
73 my @data = split(/\t/,$entry);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
74 my $chr=shift @data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
75 my $strand=shift @data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
76 my $start=shift @data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
77 my $end=shift @data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
78 # my $length1=$end-$start+1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
79 # if ($length1>30) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
80 # $length1=40;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
81 # }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
82 my $total;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
83 foreach (0..$#data) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
84 $total+=$_;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
85 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
86 push @data,$total;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
87 # push @data,$length1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
88 # if (defined $molecules{$chr}{$start}{$end}{$strand}) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
89 # my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
90 # for (my $i=0;$i<$#old_data ;$i++) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
91 # $data[$i]+=$old_data[$i];
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
92 # }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
93 # }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
94 my $data=join ";",@data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
95 $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
96 #print "$chr\t$start\t$end\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
97 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
98
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
99 close GFF;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
100
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
101 warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
102
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
103 my @sample=split/\#/,$mark;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
104 $mark=join"\"\t\"",@sample;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
105 open OUT, ">$output_file";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
106 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
107 open CLUSTER,">$temp";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
108 print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
109 foreach my $chromosome (keys %molecules){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
110 my $g = new Boost::Graph(directed=>0);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
111 my @starts = keys(%{$molecules{$chromosome}} );
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
112 @starts = sort {$a <=> $b} @starts;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
113
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
114 while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
115
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
116
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
117 foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
118
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
119
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
120 ###use new graph if the next srna is too far away from this one..
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
121 if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
122
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
123
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
124 ##dump the info from the old graph
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
125 if (scalar(@{$g->get_nodes()}) > 2){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
126
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
127 my $cluster_coeff = get_cc($g);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
128 if ($cluster_coeff >= $clus){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
129 dump_locus($g, $cluster_coeff);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
130 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
131 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
132
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
133
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
134 $g = new Boost::Graph(directed=>0);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
135
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
136 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
137
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
138 foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
139
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
140 unless ($e eq $srna_end){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
141 my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
142 my $en = $chromosome. ':' . $srna_start . ':' . $e;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
143 $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
144 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
145
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
146 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
147
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
148 foreach my $start (@starts){ ##build graph of overlaps
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
149 my $new = 0;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
150 last if $start - $min_inc > $srna_end;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
151 if ($start - $min_inc <= $srna_end){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
152
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
153 my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
154 foreach my $end (keys %{$molecules{$chromosome}{$start}}){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
155
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
156 my $end_node = $chromosome . ':' . $start . ':' . $end;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
157 $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
158 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
159
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
160 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
161 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
162 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
163
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
164 if (!(defined $starts[0])) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
165 ##dump the info from the last graph
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
166 if (scalar(@{$g->get_nodes()}) > 2){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
167
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
168 my $cluster_coeff = get_cc($g);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
169 if ($cluster_coeff >= $clus){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
170 dump_locus($g, $cluster_coeff);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
171 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
172 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
173 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
174 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
175 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
176
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
177 warn "Loci printed\nFinished\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
178
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
179 my $endtime = time;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
180
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
181 my $elapsed = $endtime - $starttime;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
182
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
183 warn "Time elapsed = $elapsed s\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
184 close OUT;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
185 close CLUSTER;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
186 #########################################################################################
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
187 sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
188
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
189 my $graph = shift;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
190
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
191 my @component = @{$graph->get_nodes()}; #number of nodes
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
192 my @clustering_coefficients;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
193
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
194 foreach my $vertex (@component)
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
195 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
196
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
197 my @neighbours = @{$graph->neighbors($vertex)};
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
198
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
199 my %edges_in_graph;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
200
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
201 my $n = @neighbours; #n = the number of neighbours
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
202 my $k = ($n * ($n - 1))/2; #k = total number of possible connections
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
203
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
204 my $e= 0; #actual number of connections within sub-graph
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
205
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
206 foreach my $neighbour (@neighbours)
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
207 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
208 foreach my $neighbour_2 (@neighbours)
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
209 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
210 my $edge1 = "$neighbour\t$neighbour_2";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
211 my $edge2 = "$neighbour_2\t$neighbour";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
212 unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
213 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
214 if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
215 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
216 ++$e;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
217 $edges_in_graph{$edge1}=1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
218 $edges_in_graph{$edge2}=1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
219 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
220 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
221 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
222 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
223
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
224 if ($k >= 1)
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
225 {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
226 my $c = $e / $k;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
227 push @clustering_coefficients, $c;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
228 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
229 else {push @clustering_coefficients, '0';}
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
230 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
231
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
232 my $graph_n = scalar(@clustering_coefficients);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
233 my $graph_cc = 0;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
234 foreach my $cc (@clustering_coefficients){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
235
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
236 $graph_cc = $graph_cc + $cc;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
237
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
238 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
239 $graph_cc = $graph_cc / $graph_n;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
240
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
241 return $graph_cc;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
242 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
243
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
244 ############################################################################################################
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
245
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
246 sub dump_locus{
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
247
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
248 my $g = shift;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
249 my $cc = shift;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
250 my $chr;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
251 my $start = 1000000000000000000000000000000000000000000000;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
252 my $end = -1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
253 my @sample;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
254 my @tag;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
255 foreach my $node (@{$g->get_nodes()}){
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
256
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
257 $node =~ m/^(\S+):(\d+):(\d+)$/;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
258 my $c=$1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
259 my $s=$2;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
260 my $e=$3;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
261 my @data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
262 foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
263 push @tag,($s.",".$e.",".$str);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
264 @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
265 # for (my $i=0;$i<$#old_data ;$i++) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
266 # $data[$i]+=$old_data[$i];
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
267 # }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
268 my $length=$e-$s+1;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
269 if ($length>30) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
270 $length=40;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
271 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
272 push @data,$length;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
273 my $data=join ";",@data;#sample_exp/total_exp/length;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
274 push @sample,$data;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
275 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
276
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
277 $chr = $c;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
278 $start = $s if $s < $start;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
279 $end = $e if $e > $end;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
280 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
281 my $tag=join";",@tag;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
282
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
283 my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
284 if ($max_length==40) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
285 $max_length="\>30";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
286 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
287 my $cluster_exp=join"\t",@cluster_exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
288 my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
289 print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
290 print OUT $gff, "\n";
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
291 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
292
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
293 sub Max_length{
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
294 my @exp=@{$_[0]};
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
295 my %sample_length;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
296 my $total_exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
297 my @each;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
298 for (my $i=0;$i<=$#exp ;$i++) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
299 my @tag=split/;/,$exp[$i];
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
300 my $length=pop(@tag);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
301 my $exp=pop(@tag);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
302 $sample_length{$length}+=$exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
303 $total_exp+=$exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
304 for (my $j=0;$j<=$#tag ;$j++) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
305 $each[$j]+=$tag[$j];
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
306 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
307 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
308 my $max=0;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
309 my $max_key;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
310 foreach my $key (sort keys %sample_length) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
311 my $p=$sample_length{$key}/$total_exp;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
312 if ($p>$max) {
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
313 $max=$p;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
314 $max_key=$key;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
315 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
316 $sample_length{$key}=sprintf("%.2f",$p);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
317 }
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
318 return($max_key,$sample_length{$max_key},@each);
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
319 }