annotate nibls.pl @ 28:e93f33f03019 draft

Uploaded
author big-tiandm
date Thu, 13 Nov 2014 22:45:51 -0500
parents 9dcffd531c76
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
2 #####################################################################################################
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
3 #LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it.
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
4 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
5 # Dan MacLean
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
6 # dan.maclean@sainsbury-laboratory.ac.uk
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
7 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
8 # This program is free academic software; academic and non-profit
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
9 # users may redistribute it freely.
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
10 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
14 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
15 # This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
16 # see included file GPL3.txt
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
17 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
18 #
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
19
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
20
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
21 ###Dont forget you will need ...
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
22 #####################################################################################################
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
23 # Boost::Graph
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
24 #Copyright 2005 by David Burdick
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
25 # Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
26 #Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
27 #####################################################################################################
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
28
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
29
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
30
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
31 use strict;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
32 use warnings;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
33 use Boost::Graph;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
34 use Getopt::Long;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
35
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
36
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
37 my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
38
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
39 my $gff_file ;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
40 my $min_inc = 5;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
41 my $clus = 0.6;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
42 my $buff = 0;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
43 my $output_file;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
44 my $temp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
45 my $mark;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
46
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
47 GetOptions(
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
48
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
49 'c=f' => \$clus,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
50 'm=i' => \$min_inc,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
51 'f|file=s' => \$gff_file,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
52 'b=i' => \$buff,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
53 'o=s' => \$output_file,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
54 't=s' => \$temp,
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
55 'k=s' => \$mark
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
56 ) ;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
57
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
58
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
59 die $usage unless $gff_file;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
60
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
61
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
62 my $starttime = time;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
63 warn "started $starttime\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
64
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
65 ## load in data
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
66 my %molecules; # stores starts and ends of srnas
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
67 open GFF, "<$gff_file";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
68
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
69 while (my $entry = <GFF>){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
70
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
71 chomp $entry;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
72 next if($entry=~/^\#/);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
73 my @data = split(/\t/,$entry);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
74 my $chr=shift @data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
75 my $strand=shift @data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
76 my $start=shift @data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
77 my $end=shift @data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
78 # my $length1=$end-$start+1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
79 # if ($length1>30) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
80 # $length1=40;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
81 # }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
82 my $total;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
83 for (my $s=0;$s<@data ;$s++) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
84 $total+=$data[$s];
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
85 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
86 push @data,$total;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
87 # push @data,$length1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
88 # if (defined $molecules{$chr}{$start}{$end}{$strand}) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
89 # my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
90 # for (my $i=0;$i<$#old_data ;$i++) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
91 # $data[$i]+=$old_data[$i];
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
92 # }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
93 # }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
94 my $data=join ";",@data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
95 $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
96 #print "$chr\t$start\t$end\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
97 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
98
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
99 close GFF;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
100
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
101 warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
102
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
103 my @sample=split/\#/,$mark;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
104 $mark=join"\"\t\"",@sample;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
105 open OUT, ">$output_file";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
106 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
107 open CLUSTER,">$temp";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
108 print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
109 foreach my $chromosome (keys %molecules){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
110 my $g = new Boost::Graph(directed=>0);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
111 my @starts = keys(%{$molecules{$chromosome}} );
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
112 @starts = sort {$a <=> $b} @starts;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
113
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
114 while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
115
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
116
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
117 foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
118
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
119
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
120 ###use new graph if the next srna is too far away from this one..
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
121 if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
122
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
123
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
124 ##dump the info from the old graph
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
125 if (scalar(@{$g->get_nodes()}) > 2){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
126
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
127 my $cluster_coeff = get_cc($g);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
128 if ($cluster_coeff >= $clus){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
129 dump_locus($g, $cluster_coeff);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
130 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
131 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
132
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
133
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
134 $g = new Boost::Graph(directed=>0);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
135
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
136 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
137
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
138 foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
139
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
140 unless ($e eq $srna_end){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
141 my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
142 my $en = $chromosome. ':' . $srna_start . ':' . $e;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
143 $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
144 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
145
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
146 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
147
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
148 foreach my $start (@starts){ ##build graph of overlaps
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
149 my $new = 0;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
150 last if $start - $min_inc > $srna_end;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
151 if ($start - $min_inc <= $srna_end){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
152
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
153 my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
154 foreach my $end (keys %{$molecules{$chromosome}{$start}}){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
155
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
156 my $end_node = $chromosome . ':' . $start . ':' . $end;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
157 $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
158 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
159
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
160 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
161 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
162 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
163
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
164 if (!(defined $starts[0])) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
165 ##dump the info from the last graph
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
166 if (scalar(@{$g->get_nodes()}) > 2){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
167
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
168 my $cluster_coeff = get_cc($g);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
169 if ($cluster_coeff >= $clus){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
170 dump_locus($g, $cluster_coeff);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
171 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
172 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
173 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
174 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
175 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
176
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
177 warn "Loci printed\nFinished\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
178
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
179 my $endtime = time;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
180
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
181 my $elapsed = $endtime - $starttime;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
182
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
183 warn "Time elapsed = $elapsed s\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
184 close OUT;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
185 close CLUSTER;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
186 #########################################################################################
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
187 sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
188
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
189 my $graph = shift;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
190
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
191 my @component = @{$graph->get_nodes()}; #number of nodes
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
192 my @clustering_coefficients;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
193
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
194 foreach my $vertex (@component)
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
195 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
196
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
197 my @neighbours = @{$graph->neighbors($vertex)};
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
198
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
199 my %edges_in_graph;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
200
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
201 my $n = @neighbours; #n = the number of neighbours
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
202 my $k = ($n * ($n - 1))/2; #k = total number of possible connections
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
203
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
204 my $e= 0; #actual number of connections within sub-graph
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
205
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
206 foreach my $neighbour (@neighbours)
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
207 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
208 foreach my $neighbour_2 (@neighbours)
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
209 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
210 my $edge1 = "$neighbour\t$neighbour_2";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
211 my $edge2 = "$neighbour_2\t$neighbour";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
212 unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
213 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
214 if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
215 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
216 ++$e;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
217 $edges_in_graph{$edge1}=1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
218 $edges_in_graph{$edge2}=1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
219 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
220 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
221 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
222 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
223
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
224 if ($k >= 1)
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
225 {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
226 my $c = $e / $k;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
227 push @clustering_coefficients, $c;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
228 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
229 else {push @clustering_coefficients, '0';}
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
230 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
231
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
232 my $graph_n = scalar(@clustering_coefficients);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
233 my $graph_cc = 0;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
234 foreach my $cc (@clustering_coefficients){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
235
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
236 $graph_cc = $graph_cc + $cc;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
237
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
238 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
239 $graph_cc = $graph_cc / $graph_n;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
240
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
241 return $graph_cc;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
242 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
243
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
244 ############################################################################################################
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
245
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
246 sub dump_locus{
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
247
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
248 my $g = shift;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
249 my $cc = shift;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
250 my $chr;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
251 my $start = 1000000000000000000000000000000000000000000000;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
252 my $end = -1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
253 my @sample;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
254 my @tag;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
255 foreach my $node (@{$g->get_nodes()}){
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
256
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
257 $node =~ m/^(\S+):(\d+):(\d+)$/;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
258 my $c=$1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
259 my $s=$2;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
260 my $e=$3;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
261 # my @data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
262 foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
263 my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
264 push @tag,($s.",".$e.",".$str.",".$data[-1]);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
265 # for (my $i=0;$i<$#old_data ;$i++) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
266 # $data[$i]+=$old_data[$i];
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
267 # }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
268 my $length=$e-$s+1;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
269 if ($length>30) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
270 $length=40;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
271 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
272 push @data,$length;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
273 my $data=join ";",@data;#sample_exp/total_exp/length;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
274 push @sample,$data;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
275 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
276
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
277 $chr = $c;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
278 $start = $s if $s < $start;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
279 $end = $e if $e > $end;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
280 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
281 my $tag=join";",@tag;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
282 my $tag_number=@tag;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
283 my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
284 if ($max_length==40) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
285 $max_length="\>30";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
286 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
287 my $cluster_exp=join"\t",@cluster_exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
288 my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
289 print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
290 print OUT $gff, "\n";
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
291 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
292
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
293 sub Max_length{
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
294 my @exp=@{$_[0]};
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
295 my %sample_length;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
296 my $total_exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
297 my @each;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
298 for (my $i=0;$i<=$#exp ;$i++) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
299 my @tag=split/;/,$exp[$i];
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
300 my $length=pop(@tag);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
301 my $exp=pop(@tag);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
302 $sample_length{$length}+=$exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
303 $total_exp+=$exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
304 for (my $j=0;$j<=$#tag ;$j++) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
305 $each[$j]+=$tag[$j];
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
306 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
307 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
308 my $max=0;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
309 my $max_key;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
310 foreach my $key (sort keys %sample_length) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
311 my $p=$sample_length{$key}/$total_exp;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
312 if ($p>$max) {
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
313 $max=$p;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
314 $max_key=$key;
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
315 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
316 $sample_length{$key}=sprintf("%.2f",$p);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
317 }
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
318 return($max_key,$sample_length{$max_key},@each);
9dcffd531c76 Uploaded
big-tiandm
parents:
diff changeset
319 }