annotate nibls.pl @ 53:f5a2e8308836 draft default tip

Uploaded
author big-tiandm
date Mon, 08 Dec 2014 01:51:16 -0500
parents 7b5a48b972e9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
50
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
2 #####################################################################################################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
3 #LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it.
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
4 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
5 # Dan MacLean
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
6 # dan.maclean@sainsbury-laboratory.ac.uk
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
7 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
8 # This program is free academic software; academic and non-profit
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
9 # users may redistribute it freely.
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
10 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
14 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
15 # This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
16 # see included file GPL3.txt
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
17 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
18 #
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
19
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
20
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
21 ###Dont forget you will need ...
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
22 #####################################################################################################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
23 # Boost::Graph
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
24 #Copyright 2005 by David Burdick
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
25 # Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
26 #Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
27 #####################################################################################################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
28
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
29
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
30
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
31 use strict;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
32 use warnings;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
33 use Boost::Graph;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
34 use Getopt::Long;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
35
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
36
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
37 my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
38
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
39 my $gff_file ;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
40 my $min_inc = 5;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
41 my $clus = 0.6;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
42 my $buff = 0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
43 my $output_file;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
44 my $temp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
45 my $mark;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
46
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
47 GetOptions(
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
48
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
49 'c=f' => \$clus,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
50 'm=i' => \$min_inc,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
51 'f|file=s' => \$gff_file,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
52 'b=i' => \$buff,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
53 'o=s' => \$output_file,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
54 't=s' => \$temp,
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
55 'k=s' => \$mark
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
56 ) ;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
57
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
58
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
59 die $usage unless $gff_file;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
60
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
61
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
62 my $starttime = time;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
63 warn "started $starttime\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
64
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
65 ## load in data
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
66 my %molecules; # stores starts and ends of srnas
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
67 open GFF, "<$gff_file";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
68
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
69 while (my $entry = <GFF>){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
70
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
71 chomp $entry;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
72 next if($entry=~/^\#/);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
73 my @data = split(/\t/,$entry);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
74 my $chr=shift @data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
75 my $strand=shift @data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
76 my $start=shift @data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
77 my $end=shift @data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
78 # my $length1=$end-$start+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
79 # if ($length1>30) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
80 # $length1=40;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
81 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
82 my $total;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
83 for (my $s=0;$s<@data ;$s++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
84 $total+=$data[$s];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
85 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
86 push @data,$total;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
87 # push @data,$length1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
88 # if (defined $molecules{$chr}{$start}{$end}{$strand}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
89 # my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
90 # for (my $i=0;$i<$#old_data ;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
91 # $data[$i]+=$old_data[$i];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
92 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
93 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
94 my $data=join ";",@data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
95 $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
96 #print "$chr\t$start\t$end\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
97 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
98
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
99 close GFF;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
100
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
101 warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
102
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
103 my @sample=split/\#/,$mark;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
104 $mark=join"\"\t\"",@sample;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
105 open OUT, ">$output_file";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
106 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
107 open CLUSTER,">$temp";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
108 print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
109 foreach my $chromosome (keys %molecules){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
110 my $g = new Boost::Graph(directed=>0);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
111 my @starts = keys(%{$molecules{$chromosome}} );
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
112 @starts = sort {$a <=> $b} @starts;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
113
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
114 while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
115
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
116
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
117 foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
118
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
119
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
120 ###use new graph if the next srna is too far away from this one..
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
121 if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
122
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
123
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
124 ##dump the info from the old graph
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
125 if (scalar(@{$g->get_nodes()}) > 2){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
126
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
127 my $cluster_coeff = get_cc($g);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
128 if ($cluster_coeff >= $clus){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
129 dump_locus($g, $cluster_coeff);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
130 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
131 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
132
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
133
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
134 $g = new Boost::Graph(directed=>0);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
135
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
136 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
137
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
138 foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
139
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
140 unless ($e eq $srna_end){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
141 my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
142 my $en = $chromosome. ':' . $srna_start . ':' . $e;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
143 $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
144 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
145
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
146 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
147
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
148 foreach my $start (@starts){ ##build graph of overlaps
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
149 my $new = 0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
150 last if $start - $min_inc > $srna_end;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
151 if ($start - $min_inc <= $srna_end){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
152
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
153 my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
154 foreach my $end (keys %{$molecules{$chromosome}{$start}}){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
155
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
156 my $end_node = $chromosome . ':' . $start . ':' . $end;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
157 $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
158 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
159
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
160 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
161 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
162 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
163
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
164 if (!(defined $starts[0])) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
165 ##dump the info from the last graph
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
166 if (scalar(@{$g->get_nodes()}) > 2){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
167
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
168 my $cluster_coeff = get_cc($g);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
169 if ($cluster_coeff >= $clus){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
170 dump_locus($g, $cluster_coeff);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
171 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
172 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
173 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
174 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
175 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
176
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
177 warn "Loci printed\nFinished\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
178
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
179 my $endtime = time;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
180
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
181 my $elapsed = $endtime - $starttime;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
182
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
183 warn "Time elapsed = $elapsed s\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
184 close OUT;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
185 close CLUSTER;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
186 #########################################################################################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
187 sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
188
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
189 my $graph = shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
190
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
191 my @component = @{$graph->get_nodes()}; #number of nodes
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
192 my @clustering_coefficients;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
193
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
194 foreach my $vertex (@component)
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
195 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
196
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
197 my @neighbours = @{$graph->neighbors($vertex)};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
198
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
199 my %edges_in_graph;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
200
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
201 my $n = @neighbours; #n = the number of neighbours
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
202 my $k = ($n * ($n - 1))/2; #k = total number of possible connections
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
203
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
204 my $e= 0; #actual number of connections within sub-graph
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
205
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
206 foreach my $neighbour (@neighbours)
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
207 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
208 foreach my $neighbour_2 (@neighbours)
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
209 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
210 my $edge1 = "$neighbour\t$neighbour_2";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
211 my $edge2 = "$neighbour_2\t$neighbour";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
212 unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
213 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
214 if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
215 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
216 ++$e;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
217 $edges_in_graph{$edge1}=1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
218 $edges_in_graph{$edge2}=1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
219 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
220 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
221 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
222 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
223
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
224 if ($k >= 1)
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
225 {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
226 my $c = $e / $k;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
227 push @clustering_coefficients, $c;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
228 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
229 else {push @clustering_coefficients, '0';}
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
230 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
231
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
232 my $graph_n = scalar(@clustering_coefficients);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
233 my $graph_cc = 0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
234 foreach my $cc (@clustering_coefficients){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
235
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
236 $graph_cc = $graph_cc + $cc;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
237
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
238 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
239 $graph_cc = $graph_cc / $graph_n;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
240
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
241 return $graph_cc;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
242 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
243
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
244 ############################################################################################################
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
245
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
246 sub dump_locus{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
247
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
248 my $g = shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
249 my $cc = shift;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
250 my $chr;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
251 my $start = 1000000000000000000000000000000000000000000000;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
252 my $end = -1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
253 my @sample;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
254 my @tag;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
255 foreach my $node (@{$g->get_nodes()}){
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
256
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
257 $node =~ m/^(\S+):(\d+):(\d+)$/;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
258 my $c=$1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
259 my $s=$2;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
260 my $e=$3;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
261 # my @data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
262 foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
263 my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
264 push @tag,($s.",".$e.",".$str.",".$data[-1]);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
265 # for (my $i=0;$i<$#old_data ;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
266 # $data[$i]+=$old_data[$i];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
267 # }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
268 my $length=$e-$s+1;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
269 if ($length>30) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
270 $length=40;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
271 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
272 push @data,$length;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
273 my $data=join ";",@data;#sample_exp/total_exp/length;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
274 push @sample,$data;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
275 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
276
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
277 $chr = $c;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
278 $start = $s if $s < $start;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
279 $end = $e if $e > $end;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
280 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
281 my $tag=join";",@tag;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
282 my $tag_number=@tag;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
283 my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
284 if ($max_length==40) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
285 $max_length="\>30";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
286 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
287 my $cluster_exp=join"\t",@cluster_exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
288 my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
289 print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
290 print OUT $gff, "\n";
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
291 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
292
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
293 sub Max_length{
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
294 my @exp=@{$_[0]};
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
295 my %sample_length;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
296 my $total_exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
297 my @each;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
298 for (my $i=0;$i<=$#exp ;$i++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
299 my @tag=split/;/,$exp[$i];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
300 my $length=pop(@tag);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
301 my $exp=pop(@tag);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
302 $sample_length{$length}+=$exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
303 $total_exp+=$exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
304 for (my $j=0;$j<=$#tag ;$j++) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
305 $each[$j]+=$tag[$j];
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
306 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
307 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
308 my $max=0;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
309 my $max_key;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
310 foreach my $key (sort keys %sample_length) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
311 my $p=$sample_length{$key}/$total_exp;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
312 if ($p>$max) {
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
313 $max=$p;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
314 $max_key=$key;
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
315 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
316 $sample_length{$key}=sprintf("%.2f",$p);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
317 }
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
318 return($max_key,$sample_length{$max_key},@each);
7b5a48b972e9 Uploaded
big-tiandm
parents:
diff changeset
319 }