annotate fastx_toolkit-0.0.6/scripts/fasta_clipping_histogram.pl @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
1 #!/usr/bin/perl
997f5136985f Uploaded
xilinxu
parents:
diff changeset
2
997f5136985f Uploaded
xilinxu
parents:
diff changeset
3 # FASTX-toolkit - FASTA/FASTQ preprocessing tools.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
4 # Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
5 #
997f5136985f Uploaded
xilinxu
parents:
diff changeset
6 # This program is free software: you can redistribute it and/or modify
997f5136985f Uploaded
xilinxu
parents:
diff changeset
7 # it under the terms of the GNU Affero General Public License as
997f5136985f Uploaded
xilinxu
parents:
diff changeset
8 # published by the Free Software Foundation, either version 3 of the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
9 # License, or (at your option) any later version.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
10 #
997f5136985f Uploaded
xilinxu
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
997f5136985f Uploaded
xilinxu
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
14 # GNU Affero General Public License for more details.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
15 #
997f5136985f Uploaded
xilinxu
parents:
diff changeset
16 # You should have received a copy of the GNU Affero General Public License
997f5136985f Uploaded
xilinxu
parents:
diff changeset
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
18
997f5136985f Uploaded
xilinxu
parents:
diff changeset
19 use strict;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
20 use warnings;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
21 use GD::Graph::bars;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
22 use Data::Dumper;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
23 use PerlIO::gzip;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
24
997f5136985f Uploaded
xilinxu
parents:
diff changeset
25 if (scalar @ARGV==0) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
26 print<<END;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
27
997f5136985f Uploaded
xilinxu
parents:
diff changeset
28 Create a Linker Clipping Information Histogram
997f5136985f Uploaded
xilinxu
parents:
diff changeset
29
997f5136985f Uploaded
xilinxu
parents:
diff changeset
30 usage: $0 INPUT_FILE.FA OUTPUT_FILE.PNG
997f5136985f Uploaded
xilinxu
parents:
diff changeset
31
997f5136985f Uploaded
xilinxu
parents:
diff changeset
32 INPUT_FILE.FA = input file (in FASTA format, can be GZIPped)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
33 OUTPUT_FILE.PNG = histogram image
997f5136985f Uploaded
xilinxu
parents:
diff changeset
34
997f5136985f Uploaded
xilinxu
parents:
diff changeset
35 END
997f5136985f Uploaded
xilinxu
parents:
diff changeset
36 exit 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
37 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
38
997f5136985f Uploaded
xilinxu
parents:
diff changeset
39 #
997f5136985f Uploaded
xilinxu
parents:
diff changeset
40 # Read parameters
997f5136985f Uploaded
xilinxu
parents:
diff changeset
41 #
997f5136985f Uploaded
xilinxu
parents:
diff changeset
42 open(IN, "<:gzip(autopop)", "$ARGV[0]") or die "Cannot open input file $ARGV[0]\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
43 open(OUT, ">$ARGV[1]") or die "Cannot create output file $ARGV[1]\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
44 binmode OUT;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
45
997f5136985f Uploaded
xilinxu
parents:
diff changeset
46 my %histogram ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
47
997f5136985f Uploaded
xilinxu
parents:
diff changeset
48 while (my $name = <IN>) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
49 my $sequence = <IN> ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
50 chomp $sequence;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
51
997f5136985f Uploaded
xilinxu
parents:
diff changeset
52 my $sequence_length = length($sequence);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
53
997f5136985f Uploaded
xilinxu
parents:
diff changeset
54 my $count;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
55
997f5136985f Uploaded
xilinxu
parents:
diff changeset
56 if ( index($name, "-")==-1 ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
57 #Assume this file is not collapsed, just count each seqeunce as 1
997f5136985f Uploaded
xilinxu
parents:
diff changeset
58 $count = 1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
59 } else {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
60 #Assume file is collapsed (that is - sequence-ID has two numbers with a separating dash)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
61 (undef, $count) = $name =~ /^\>(\d+)\-(\d+)$/ ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
62
997f5136985f Uploaded
xilinxu
parents:
diff changeset
63 # If the match failed, treat this fasta as not collapsed;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
64 $count = 1 if not defined $count ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
65 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
66
997f5136985f Uploaded
xilinxu
parents:
diff changeset
67 $histogram{$sequence_length} += $count ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
68 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
69
997f5136985f Uploaded
xilinxu
parents:
diff changeset
70 #Textual Output
997f5136985f Uploaded
xilinxu
parents:
diff changeset
71 if (0) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
72 print "Length\tCount\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
73 foreach my $length_key ( sort { $a <=> $b } keys %histogram ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
74 print $length_key,"\t", $histogram{$length_key},"\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
75 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
76 exit 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
77 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
78
997f5136985f Uploaded
xilinxu
parents:
diff changeset
79 ## Build the data as required by GD::Graph::bars.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
80 ## Data list has two items (each item is itself a list)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
81 ## 1. a list of x-axis labels (these are the keys from the histogram)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
82 ## 2. a list of values
997f5136985f Uploaded
xilinxu
parents:
diff changeset
83 my @data = (
997f5136985f Uploaded
xilinxu
parents:
diff changeset
84 [ sort { $a <=> $b } keys %histogram ],
997f5136985f Uploaded
xilinxu
parents:
diff changeset
85 [ map { $histogram{$_} } sort { $a <=> $b } keys %histogram ] ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
86
997f5136985f Uploaded
xilinxu
parents:
diff changeset
87 my $graph = new GD::Graph::bars (1000,800);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
88
997f5136985f Uploaded
xilinxu
parents:
diff changeset
89 $graph->set(
997f5136985f Uploaded
xilinxu
parents:
diff changeset
90 x_label => 'Length',
997f5136985f Uploaded
xilinxu
parents:
diff changeset
91 y_label => 'Amount',
997f5136985f Uploaded
xilinxu
parents:
diff changeset
92 title => 'Sequences lengths Distribution (after clipping)',
997f5136985f Uploaded
xilinxu
parents:
diff changeset
93 bar_spacing => 10,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
94 transparent => 0,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
95 t_margin => 10,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
96 y_tick_number => 20,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
97 y_long_ticks => 1,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
98 ) or die $graph->error;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
99
997f5136985f Uploaded
xilinxu
parents:
diff changeset
100 $graph->plot(\@data) or die $graph->error;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
101 print OUT $graph->gd->png;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
102
997f5136985f Uploaded
xilinxu
parents:
diff changeset
103 close IN;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
104 close OUT;