Mercurial > repos > xilinxu > xilinxu
comparison fastx_toolkit-0.0.6/scripts/fasta_clipping_histogram.pl @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:dfe9332138cf | 3:997f5136985f |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 # FASTX-toolkit - FASTA/FASTQ preprocessing tools. | |
4 # Copyright (C) 2009 A. Gordon (gordon@cshl.edu) | |
5 # | |
6 # This program is free software: you can redistribute it and/or modify | |
7 # it under the terms of the GNU Affero General Public License as | |
8 # published by the Free Software Foundation, either version 3 of the | |
9 # License, or (at your option) any later version. | |
10 # | |
11 # This program is distributed in the hope that it will be useful, | |
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 # GNU Affero General Public License for more details. | |
15 # | |
16 # You should have received a copy of the GNU Affero General Public License | |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | |
19 use strict; | |
20 use warnings; | |
21 use GD::Graph::bars; | |
22 use Data::Dumper; | |
23 use PerlIO::gzip; | |
24 | |
25 if (scalar @ARGV==0) { | |
26 print<<END; | |
27 | |
28 Create a Linker Clipping Information Histogram | |
29 | |
30 usage: $0 INPUT_FILE.FA OUTPUT_FILE.PNG | |
31 | |
32 INPUT_FILE.FA = input file (in FASTA format, can be GZIPped) | |
33 OUTPUT_FILE.PNG = histogram image | |
34 | |
35 END | |
36 exit 0; | |
37 } | |
38 | |
39 # | |
40 # Read parameters | |
41 # | |
42 open(IN, "<:gzip(autopop)", "$ARGV[0]") or die "Cannot open input file $ARGV[0]\n"; | |
43 open(OUT, ">$ARGV[1]") or die "Cannot create output file $ARGV[1]\n"; | |
44 binmode OUT; | |
45 | |
46 my %histogram ; | |
47 | |
48 while (my $name = <IN>) { | |
49 my $sequence = <IN> ; | |
50 chomp $sequence; | |
51 | |
52 my $sequence_length = length($sequence); | |
53 | |
54 my $count; | |
55 | |
56 if ( index($name, "-")==-1 ) { | |
57 #Assume this file is not collapsed, just count each seqeunce as 1 | |
58 $count = 1 ; | |
59 } else { | |
60 #Assume file is collapsed (that is - sequence-ID has two numbers with a separating dash) | |
61 (undef, $count) = $name =~ /^\>(\d+)\-(\d+)$/ ; | |
62 | |
63 # If the match failed, treat this fasta as not collapsed; | |
64 $count = 1 if not defined $count ; | |
65 } | |
66 | |
67 $histogram{$sequence_length} += $count ; | |
68 } | |
69 | |
70 #Textual Output | |
71 if (0) { | |
72 print "Length\tCount\n"; | |
73 foreach my $length_key ( sort { $a <=> $b } keys %histogram ) { | |
74 print $length_key,"\t", $histogram{$length_key},"\n"; | |
75 } | |
76 exit 0; | |
77 } | |
78 | |
79 ## Build the data as required by GD::Graph::bars. | |
80 ## Data list has two items (each item is itself a list) | |
81 ## 1. a list of x-axis labels (these are the keys from the histogram) | |
82 ## 2. a list of values | |
83 my @data = ( | |
84 [ sort { $a <=> $b } keys %histogram ], | |
85 [ map { $histogram{$_} } sort { $a <=> $b } keys %histogram ] ) ; | |
86 | |
87 my $graph = new GD::Graph::bars (1000,800); | |
88 | |
89 $graph->set( | |
90 x_label => 'Length', | |
91 y_label => 'Amount', | |
92 title => 'Sequences lengths Distribution (after clipping)', | |
93 bar_spacing => 10, | |
94 transparent => 0, | |
95 t_margin => 10, | |
96 y_tick_number => 20, | |
97 y_long_ticks => 1, | |
98 ) or die $graph->error; | |
99 | |
100 $graph->plot(\@data) or die $graph->error; | |
101 print OUT $graph->gd->png; | |
102 | |
103 close IN; | |
104 close OUT; |