Mercurial > repos > iuc > fasta_stats
comparison fasta-stats.pl @ 2:cd0874854f51 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit adc5e3616c1849551c9a712b651b0d1c6b0e88f1"
author | iuc |
---|---|
date | Mon, 26 Apr 2021 10:01:43 +0000 |
parents | 16f1f3e2de42 |
children | 56022eb50bbd |
comparison
equal
deleted
inserted
replaced
1:16f1f3e2de42 | 2:cd0874854f51 |
---|---|
45 process($seq) if $n; | 45 process($seq) if $n; |
46 | 46 |
47 # sort length array | 47 # sort length array |
48 # (should use hash here for efficiency with huge no of short reads?) | 48 # (should use hash here for efficiency with huge no of short reads?) |
49 | 49 |
50 @len = sort { $a <=> $b } @len; | 50 @len = sort { $b <=> $a } @len; |
51 | 51 |
52 # compute more stats | 52 # compute more stats |
53 | 53 |
54 $stat{'num_seq'} = scalar(@len); | 54 $stat{'num_seq'} = scalar(@len); |
55 | 55 |
60 $stat{'len_median'} = $len[int(@len/2)]; | 60 $stat{'len_median'} = $len[int(@len/2)]; |
61 $stat{'len_mean'} = int( $stat{'num_bp'} / $stat{'num_seq'} ); | 61 $stat{'len_mean'} = int( $stat{'num_bp'} / $stat{'num_seq'} ); |
62 | 62 |
63 # calculate n50 | 63 # calculate n50 |
64 my $thresh = int 0.5 * $stat{'num_bp'}; | 64 my $thresh = int 0.5 * $stat{'num_bp'}; |
65 $stat{'len_N50'} = &calc_x50(@len, $thresh); | 65 ($stat{'len_N50'}, $stat{'L50'}) = &calc_x50(\@len, $thresh); |
66 | 66 |
67 #calculate NG50 | 67 #calculate NG50 |
68 if ($calc_ng50) { | 68 if ($calc_ng50) { |
69 my $thresh = int 0.5 * $genome_size * 1000000; | 69 my $thresh = int 0.5 * $genome_size; |
70 $stat{'len_NG50'} = &calc_x50(@len, $thresh); | 70 ($stat{'len_NG50'}, $stat{'LG50'}) = &calc_x50(\@len, $thresh); |
71 } | 71 } |
72 } | 72 } |
73 | 73 |
74 #calculate GC content | 74 #calculate GC content |
75 $stat{'num_bp_not_N'} = $stat{'num_G'} + $stat{'num_C'} + $stat{'num_A'} + $stat{'num_T'}; | 75 $stat{'num_bp_not_N'} = $stat{'num_G'} + $stat{'num_C'} + $stat{'num_A'} + $stat{'num_T'}; |
99 } | 99 } |
100 | 100 |
101 # N50/NG50 calculation sub | 101 # N50/NG50 calculation sub |
102 | 102 |
103 sub calc_x50{ | 103 sub calc_x50{ |
104 my @x = shift; | 104 my $ref = shift; |
105 my @x = @$ref; | |
105 my $thresh = shift; | 106 my $thresh = shift; |
106 my $cum=0; | 107 my $cum=0; |
107 for my $i (0 .. $#x) { | 108 for my $i (0 .. $#x) { |
108 $cum += $x[$i]; | 109 $cum += $x[$i]; |
109 if ($cum >= $thresh) { | 110 if ($cum >= $thresh) { |
110 return $x[$i]; | 111 return $x[$i], $i+1; |
111 } | 112 } |
112 } | 113 } |
113 return 0; | 114 return (0,0); |
114 } | 115 } |
115 | 116 |