Mercurial > repos > brasset_jensen > srnapipe
annotate bin/align.pm @ 62:e6c4f3e96e47 draft
planemo upload for repository https://github.com/GReD-Clermont/sRNAPipe/ commit 87e0c95b4d0d1954e81247c31a8f74c5d87c7f9e
author | pierre.pouchin |
---|---|
date | Thu, 17 Jan 2019 09:56:52 -0500 |
parents | 9185ca0a7b43 |
children |
rev | line source |
---|---|
61
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
1 package align; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
2 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
3 use strict; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
4 use warnings; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
5 use File::Basename; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
6 use String::Random; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
7 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
8 use FindBin; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
9 use lib $FindBin::Bin; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
10 use Rcall qw ( histogram ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
11 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
12 use Exporter; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
13 our @ISA = qw( Exporter ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
14 our @EXPORT = qw( &rpms_rpkm_te &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
15 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
16 sub to_build |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
17 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
18 my ( $toBuildTabP, $log, $newdir ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
19 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
20 foreach my $pairs ( @{ $toBuildTabP } ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
21 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
22 if ( $pairs->[0] == 1 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
23 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
24 my $sym = $newdir.basename(${$pairs->[1]}).'_symlink.fa'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
25 symlink( ${$pairs->[1]}, $sym ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
26 ${$pairs->[1]} = $sym; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
27 build_index ( ${$pairs->[1]}, $log ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
28 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
29 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
30 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
31 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
32 sub build_index |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
33 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
34 my $to_index = shift; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
35 my $log = shift; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
36 my $index_log = $to_index.'_index.err'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
37 `bwa index '$to_index' 2> '$index_log'`; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
38 print $log "Creating index for $to_index\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
39 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
40 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
41 sub get_unique |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
42 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
43 my ( $sam, $s_uni, $out_prefix, $col_prefix, $details, $report ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
44 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
45 my $fout = $col_prefix.'_all_mappers.fastq'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
46 my $funi = $col_prefix.'_unique_mappers.fastq'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
47 my $frej = $col_prefix.'_unmapped.fastq'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
48 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
49 my $repartition = $out_prefix.'distribution.txt'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
50 my $png_rep = $out_prefix.'distribution.png'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
51 my ( %duplicates, %genome_hits) ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
52 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
53 #alignement to the first reference |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
54 my @return = sam_parse( $sam, $fout, $funi, $frej, $s_uni, \%duplicates, \%genome_hits, $report ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
55 my $ref_fai = $return[4]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
56 my $mappers = $return[5]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
57 my $mappers_uni = $return[6]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
58 my $size_mappedHashR = $return[7]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
59 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
60 if ( $details == 1 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
61 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
62 #print number of duplicates and hits number |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
63 my ($pourcentage, $total) =(0,0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
64 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
65 $total += $_ foreach values %{$size_mappedHashR}; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
66 open (my $rep, '>'.$repartition) || die "cannot create $repartition $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
67 print $rep "size\tnumber\tpercentage\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
68 foreach my $k (sort{$a cmp $b} keys (%{$size_mappedHashR})) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
69 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
70 $pourcentage = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
71 $pourcentage = $size_mappedHashR->{$k} / $total * 100 unless $total ==0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
72 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
73 print $rep "$k\t$size_mappedHashR->{$k}\t"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
74 printf $rep "%.2f\n",$pourcentage; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
75 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
76 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
77 histogram($size_mappedHashR, $png_rep, $total); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
78 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
79 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
80 my $dup = $out_prefix.'dup_mapnum.txt'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
81 my $dup_u = $out_prefix .'dup_unique.txt'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
82 my $dup_r = $out_prefix .'dup_nonmapp.txt'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
83 open(my $tab,">".$dup) || die "cannot open output txt file\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
84 open(my $tab_r,">".$dup_r) || die "cannot open output txt file\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
85 open(my $tab_u,">".$dup_u) || die "cannot open output txt file\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
86 print $tab "sequence\tcount\tmapnum\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
87 print $tab_u "sequence\tcount\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
88 print $tab_r "sequence\tcount\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
89 foreach my $k (sort {$duplicates{$b} <=> $duplicates{$a}}keys %duplicates) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
90 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
91 $duplicates{$k} = 0 unless exists($duplicates{$k}); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
92 $genome_hits{$k} = 0 unless exists($genome_hits{$k}); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
93 if ($genome_hits{$k} != 0) { print $tab $k."\t".$duplicates{$k}."\t".$genome_hits{$k}."\n"; } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
94 else {print $tab_r $k."\t".$duplicates{$k}."\n";} |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
95 if ($genome_hits{$k} == 1) { print $tab_u $k."\t".$duplicates{$k}."\n"; } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
96 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
97 close $dup; close $dup_r; close $dup_u; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
98 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
99 return ( $ref_fai, $mappers, $mappers_uni ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
100 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
101 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
102 sub sam_parse |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
103 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
104 my ( $sam, $fastq_accepted, $fastq_accepted_unique, $fastq_rejected, $sam_unique, $duplicate_hashR, $best_hit_number_hashR, $report ) = @_ ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
105 my ($reads, $mappers, $mappersUnique, @garbage, %size_num, %size_num_spe, %number, %numberSens, %numberReverse, %unique_number, %numberNM, %numberM, %size); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
106 $mappers = $mappersUnique = $reads = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
107 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
108 open my $fic, '<', $sam || die "cannot open $sam $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
109 open my $accepted, '>', $fastq_accepted || die "cannot create $fastq_accepted $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
110 open my $unique, '>', $fastq_accepted_unique || die "cannot create $fastq_accepted_unique $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
111 open my $rejected, '>', $fastq_rejected || die "cannot create $fastq_rejected $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
112 open my $sam_uni, '>', $sam_unique || die "cannot create $sam_unique $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
113 my $sequence = ''; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
114 while(<$fic>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
115 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
116 chomp $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
117 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
118 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
119 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
120 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
121 $size{$1} = $2; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
122 $unique_number{$1} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
123 $number{$1} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
124 $numberNM{$1} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
125 $numberM{$1} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
126 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
127 print $sam_uni $_."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
128 next; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
129 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
130 $reads++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
131 my @line = split (/\t/,$_); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
132 $sequence = $line[9]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
133 if ($line[1] & 16) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
134 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
135 $sequence =reverse($sequence); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
136 $sequence =~ tr/atgcuATGCU/tacgaTACGA/; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
137 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
138 if ($line[1] == 16 || $line[1] == 0) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
139 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
140 my $len = length($sequence); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
141 $size_num{$len} ++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
142 $size_num_spe{$line[2]}{$len}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
143 $mappers ++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
144 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
145 ${$best_hit_number_hashR}{$sequence} = $1 if ($line[13] =~ /X0:i:(\d*)/ || $line[14] =~/X0:i:(\d*)/ ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
146 ${$duplicate_hashR}{$sequence}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
147 $number{$line[2]}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
148 $numberSens{$line[2]}++ if $line[1] == 0 ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
149 $numberReverse{$line[2]}++ if $line[1] == 16 ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
150 print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
151 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
152 if ($line[11] eq "XT:A:U") |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
153 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
154 $unique_number{$line[2]}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
155 $mappersUnique++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
156 print $unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
157 print $sam_uni $_."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
158 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
159 if ($_ =~ /.*XM:i:(\d+).*/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
160 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
161 if ($1 == 0){$numberNM{$line[2]}++;}else{$numberM{$line[2]}++;} |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
162 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
163 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
164 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
165 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
166 ${$best_hit_number_hashR}{$sequence} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
167 ${$duplicate_hashR}{$sequence}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
168 print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
169 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
170 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
171 close $fic; close $accepted; close $unique; close $rejected; close $sam_uni; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
172 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
173 print $report "Parsing $sam file\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
174 print $report "\treads: $reads\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
175 print $report "\tmappers: $mappers\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
176 print $report "\tunique mappers: $mappersUnique\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
177 print $report "-----------------------------\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
178 return (\%number, \%unique_number, \%numberSens, \%numberReverse, \%size, $mappers, $mappersUnique, \%size_num, \%size_num_spe, \%numberNM, \%numberM ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
179 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
180 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
181 sub get_hash_alignment |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
182 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
183 my ($index, $mismatches, $accept, $reject, $outA, $outR, $fastq, $number_of_cpus, $name, $sam, $report, $fai_f) = @_ ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
184 my ($reads, $mappers, $unmapped) = (0,0,0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
185 my $accep_unique; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
186 BWA_call ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
187 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
188 open my $fic, '<', $sam || die "cannot open $sam $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
189 open my $accepted, '>', $outA || die "cannot open $outA\n" if $accept == 1; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
190 open my $rejected, '>', $outR || die "cannot open $outR\n" if $reject == 1; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
191 open my $fai, '>', $fai_f || die "cannot open $fai_f\n" if $fai_f; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
192 #if ($name eq "snRNAs") { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
193 # open ( $accep_unique, ">".$1."-unique.fastq") if $outR =~ /(.*)\.fastq/; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
194 #} |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
195 my $sequence = ''; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
196 while(<$fic>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
197 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
198 chomp $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
199 if( $_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
200 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
201 if ($fai_f && $_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
202 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
203 print $fai $1."\t".$2."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
204 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
205 next; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
206 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
207 $reads++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
208 my @line = split (/\t/,$_); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
209 $sequence = $line[9]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
210 if ($line[1] & 16) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
211 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
212 $sequence =reverse($sequence); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
213 $sequence =~ tr/atgcuATGCU/tacgaTACGA/; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
214 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
215 if ($line[1] & 16 || $line[1] == 0) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
216 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
217 $mappers ++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
218 if ($accept == 1 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
219 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
220 print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
221 # print $accep_unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if ($name eq "snRNAs" && $line[11] eq "XT:A:U"); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
222 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
223 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
224 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
225 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
226 print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if $reject == 1; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
227 $unmapped++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
228 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
229 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
230 # close $accep_unique if ($name eq "bonafide_reads"); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
231 close $fic; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
232 close $accepted if $accept == 1; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
233 close $rejected if $reject ==1; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
234 close $fai if $fai_f; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
235 print $report "\treads: $reads\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
236 print $report "\tmappers: $mappers\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
237 print $report "\tunmapped: $unmapped\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
238 print $report "-----------------------------\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
239 return ($mappers, $unmapped); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
240 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
241 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
242 sub sam_count |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
243 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
244 my $sam = shift; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
245 my ( %number, %size ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
246 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
247 open my $fic, '<', $sam || die "cannot open $sam file $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
248 while(<$fic>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
249 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
250 chomp $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
251 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
252 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
253 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
254 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
255 $size{$1} = $2; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
256 $number{$1} = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
257 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
258 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
259 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
260 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
261 my @line = split (/\t/,$_); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
262 if ( $line[1] & 16 || $line[1] == 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
263 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
264 $number{$line[2]}++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
265 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
266 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
267 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
268 close $fic; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
269 return ( \%number, \%size ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
270 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
271 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
272 sub sam_count_mis |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
273 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
274 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
275 my $sam = shift; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
276 my ( %number, %numberNM, %numberM, %size); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
277 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
278 open my $fic, '<', $sam || die "cannot open $sam file $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
279 while(<$fic>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
280 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
281 chomp $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
283 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
285 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
286 $size{$1} = $2; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
287 $number{$1} = [0,0,0,0,0,0,0]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
288 $numberNM{$1} = [0,0,0,0,0,0,0]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
289 $numberM{$1} = [0,0,0,0,0,0,0]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
290 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
291 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
292 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
293 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
294 my @line = split (/\t/,$_); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
295 my @seq = split //, $line[9]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
296 if ( $line[1] == 16 || $line[1] == 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
297 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
298 $number{ $line[2] }->[0]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
299 if ($line[1] == 0) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
300 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
301 $number{$line[2]}->[1]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
303 $number{$line[2]}->[5]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
304 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
305 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
306 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
307 $number{$line[2]}->[2]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
308 $number{$line[2]}->[4]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
309 $number{$line[2]}->[6]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
310 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
311 if ($_ =~ /.*XM:i:(\d+).*/) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
312 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
313 if ( $1 == 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
314 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
315 $numberNM{$line[2]}->[0]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
316 if ($line[1] == 0) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
317 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
318 $numberNM{$line[2]}->[1]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
319 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
320 $numberNM{$line[2]}->[5]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
321 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
322 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
323 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
324 $numberNM{$line[2]}->[2]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
325 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
326 $numberNM{$line[2]}->[6]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
327 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
328 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
329 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
330 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
331 $numberM{$line[2]}->[0]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
332 if ($line[1] == 0) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
333 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
334 $numberM{$line[2]}->[1]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
335 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
336 $numberM{$line[2]}->[5]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
337 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
338 else |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
339 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
340 $numberM{$line[2]}->[2]++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
341 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
342 $numberM{$line[2]}->[6]++ if $seq[0] eq 'T'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
343 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
344 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
345 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
346 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
347 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
348 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
349 return (\%number, \%size, \%numberNM, \%numberM ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
350 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
351 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
352 sub rpms_rpkm_te |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
353 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
354 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
355 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
356 print $out "ID\treads counts\tRPKM"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
357 print $out "\tper million of piRNAs" if ($piRNA_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
358 print $out "\tper million of miRNAs" if ($miRNA_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
359 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
360 print $out "\tsense reads counts\treverse reads counts"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
361 print $out "\t% of sense 1U\t% of sense 10A\t% of reverse 1U\t% of reverse 10A\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
362 foreach my $k ( sort keys %{$counthashR} ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
363 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
364 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
365 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
366 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
367 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
368 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
369 if ($piRNA_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
370 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
372 printf $out "\t%.2f",$pirna; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
373 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
374 if ($miRNA_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
375 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
376 $mirna = ( $counthashR->{$k}->[0] * 1000000) / $miRNA_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
377 printf $out "\t%.2f",$mirna; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
378 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
379 if ($bonafide_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
380 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
382 printf $out "\t%.2f",$bonafide; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
383 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
384 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
385 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
386 my $S1U = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
387 $S1U = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
388 my $R1U = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
389 $R1U = $counthashR->{$k}->[6] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
390 my $S10A = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
391 $S10A = $counthashR->{$k}->[5] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
392 my $R10A = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
393 $R10A = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
394 print $out "\t".$S1U."\t".$S10A."\t".$R1U."\t".$R10A; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
395 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
396 print $out "\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
397 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
398 close $out; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
399 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
400 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
401 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
402 sub sam_to_bam_bg |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
403 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
404 my ( $sam, $scale, $number_of_cpus ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
405 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
406 if ( $sam =~ /(.*?).sam$/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
407 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
408 $bam_sorted = $1.'_sorted.bam'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
409 $bedgraphP= $1.'_plus.bedgraph'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
410 $bedgraphM = $1.'_minus.bedgraph'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
411 $view_err = $1.'_view.err'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
412 $sort_err = $1.'_sort.err'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
413 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
414 `samtools view -Shb --threads $number_of_cpus '$sam' 2> '$view_err' | samtools sort -O BAM --threads $number_of_cpus /dev/stdin 2> '$sort_err' > '$bam_sorted'`; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
415 `bedtools genomecov -scale $scale -strand + -bga -ibam '$bam_sorted' > '$bedgraphP'`; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
416 `bedtools genomecov -scale $scale -strand - -bga -ibam '$bam_sorted' > '$bedgraphM'`; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
417 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
418 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
419 sub sam_sorted_bam |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
420 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
421 my ( $sam, $number_of_cpus ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
422 my ( $bam_sorted, $view_err, $sort_err ) = ( '', '', '' ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
423 if ( $sam =~ /(.*?).sam$/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
424 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
425 $bam_sorted = $1.'_sorted.bam'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
426 $view_err = $1.'_view.err'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
427 $sort_err = $1.'_sort.err'; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
428 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
429 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
430 `samtools view -Shb --threads $number_of_cpus '$sam' 2> '$view_err' | samtools sort -O BAM --threads $number_of_cpus /dev/stdin 2> '$sort_err' > '$bam_sorted'`; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
431 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
432 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
433 sub BWA_call |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
434 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
435 my ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
436 my ( $aln_err, $samse_err, $seq_num ) = ( $sam.'_aln.err', $sam.'_samse.err', 0 ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
437 print $report "-----------------------------\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
438 print $report "bwa aln -t $number_of_cpus -n $mismatches '$index' '$fastq' 2> '$aln_err' | bwa samse $index /dev/stdin '$fastq' 2> '$samse_err' > '$sam'\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
439 `bwa aln -t $number_of_cpus -n $mismatches '$index' '$fastq' 2> '$aln_err' | bwa samse $index /dev/stdin '$fastq' 2> '$samse_err' > '$sam' `; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
440 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
441 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
442 sub rpms_rpkm |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
443 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
444 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
445 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
446 print $out "ID\treads counts\tRPKM"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
447 print $out "\tper million of piRNAs" if ($piRNA_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
448 print $out "\tper million of miRNAs" if ($miRNA_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
449 print $out "\tper million of bonafide reads" if ($bonafide_number != 0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
450 print $out "\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
451 foreach my $k ( sort keys %{$counthashR} ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
452 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
453 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
454 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
455 $rpkm = ( $counthashR->{$k} * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
456 print $out $k."\t".$counthashR->{$k}."\t"; printf $out "%.2f",$rpkm; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
457 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
458 if ($piRNA_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
459 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
460 $pirna = ( $counthashR->{$k} * 1000000) / $piRNA_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
461 printf $out "\t%.2f",$pirna; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
462 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
463 if ($miRNA_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
464 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
465 $mirna = ( $counthashR->{$k} * 1000000) / $miRNA_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
466 printf $out "\t%.2f",$mirna; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
467 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
468 if ($bonafide_number != 0 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
469 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
470 $bonafide = ( $counthashR->{$k} * 1000000) / $bonafide_number; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
471 printf $out "\t%.2f",$bonafide; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
472 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
473 print $out "\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
474 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
475 close $out; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
476 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
477 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
478 sub extract_sam |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
479 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
480 my ( $hashRef, $sam_in, $sam_out, $sam_uni_out, $fastq_out, $fastq_uni_out ) = @_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
481 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
482 open my $s_in, '<', $sam_in || die "cannot open $sam_in file $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
483 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
484 open my $f_out, '>', $fastq_out || die "cannot create $fastq_out $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
485 open my $f_uni_out, '>', $fastq_uni_out || die "cannot create $fastq_uni_out $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
486 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
487 open my $s_out, '>', $sam_out || die "cannot create $sam_out file $!\n" if defined ($hashRef); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
488 open my $s_uni_out, '>', $sam_uni_out || die "cannot create $sam_uni_out file $!\n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
489 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
490 my $sequence = ''; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
491 while(<$s_in>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
492 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
493 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
494 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
495 print $s_out $_ if defined ($hashRef); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
496 print $s_uni_out $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
497 next; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
498 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
499 my @line = split (/\t/,$_); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
500 $sequence = $line[0]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
501 if ( (! defined ($hashRef) )|| ( exists $hashRef->{$sequence} && $hashRef->{$sequence} == 1 ) ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
502 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
503 my $arn = $line[9]; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
504 if ($line[1] & 16) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
505 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
506 $arn =reverse($arn); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
507 $arn =~ tr/atgcuATGCU/tacgaTACGA/; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
508 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
509 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
510 if ( ( $line[1] == 16 || $line[1] == 0 ) ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
511 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
512 print $f_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
513 print $s_out $_ if defined ($hashRef); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
514 if ( $line[11] eq "XT:A:U" ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
515 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
516 print $f_uni_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
517 print $s_uni_out $_ ; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
518 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
519 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
520 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
521 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
522 close $s_in; close $s_out if defined ($hashRef); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
523 close $s_uni_out; close $f_out; close $f_uni_out; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
524 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
525 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
526 sub get_fastq_seq |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
527 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
528 my $fastq = shift; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
529 my %hash; my $cmp = 0; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
530 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
531 open my $fic, '<', $fastq || die "cannot open input file $! \n"; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
532 while(<$fic>) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
533 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
534 chomp $_; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
535 $cmp++; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
536 if ($cmp % 4 == 1) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
537 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
538 die "file do not contain a @ at line $cmp\n" unless ($_ =~ /^\@/ ); |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
539 if ($_ =~ /^\@(.*)\s.*/) { $hash{$1} = 1;} |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
540 elsif ($_ =~ /^\@(.*)/) { $hash{$1} = 1;} |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
541 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
542 elsif ($cmp % 4 == 3 ) |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
543 { |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
544 die "file do not contain a + at line $cmp\n" unless $_ =~ /^\+/; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
545 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
546 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
547 close $fic; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
548 return \%hash; |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
549 } |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
550 |
9185ca0a7b43
Updated package according to recommendations.
pierre.pouchin
parents:
60
diff
changeset
|
551 1; |