comparison miRNA_Express_and_sequence.pl @ 13:d1cc2e6ecf90 draft

Uploaded
author big-tiandm
date Fri, 25 Jul 2014 05:21:10 -0400
parents
children
comparison
equal deleted inserted replaced
12:dc5a29826c7d 13:d1cc2e6ecf90
1 #!/usr/bin/perl -w
2 #Filename:
3 #Author: Tian Dongmei
4 #Email: tiandm@big.ac.cn
5 #Date: 2014-6-4
6 #Modified:
7 #Description: solexa miRNA express and sequence
8 my $version=1.00;
9
10 use strict;
11 use Getopt::Long;
12
13 my %opts;
14 GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
15 if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
16 &usage;
17 }
18
19 my $filein=$opts{'i'};
20 my $fileout=$opts{'list'};
21 my $out=$opts{'fa'};
22 my $preout=$opts{'pre'};
23
24 =cut
25 my %hash_pri;
26 open PRI,"<$opts{p}";
27 while (my $aline=<PRI>) {
28 chomp $aline;
29 if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
30 }
31 close PRI;
32 =cut
33
34 open IN,"<$filein"; #input file
35 open OUT,">$fileout"; #output file
36 open FA ,">$out";
37 open PRE,">$preout";
38
39 print OUT "#ID\tcoordinate\tpos1\tpos2";
40 my @marks=split/\,/,$opts{'tag'};
41 foreach (@marks) {
42 print OUT "\t",$_,"_matureExp";
43 }
44 foreach (@marks) {
45 print OUT "\t",$_,"_starExp";
46 }
47 foreach (@marks) {
48 print OUT "\t",$_,"_totalExp";
49 }
50
51 print OUT "\n";
52
53 my (%uniq_id,$novel);
54 while (my $aline=<IN>) {
55 chomp $aline;
56 until ($aline =~ /^score\s+[-\d\.]+/){
57 $aline = <IN>;
58 if (eof) {last;}
59 }
60 if (eof) {last;}
61 ########## miRNA ID ################
62 $novel++;
63 ########### annotate####################
64 do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
65 chomp $aline;
66 my @flank1=split/\t/,$aline;
67 do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
68 chomp $aline;
69 my @flank2=split/\t/,$aline;
70 #
71 ########## mature start loop pre ####
72 do {$aline=<IN>;} until($aline=~/mature_beg/) ;
73 chomp $aline;
74 my @start=split/\t/,$aline;
75 # $start[1] -=$flank1[1];
76 do {$aline=<IN>;} until($aline=~/mature_end/) ;
77 chomp $aline;
78 my @end=split/\t/,$aline;
79 # $end[1] -=$flank1[1];
80 do {$aline=<IN>;} until($aline=~/mature_seq/) ;
81 chomp $aline;
82 my @arr1=split/\t/,$aline;
83 do {$aline=<IN>;} until($aline=~/pre_seq/) ;
84 chomp $aline;
85 my @arr2=split/\t/,$aline;
86 do {$aline=<IN>;} until($aline=~/pri_id/) ;
87 chomp $aline;
88 my @pri_id=split/\t/,$aline;
89 do {$aline=<IN>;} until($aline=~/pri_seq/) ;
90 chomp $aline;
91 my @pri_seq=split/\t/,$aline;
92 do {$aline=<IN>;} until($aline=~/star_beg/) ;
93 chomp $aline;
94 my @star_start=split/\t/,$aline;
95 # $star_start[1] -=$flank1[1];
96 do {$aline=<IN>;} until($aline=~/star_end/) ;
97 chomp $aline;
98 my @star_end=split/\t/,$aline;
99 # $star_end[1] -=$flank1[1];
100 do {$aline=<IN>;} until($aline=~/star_seq/) ;
101 chomp $aline;
102 my @arr3=split/\t/,$aline;
103 print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
104 #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
105 print FA ">miR-c-$novel\n$arr1[1]\n";
106 print PRE ">miR-c-$novel\n$pri_seq[1]\n";
107 ########## reads count #############
108 <IN>;
109 my @count1;my @count2;my @count3;my @count4;
110 $aline=<IN>;
111 do {
112 chomp $aline;
113 my @reads=split/\t/,$aline;
114 my @pos=();
115 $reads[5]=~/(\d+)\.\.(\d+)/;
116 # $pos[0] =$1-$flank1[1];
117 # $pos[1] =$2-$flank1[1];
118 $pos[0]=$1;
119 $pos[1]=$2;
120 $reads[0]=~/:([\d|_]+)_x(\d+)$/;
121 my @ss=split/_/,$1;
122 for (my $i=0;$i<@ss ;$i++) {
123 if (!(defined $count3[$i])) {
124 $count3[$i]=0;
125 }
126 if (!(defined $count4[$i])) {
127 $count4[$i]=0;
128 }
129 $count2[$i]+=$ss[$i];
130
131 }
132 # $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
133 # $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
134 # $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
135 # $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
136 if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
137 {
138 for (my $i=0;$i<@ss;$i++) {
139 $count3[$i]+=$ss[$i];
140 }
141 }
142 if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
143 for (my $i=0;$i<@ss;$i++) {
144 $count4[$i]+=$ss[$i];
145 }
146 }
147 $aline=<IN>;
148 chomp $aline;
149 } until(length $aline < 1) ;
150 $"="\t";
151 print OUT "@count3\t@count4\t@count2\n";
152 $"=" ";
153 }
154
155 close IN;
156 close OUT;
157
158 sub usage{
159 print <<"USAGE";
160 Version $version
161 Usage:
162 $0 -i -list -fa -pre -tag
163 options:
164 -i input file,predictions file
165 -list output file miRNA list file
166 -fa output file ,miRNA sequence fasta file.
167 -pre output file, miRNA precursor fasta file.
168 -tag string, sample names# eg: samA,samB,samC
169 -h help
170 USAGE
171 exit(1);
172 }
173