Mercurial > repos > big-tiandm > mirplant2
comparison miRNA_Express_and_sequence.pl @ 13:d1cc2e6ecf90 draft
Uploaded
author | big-tiandm |
---|---|
date | Fri, 25 Jul 2014 05:21:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:dc5a29826c7d | 13:d1cc2e6ecf90 |
---|---|
1 #!/usr/bin/perl -w | |
2 #Filename: | |
3 #Author: Tian Dongmei | |
4 #Email: tiandm@big.ac.cn | |
5 #Date: 2014-6-4 | |
6 #Modified: | |
7 #Description: solexa miRNA express and sequence | |
8 my $version=1.00; | |
9 | |
10 use strict; | |
11 use Getopt::Long; | |
12 | |
13 my %opts; | |
14 GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h"); | |
15 if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments | |
16 &usage; | |
17 } | |
18 | |
19 my $filein=$opts{'i'}; | |
20 my $fileout=$opts{'list'}; | |
21 my $out=$opts{'fa'}; | |
22 my $preout=$opts{'pre'}; | |
23 | |
24 =cut | |
25 my %hash_pri; | |
26 open PRI,"<$opts{p}"; | |
27 while (my $aline=<PRI>) { | |
28 chomp $aline; | |
29 if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;} | |
30 } | |
31 close PRI; | |
32 =cut | |
33 | |
34 open IN,"<$filein"; #input file | |
35 open OUT,">$fileout"; #output file | |
36 open FA ,">$out"; | |
37 open PRE,">$preout"; | |
38 | |
39 print OUT "#ID\tcoordinate\tpos1\tpos2"; | |
40 my @marks=split/\,/,$opts{'tag'}; | |
41 foreach (@marks) { | |
42 print OUT "\t",$_,"_matureExp"; | |
43 } | |
44 foreach (@marks) { | |
45 print OUT "\t",$_,"_starExp"; | |
46 } | |
47 foreach (@marks) { | |
48 print OUT "\t",$_,"_totalExp"; | |
49 } | |
50 | |
51 print OUT "\n"; | |
52 | |
53 my (%uniq_id,$novel); | |
54 while (my $aline=<IN>) { | |
55 chomp $aline; | |
56 until ($aline =~ /^score\s+[-\d\.]+/){ | |
57 $aline = <IN>; | |
58 if (eof) {last;} | |
59 } | |
60 if (eof) {last;} | |
61 ########## miRNA ID ################ | |
62 $novel++; | |
63 ########### annotate#################### | |
64 do {$aline=<IN>;} until($aline=~/flank_first_end/) ; | |
65 chomp $aline; | |
66 my @flank1=split/\t/,$aline; | |
67 do {$aline=<IN>;} until($aline=~/flank_second_beg/) ; | |
68 chomp $aline; | |
69 my @flank2=split/\t/,$aline; | |
70 # | |
71 ########## mature start loop pre #### | |
72 do {$aline=<IN>;} until($aline=~/mature_beg/) ; | |
73 chomp $aline; | |
74 my @start=split/\t/,$aline; | |
75 # $start[1] -=$flank1[1]; | |
76 do {$aline=<IN>;} until($aline=~/mature_end/) ; | |
77 chomp $aline; | |
78 my @end=split/\t/,$aline; | |
79 # $end[1] -=$flank1[1]; | |
80 do {$aline=<IN>;} until($aline=~/mature_seq/) ; | |
81 chomp $aline; | |
82 my @arr1=split/\t/,$aline; | |
83 do {$aline=<IN>;} until($aline=~/pre_seq/) ; | |
84 chomp $aline; | |
85 my @arr2=split/\t/,$aline; | |
86 do {$aline=<IN>;} until($aline=~/pri_id/) ; | |
87 chomp $aline; | |
88 my @pri_id=split/\t/,$aline; | |
89 do {$aline=<IN>;} until($aline=~/pri_seq/) ; | |
90 chomp $aline; | |
91 my @pri_seq=split/\t/,$aline; | |
92 do {$aline=<IN>;} until($aline=~/star_beg/) ; | |
93 chomp $aline; | |
94 my @star_start=split/\t/,$aline; | |
95 # $star_start[1] -=$flank1[1]; | |
96 do {$aline=<IN>;} until($aline=~/star_end/) ; | |
97 chomp $aline; | |
98 my @star_end=split/\t/,$aline; | |
99 # $star_end[1] -=$flank1[1]; | |
100 do {$aline=<IN>;} until($aline=~/star_seq/) ; | |
101 chomp $aline; | |
102 my @arr3=split/\t/,$aline; | |
103 print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t"; | |
104 #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t"; | |
105 print FA ">miR-c-$novel\n$arr1[1]\n"; | |
106 print PRE ">miR-c-$novel\n$pri_seq[1]\n"; | |
107 ########## reads count ############# | |
108 <IN>; | |
109 my @count1;my @count2;my @count3;my @count4; | |
110 $aline=<IN>; | |
111 do { | |
112 chomp $aline; | |
113 my @reads=split/\t/,$aline; | |
114 my @pos=(); | |
115 $reads[5]=~/(\d+)\.\.(\d+)/; | |
116 # $pos[0] =$1-$flank1[1]; | |
117 # $pos[1] =$2-$flank1[1]; | |
118 $pos[0]=$1; | |
119 $pos[1]=$2; | |
120 $reads[0]=~/:([\d|_]+)_x(\d+)$/; | |
121 my @ss=split/_/,$1; | |
122 for (my $i=0;$i<@ss ;$i++) { | |
123 if (!(defined $count3[$i])) { | |
124 $count3[$i]=0; | |
125 } | |
126 if (!(defined $count4[$i])) { | |
127 $count4[$i]=0; | |
128 } | |
129 $count2[$i]+=$ss[$i]; | |
130 | |
131 } | |
132 # $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 ); | |
133 # $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 ); | |
134 # $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1); | |
135 # $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1); | |
136 if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 ) | |
137 { | |
138 for (my $i=0;$i<@ss;$i++) { | |
139 $count3[$i]+=$ss[$i]; | |
140 } | |
141 } | |
142 if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){ | |
143 for (my $i=0;$i<@ss;$i++) { | |
144 $count4[$i]+=$ss[$i]; | |
145 } | |
146 } | |
147 $aline=<IN>; | |
148 chomp $aline; | |
149 } until(length $aline < 1) ; | |
150 $"="\t"; | |
151 print OUT "@count3\t@count4\t@count2\n"; | |
152 $"=" "; | |
153 } | |
154 | |
155 close IN; | |
156 close OUT; | |
157 | |
158 sub usage{ | |
159 print <<"USAGE"; | |
160 Version $version | |
161 Usage: | |
162 $0 -i -list -fa -pre -tag | |
163 options: | |
164 -i input file,predictions file | |
165 -list output file miRNA list file | |
166 -fa output file ,miRNA sequence fasta file. | |
167 -pre output file, miRNA precursor fasta file. | |
168 -tag string, sample names# eg: samA,samB,samC | |
169 -h help | |
170 USAGE | |
171 exit(1); | |
172 } | |
173 |