annotate microRNA_pipeline.pl @ 0:87fe81de0931 draft default tip

Uploaded
author bigrna
date Sun, 04 Jan 2015 02:47:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
1 #!/usr/bin/perl -w
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
2 #Filename:
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
3 #Author: Tian Dongmei
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
5 #Date: 2014-4-22
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
6 #Modified:
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
7 #Description: plant microRNA prediction
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
8 my $version=1.00;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
9
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
10 use strict;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
11 use Getopt::Long;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
12 use threads;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
13 #use threads::shared;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
14 use File::Path;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
15 use File::Basename;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
16 #use RNA;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
17 #use Term::ANSIColor;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
18
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
19 my %opts;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
20 GetOptions(\%opts,"i:s@","tag:s@","phred:i","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
22 &usage;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
23 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
24
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
25 my $time=&Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
26 print "miPlant program start:\n The time is $time!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
27 print "Command line:\n $0 @ARGV\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
28
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
29 my $format=$opts{'format'};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
31 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
33 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
34
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
35 my $phred_qv=64;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
36 if (defined $opts{'phred'}) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
37 $phred_qv=$opts{'phred'};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
38 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
39
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
40 my @inputfiles=@{$opts{'i'}};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
41 my @inputtags=@{$opts{'tag'}};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
42
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
43 my $mypath=`pwd`;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
44 chomp $mypath;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
45
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
46 my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
47
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
48
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
49 unless ($dir=~/\/$/) {$dir.="/";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
50 if (not -d $dir) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
51 mkdir $dir;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
52 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
53 my $config=$dir."/input_config";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
54 open CONFIG,">$config";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
55 for (my $i=0;$i<@inputfiles;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
56 print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
57 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
58 close CONFIG;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
59
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
60 my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
61
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
62 my $a="ATCTCGTATG"; #adapter
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
63 if (defined $opts{'a'}) {$a=$opts{'a'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
64
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
65 my $m=6; #adapter minimum mapped nt
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
66 if (defined $opts{'M'}) {$m=$opts{'M'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
67
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
68 my $t=1; #threads number
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
69 if (defined $opts{'t'}) {$t=$opts{'t'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
70
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
71 my $min_nt=19; # minimum reads length
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
72 if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
73
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
74 my $max_nt=28; #maximum reads length
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
75 if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
76
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
77 my $mis=0; #mismatch number for microRNA
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
78 if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
79
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
80 my $mis_rfam=0;# mismatch number for rfam
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
81 if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
82
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
83 my $hit=25; # maximum reads mapping hits in genome
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
84 if (defined $opts{'r'}) {$hit=$opts{'r'};}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
85
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
86 my $upstream = 2; # microRNA 5' extension
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
87 $upstream = $opts{'e'} if(defined $opts{'e'});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
88
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
89 my $downstream = 5;# microRNA 3' extension
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
90 $downstream = $opts{'f'} if(defined $opts{'f'});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
91
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
92 my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
93 my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
94 my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
95
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
96 $time=&Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
97 print "$time, Checking input file!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
98
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
99 my (@filein,@mark,@clean);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
100 #&read_config();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
101 @filein=@inputfiles;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
102 @mark=@inputtags;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
103
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
104 &checkfa($opts{pre});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
105 &checkfa($opts{mat});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
106 &checkfa($opts{gfa});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
107
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
108
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
109 ##### clip adpter --> clean data start
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
110 $time=&Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
111 print "$time, Preprocess:\n trim adapter, reads collapse and filter reads by length.\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
112
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
113 $time=~s/:/-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
114 $time=~s/ /-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
115 my $preprocess=$dir."preProcess/";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
116 mkdir $preprocess;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
117 my $can_use_threads = eval 'use threads; 1';
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
118 if ($can_use_threads) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
119 # Do processing using threads
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
120 print "Do processing using threads\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
121 my @filein1=@filein; my @mark1=@mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
122 while (@filein1>0) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
123 my @thrs; my @res;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
124 for (my $i=0;$i<$t ;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
125 last if(@filein1==0);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
126 my $in=shift @filein1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
127 my $out=shift @mark1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
128 push @clean,$preprocess.$out."_clips_adapter.fq";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
129 $thrs[$i]=threads->create(\&clips,$in,$out);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
130 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
131 for (my $i=0;$i<@thrs;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
132 $res[$i]=$thrs[$i]->join();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
133 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
134 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
135 } else {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
136 # Do not processing using threads
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
137 print "Do not processing using threads\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
138 for (my $i=0;$i<@filein ;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
139 my $in=$filein[$i];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
140 my $out=$mark[$i];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
141 push @clean,$preprocess.$out."_clips_adapter.fq";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
142 &clips($in,$out);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
143 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
144 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
145
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
146 ##### clip adpter --> clean data end
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
147
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
148 my $collapsed=$preprocess."collapse_reads.fa";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
149 my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
150 my $data2; ### mirbase not mapped reads
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
151 my $data3; ### rfam not mapped reads
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
152 &collapse(\@clean,$collapsed); #collapse reads to tags
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
153
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
154 &filterbylength(); # filter <$min_nt && >$max_nt
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
155
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
156 print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
157
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
158 $time=Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
159 print "$time: known microRNA quantify!\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
160
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
161 chdir $dir;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
162
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
163 $time=~s/:/-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
164 $time=~s/ /-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
165 my $known_result=$dir."known_miRNA_Express/";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
166 &quantify(); ### known microRAN quantify
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
167
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
168
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
169 #my $miR_exp_dir=&search($known_result,"miRNA_Express_");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
170 $data2=$known_result."/mirbase_not_mapped.fa";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
171
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
172 my $pathfile="$dir/path.txt";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
173 open PA,">$pathfile";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
174 print PA "$config\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
175 print PA "$preprocess\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
176 print PA "$known_result\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
177
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
178 if (defined $opts{'rfam'}) { #rfam mapping and analysis
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
179 $time=Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
180 print "$time: RNA annotate!\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
181 $time=~s/:/-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
182 $time=~s/ /-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
183 my $rfam_exp_dir=$dir."rfam_match";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
184 &rfam();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
185 #my $rfam_exp_dir=&search($dir,"rfam_match_");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
186 $data3=$rfam_exp_dir."/rfam_not_mapped.fa";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
187 print PA "$rfam_exp_dir\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
188
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
189 my $tag=join "\\;" ,@mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
190 system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
191 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
192
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
193 my $data4=$data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
194 if (defined $opts{'D'}) { #genome mapping
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
195 $data4=$data3;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
196 }else{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
197 $data4=$data2;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
198 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
199
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
200 $time=Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
201 print "$time: Genome alignment!\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
202 $time=~s/:/-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
203 $time=~s/ /-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
204 my $genome_map=$dir."genome_match";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
205 &genome($data4);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
206 print PA "$genome_map\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
207 #my $genome_map=&search($dir,"genome_match_");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
208 my $mapfile=$genome_map."/genome_mapped.bwt";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
209 my $mapfa=$genome_map."/genome_mapped.fa";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
210 my $unmap=$genome_map."/genome_not_mapped.fa";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
211
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
212 #$time=Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
213 #print "$time: Novel microRNA prediction!\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
214
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
215 &predict($mapfa);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
216
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
217 close PA;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
218 system("perl $scipt_path/html_miRPlant.pl -i $pathfile -format $format -o $dir/result.html");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
219
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
220 $time=Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
221 print "$time: Program end!!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
222
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
223 ############################## sub programs ###################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
224 sub predict{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
225 my ($file)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
226 $time=&Time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
227 print "$time: Novel microRNA prediction!\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
228 $time=~s/:/-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
229 $time=~s/ /-/g;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
230 my $predict=$dir."novel_miRNA_predict";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
231 print PA "$predict\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
232 mkdir $predict;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
233 chdir $predict;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
234 system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
235 # print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
236
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
237 system("bowtie-build -f excised_precursor.fa excised_precursor");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
238 # print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
239
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
240 system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
241 # print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
242
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
243 system("perl $scipt_path/convert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
244 # print "\nconvert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
245
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
246 system("sort -k 4 precursor_mapped.bst > signatures.bst");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
247 # print "\nsort +3 -25 precursor_mapped.bst > ../signatures.bst\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
248
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
249 chdir $dir;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
250 system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
251 # print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
252 #system("rm novel_tmp_dir -rf");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
253 my $tag=join "," ,@mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
254 system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
255 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
256
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
257 sub genome{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
258 my ($file)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
259 if(defined $opts{'idx'}){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
260 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} ") ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
261 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
262 }else{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
263 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir ") ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
264 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
265 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
266 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
267 sub rfam{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
268 if (defined $opts{'idx2'}) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
269 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} ");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
270 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
271 }else{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
272 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
273 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
274 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
275 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
276 sub quantify{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
277 my $tag=join "\\;" ,@mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
278 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
279 print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
280 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
281 sub filterbylength{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
282 my $tmpmark=join ",", @mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
283 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
284 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
285 # print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
286
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
287 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
288 sub collapse{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
289 my ($ins,$data)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
290 my $str="";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
291 for (my $i=0;$i<@{$ins};$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
292 $str .="-i $$ins[$i] ";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
293 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
294 system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
295 # print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
296 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
297
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
298 sub clips{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
299 my ($in,$out)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
300 my $adapter=$preprocess.$out."_clips_adapter.fq";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
301 if($format eq "fq" || $format eq "fastq"){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
302 system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
303 print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
304 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
305 if($format eq "fa" || $format eq "fasta"){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
306 system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
307 # print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
308 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
309 #my $clean=$preprocess.$out."_clean.fq";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
310 #system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
311
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
312 return;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
313 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
314
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
315 sub read_config{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
316 open CON,"<$config";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
317 while (my $aline=<CON>) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
318 chomp $aline;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
319 my @tmp=split/\t/,$aline;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
320 push @filein,$tmp[0];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
321 push @mark,$tmp[1];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
322 &check_rawdata($tmp[0]);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
323 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
324 close CON;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
325 if (@filein != @mark) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
326 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
327 die "Maybe config file have some wrong!!!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
328 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
329 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
330 sub check_rawdata{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
331 my ($fileforcheck)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
332 if (!(-s $fileforcheck)) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
333 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
334 die "Can not find $fileforcheck, or file is empty!!!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
335 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
336 if ($format eq "fasta" || $format eq "fa") {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
337 &checkfa($fileforcheck);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
338 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
339 if ($format eq "fastq" || $format eq "fq") {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
340 &checkfq($fileforcheck);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
341 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
342 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
343 sub checkfa{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
344 my ($file_reads)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
345 open N,"<$file_reads";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
346 my $line=<N>;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
347 chomp $line;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
348 if($line !~ /^>\S+/){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
349 #printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
350 die "The first line of file $file_reads does not start with '>identifier'
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
351 Reads file $file_reads is not a valid fasta file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
352 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
353 if(<N> !~ /^[ACGTNacgtn]*$/){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
354 #printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
355 die "File $file_reads contains not allowed characters in sequences
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
356 Allowed characters are ACGTN
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
357 Reads file $file_reads is not a fasta file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
358 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
359 close N;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
360 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
361 sub checkfq{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
362 my ($file_reads)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
363
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
364 open N,"<$file_reads";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
365 for (my $i=0;$i<10;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
366 my $a=<N>;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
367 my $b=<N>;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
368 my $c=<N>;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
369 my $d=<N>;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
370 chomp $a;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
371 chomp $b;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
372 chomp $c;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
373 chomp $d;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
374 if($a!~/^\@/){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
375 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
376 die "$file_reads is not a fastq file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
377 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
378 if($b!~ /^[ACGTNacgtn]*$/){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
379 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
380 die "File $file_reads contains not allowed characters in sequences
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
381 Allowed characters are ACGTN
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
382 Reads file $file_reads is not a fasta file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
383 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
384 if ($c!~/^\@/ && $c!~/^\+/) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
385 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
386 die "$file_reads is not a fastq file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
387 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
388 if ((length $b) != (length $d)) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
389 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
390 die "$file_reads is not a fastq file\n\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
391 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
392 my @qv=split //,$d;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
393 for (my $j=0;$j<@qv ;$j++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
394 my $q=ord($qv[$j])-64;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
395 if($q<0){$phred_qv=33;}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
396 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
397 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
398 close N;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
399 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
400
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
401 sub search{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
402 my ($dir,$str)=@_;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
403 opendir I,$dir;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
404 my @ret;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
405 while (my $file=readdir I) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
406 if ($file=~/$str/) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
407 push @ret, $file;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
408 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
409 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
410 closedir I;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
411 if (@ret != 1) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
412 #&printErr();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
413
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
414 die "Can not find directory or file which name has string: $str !!!\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
415 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
416 return $ret[0];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
417 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
418
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
419 =cut
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
420
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
421 sub printErr{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
422 print STDERR color 'bold red';
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
423 print STDERR "Error: ";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
424 print STDERR color 'reset';
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
425 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
426 sub Time{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
427 my $time=time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
428 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
429 $month++;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
430 $year+=1900;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
431 if (length($sec) == 1) {$sec = "0"."$sec";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
432 if (length($min) == 1) {$min = "0"."$min";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
433 if (length($hour) == 1) {$hour = "0"."$hour";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
434 if (length($day) == 1) {$day = "0"."$day";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
435 if (length($month) == 1) {$month = "0"."$month";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
436 #print "$year-$month-$day $hour:$min:$sec\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
437 return("$year-$month-$day-$hour-$min-$sec");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
438 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
439 =cut
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
440 sub Time{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
441 my $time=time();
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
442 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
443 $month++;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
444 $year+=1900;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
445 if (length($sec) == 1) {$sec = "0"."$sec";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
446 if (length($min) == 1) {$min = "0"."$min";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
447 if (length($hour) == 1) {$hour = "0"."$hour";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
448 if (length($day) == 1) {$day = "0"."$day";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
449 if (length($month) == 1) {$month = "0"."$month";}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
450 #print "$year-$month-$day $hour:$min:$sec\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
451 return("$year-$month-$day $hour:$min:$sec");
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
452 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
453
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
454
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
455 sub usage{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
456 print <<"USAGE";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
457 Version $version
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
458 Usage:
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
459
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
460 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
461 options:
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
462 -i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
463 -tag string # raw data file names, -tag xxx -tag xxx
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
464
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
465 -format string,#specific input rawdata file format : fastq|fq|fasta|fa
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
466
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
467 -path scirpt path
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
468
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
469 -gfa string, input file # genome fasta. sequence file
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
470 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
471 string must be the prefix of the bowtie index. For instance, if
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
472 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
473 the prefix is 'h_sapiens_37_asm'.##can be null
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
474
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
475 -pre string, input file #species specific microRNA precursor sequences
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
476 -mat string, input file #species specific microRNA mature sequences
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
477
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
478 -rfam string, input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
479 -idx2 string, rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
480 string must be the prefix of the bowtie index. For instance, if
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
481 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
482 the prefix is 'h_sapiens_37_asm'.##can be null
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
483
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
484 -D If [-D] is specified,will discard rfam mapped reads(nead -rfam).
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
485
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
486 -a string, ADAPTER string. default is ATCTCGTATG.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
487 -M int, require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
488 -min int, reads min length,default is 19.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
489 -max int, reads max length,default is 28.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
490
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
491 -mis [int] number of allowed mismatches when mapping reads to precursors, default 0
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
492 -e [int] number of nucleotides upstream of the mature sequence to consider, default 2
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
493 -f [int] number of nucleotides downstream of the mature sequence to consider, default 5
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
494 -v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
495 -r int a read is allowed to map up to this number of positions in the genome,default is 25
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
496
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
497 -dis <int> Maximal space between miRNA and miRNA* (200)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
498 -flank <int> Flank sequence length of miRNA precursor (10)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
499 -mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
500
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
501 -t int, number of threads [1]
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
502
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
503 -o output directory# absolute path
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
504 -h help
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
505 USAGE
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
506 exit(1);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
507 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
508