comparison siRNA.pl @ 5:f466394ee1fd draft

Uploaded
author big-tiandm
date Thu, 23 Oct 2014 22:08:09 -0400
parents 07745c0958dd
children
comparison
equal deleted inserted replaced
4:75180ba26dc1 5:f466394ee1fd
10 use lib '/leofs/biotrans/chentt/perl_module/'; 10 use lib '/leofs/biotrans/chentt/perl_module/';
11 #perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq 11 #perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq
12 print " 12 print "
13 ##################################### 13 #####################################
14 # # 14 # #
15 # sRNA cluster # 15 # sRNA cluster #
16 # # 16 # #
17 ##################################### 17 #####################################
18 "; 18 ";
19 ########################################################################################### 19 ###########################################################################################
20 my $usage="$0 20 my $usage="$0
55 -span plot span, default 50000 55 -span plot span, default 50000
56 "; 56 ";
57 57
58 my %options; 58 my %options;
59 GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h"); 59 GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h");
60 60 #print help if that option is used
61 my @inputfiles=@{$opts{'i'}}; 61 if($options{h}){die $usage;}
62 my @inputtags=@{$opts{'tag'}}; 62
63 my @filein=@{$options{'i'}};
64 my @mark=@{$options{'tag'}};
63 65
64 #my $config=$options{'i'}; 66 #my $config=$options{'i'};
65 my $genome_fa=$options{'g'}; 67 my $genome_fa=$options{'g'};
66 my $gff=$options{'f'}; 68 my $gff=$options{'f'};
69
70
67 ########################################################################################## 71 ##########################################################################################
68 my $predir=`pwd`; 72 my $predir=`pwd`;
69 chomp $predir; 73 chomp $predir;
70 my $workdir=defined($options{'o'}) ? $options{'o'}:$predir; 74 my $workdir=defined($options{'o'}) ? $options{'o'}:$predir;
71 75
89 #if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 93 #if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
90 # die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; 94 # die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
91 #} 95 #}
92 96
93 my $adpter="ATCTCGTATG"; #adapter 97 my $adpter="ATCTCGTATG"; #adapter
94 if (defined $opts{'a'}) {$a=$opts{'a'};} 98 if (defined $options{'a'}) {$adpter=$options{'a'};}
95 99
96 #print help if that option is used
97 if($options{h}){die $usage;}
98 100
99 my $phred_qv=64; 101 my $phred_qv=64;
100 my $sample_number; 102 my $sample_number;
101 my ($dir,$dir_tmp); 103 my ($dir,$dir_tmp);
102 ################################ MAIN ################################################## 104 ################################ MAIN ##################################################
103 print "\ncluster program start:"; 105 print "\ncluster program start:";
104 my $time=Time(); 106 my $time=Time();
105 make_dir_tmp(); 107 make_dir_tmp();
106 108
107 my (@filein,@mark,@clip); 109 my @clip;
108 my $mark; 110 my $mark;
109 my $sample_mark; 111 my $sample_mark;
110 112
111 my $config=$workdir."/input_config"; 113 my $config=$dir."/input_config";
112 open CONFIG,">$config"; 114 open CONFIG,">$config";
113 for (my $i=0;$i<@inputfiles;$i++) { 115 for (my $i=0;$i<@filein;$i++) {
114 print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n"; 116 print CONFIG $filein[$i],"\t",$mark[$i],"\n";
115 } 117 }
116 close CONFIG; 118 close CONFIG;
117 119 if (@filein != @mark) {
118 read_config(); 120 die "Maybe config file have some wrong!!!\n";
121 }
122 $sample_number=@mark;
123 $mark=join "\t",@mark;
124 $sample_mark=join "\#",@mark;
125
126
127 #read_config();
119 128
120 trim_adapter_and_filter(); 129 trim_adapter_and_filter();
121 130
122 my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data 131 my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data
123 my $data2=$filter_out; ### mirbase not mapped reads 132 my $data2=$filter_out; ### mirbase not mapped reads
148 157
149 quantify(); 158 quantify();
150 159
151 phase(); 160 phase();
152 161
153 class(); 162 if (defined($options{'nat'})&&defined($options{'repeat'})) {
163 class();
164 }
165 else{
166 get_genelist();
167 }
154 168
155 annotate(); 169 annotate();
156 170
157 genome_length(); 171 genome_length();
158 172
178 $dir="$workdir\/cluster_runs_$time\/"; 192 $dir="$workdir\/cluster_runs_$time\/";
179 print STDERR "mkdir $dir\n\n"; 193 print STDERR "mkdir $dir\n\n";
180 return; 194 return;
181 } 195 }
182 196
183 sub read_config{ 197 #sub read_config{
184 open IN,"<$config"; 198 # open IN,"<$config";
185 while (my $aline=<IN>) { 199 # while (my $aline=<IN>) {
186 chomp $aline; 200 # chomp $aline;
187 my @tmp=split/\t/,$aline; 201 # my @tmp=split/\t/,$aline;
188 push @filein,$tmp[0]; 202 # push @filein,$tmp[0];
189 push @mark,$tmp[1]; 203 # push @mark,$tmp[1];
190 } 204 # }
191 close IN; 205 # close IN;
192 if (@filein != @mark) { 206 # if (@filein != @mark) {
193 die "Maybe config file have some wrong!!!\n"; 207 # die "Maybe config file have some wrong!!!\n";
194 } 208 # }
195 $sample_number=@mark; 209 # $sample_number=@mark;
196 $mark=join "\t",@mark; 210 # $mark=join "\t",@mark;
197 $sample_mark=join "\#",@mark; 211 # $sample_mark=join "\#",@mark;
198 } 212 #}
199 213
200 214
201 sub trim_adapter_and_filter{ 215 sub trim_adapter_and_filter{
202 my $time=time(); 216 my $time=time();
203 $preprocess=$dir."preProcess/"; 217 $preprocess=$dir."preProcess/";
233 247
234 sub clips{ 248 sub clips{
235 my ($filein,$fileout)=@_; 249 my ($filein,$fileout)=@_;
236 my $adapter=$dir."preProcess\/$fileout\_clip\.fq"; 250 my $adapter=$dir."preProcess\/$fileout\_clip\.fq";
237 if($format eq "fq" || $format eq "fastq"){ 251 if($format eq "fq" || $format eq "fastq"){
238 my $clip=`$path\/fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`; 252 my $clip=`fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`;
239 } 253 }
240 if($format eq "fa" || $format eq "fasta"){ 254 if($format eq "fa" || $format eq "fasta"){
241 my $clip=`$path\/fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`; 255 my $clip=`fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`;
242 } 256 }
243 #my $clean=$dir."preProcess\/$fileout\_clean.fq"; 257 #my $clean=$dir."preProcess\/$fileout\_clean.fq";
244 #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `; 258 #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `;
245 return $fileout; 259 return $fileout;
246 } 260 }
350 print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n"; 364 print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n";
351 return 0; 365 return 0;
352 } 366 }
353 367
354 sub class{ 368 sub class{
355 print "clusters is ready to annotate by source\n\n"; 369 print "clusters is ready to annotate by sources\n\n";
356 my $nat=$options{'nat'}; 370 my $nat=$options{'nat'};
357 my $repeat=$options{'repeat'}; 371 my $repeat=$options{'repeat'};
358 my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`; 372 my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`;
359 print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n"; 373 print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n";
360 } 374 }
361 375
362 sub annotate{ 376 sub annotate{
363 print "clusters is ready to annotate by gff file\n\n"; 377 print "clusters is ready to annotate by gff file\n\n";
364 my $file="$annotate_dir\/sample_class.anno"; 378 my $file;
379 if (defined($options{'nat'})&&defined($options{'repeat'})) {
380 $file="$annotate_dir\/sample_class.anno";
381 }
382 else{
383 $file=$rpkm;
384 }
365 my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`; 385 my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`;
366 print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n"; 386 print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n";
367 return 0; 387 return 0;
388 }
389 sub get_genelist{
390
391 my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`;
392 print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt";
368 } 393 }
369 394
370 sub dec{ 395 sub dec{
371 print "deg reading\n\n"; 396 print "deg reading\n\n";
372 my $deg_file=$options{'deg'}; 397 my $deg_file=$options{'deg'};