# HG changeset patch # User big-tiandm # Date 1406282252 14400 # Node ID 5691802f074bc6d55770423580d6c9ad342b4ec0 # Parent 45de5e1ff4872a17739ea3d5ad19a81e96567149 Deleted selected files diff -r 45de5e1ff487 -r 5691802f074b collapseReads2Tags.pl --- a/collapseReads2Tags.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,170 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-3-20 -#Modified: -#Description: fastq file form reads cluster(the same sequence in the same cluster) -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i:s@","format=s","mark:s","qual:s","qv:i","o=s","h"); -if (!(defined $opts{o} and defined $opts{'format'}) || defined $opts{h}) { #necessary arguments -&usage; -} -my @filein=@{$opts{i}} if(defined $opts{i}); -my $name=defined $opts{'mark'} ? $opts{'mark'} : "seq"; -my $fileout=$opts{'o'}; -my $pq=defined $opts{'qv'} ? $opts{'qv'} : 33; -my %hash;##分块存放原始序列 - -my $format=$opts{'format'}; -if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { - die "Parameter -format is error!\n"; -} - -my ($qualT,$qualV); -if (defined $opts{'qual'} && ($format eq "fastq" || $format eq "fq")) { #quality filter - my @temp=split /:/,$opts{'qual'}; - $qualT=$temp[0]; - $qualV=$temp[1]; - - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=) { - my $seq=; - my $n=; - my $qv=; - my $tag=&qvcheck($qv,$qualT,$qualV); - next if(!$tag); - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } -} -elsif($format eq "fastq" || $format eq "fq"){ ### do not filter low quality reads - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=) { - my $seq=; - my $n=; - my $qv=; - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } - -} -elsif($format eq "fasta" || $format eq "fa"){ - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=) { - my $seq=; - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } -} - -open OUT,">$fileout"; #output file -my $count=0; -foreach my $key (keys %hash) { - my %cluster; - for (my $i=0;$i<@filein;$i++) { - next if(!(defined $hash{$key}[$i])); - my @tmp=split/\n/,$hash{$key}[$i]; - foreach (@tmp) { - $cluster{$_}[$i]++; - } - } - - foreach my $seq (keys %cluster) { - my $exp=""; my $ee=0; - for (my $i=0;$i<@filein;$i++) { - if (defined $cluster{$seq}[$i]) { - $exp.="_$cluster{$seq}[$i]"; - $ee+=$cluster{$seq}[$i]; - }else{ - $exp.="_0"; - } - } - $count+=$ee; - $exp=~s/^_//; - print OUT ">$name","_$count:$exp","_x$ee\n$seq\n"; - } -} -close OUT; - - -sub qvcheck{ - my ($str,$t,$v)=@_; - my $qv=0; - if($t eq "mean"){ - $qv=&getMeanQuality($str); - } - elsif($t eq "min"){ - $qv=&getMinQuality($str); - } - if ($qv<$v) { - return 0; - } - return 1; -} - -sub getMeanQuality(){ - chomp $_[0]; - my @bases = split(//,$_[0]); - my $sum = 0; - for(my $i = 0; $i <= $#bases; $i++){ - my $num = ord($bases[$i]) - $pq; - $sum += $num; - } - - return $sum/($#bases+1); - -} - -### -### This function gives back the Q-value of the worst base -sub getMinQuality(){ - chomp $_[0]; - my @bases = split(//,$_[0]); - my $worst = 1000; - for(my $i = 0; $i <= $#bases; $i++){ -# printf ("base: $bases[$i] --> %d\n",ord($bases[$i])); - my $num = ord($bases[$i]) - $pq; - if($num < $worst){ - $worst = $num; - } - } - return $worst; -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -format -mark -qual -qv -o -options: --i input file#fastq file ##can be multiple -i file1 -i file2 ... --mark string#quary name,default is "seq" --o output file --format string # fastq|fasta|fq|fa - --qual #reads filter - eg:(min:value/mean:value) - This parameter just for solexa reads. - If the input files are solid and needs filter,please do filter first . - --qv integer #Phred quality64/33,default 33 --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b convert_bowtie_to_blast.pl --- a/convert_bowtie_to_blast.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ -#!/usr/bin/perl - - -use warnings; -use strict; -use Getopt::Std; - -######################################### USAGE ################################ - -my $usage= -"$0 file_bowtie_result file_solexa_seq file_chromosome - -This is a converter which changes Bowtie output into Blast format. -The input includes three files: a Bowtie result file (default Bowtie -output file), a fasta file consisting of small Reads and a chromosome -fasta file. It outputs the alignments in blast_parsed format. - -file_bowtie_result likes: - -AtFlower100010_x2 + MIR319c 508 AAGGAGATTCTTTCAGTCCAG IIIIIIIIIIIIIIIIIIIII 0 -AtFlower1000188_x1 + MIR2933a 421 TCGGAGAGGAAATTCGTCGGCG IIIIIIIIIIIIIIIIIIIIII 0 - -file_solexa_seq likes: - ->AtFlower100010_x2 -AAGGAGATTCTTTCAGTCCAG - -file_chromosome contains chromosome seq in fasta format - -"; - - -####################################### INPUT FILES ############################ - -my $file_bowtie_result=shift or die $usage; -my $file_short_seq=shift or die $usage; -my $file_chromosome_seq=shift or die $usage; - - -##################################### GLOBAL VARIBALES ######################### - -my %short_seq_length=(); -my %chromosome_length=(); - - -######################################### MAIN ################################# - -#get the short sequence id and its length -sequence_length($file_short_seq,\%short_seq_length); - -#get the chromosome sequence id and its length -sequence_length($file_chromosome_seq,\%chromosome_length); - -#convert bowtie result format to blast format; -change_format($file_bowtie_result); - -exit; - - -##################################### SUBROUTINES ############################## - -sub sequence_length{ - my ($file,$hash) = @_; - my ($id, $desc, $sequence, $seq_length) = (); - - open (FASTA, "<$file") or die "can not open $$file\n"; - while () - { - chomp; - if (/^>(\S+)(.*)/) - { - $id = $1; - $desc = $2; - $sequence = ""; - while (){ - chomp; - if (/^>(\S+)(.*)/){ - $$hash{$id} = length $sequence; - $id = $1; - $desc = $2; - $sequence = ""; - next; - } - $sequence .= $_; - } - } - } - $seq_length=length($sequence); - $$hash{$id} = $seq_length; - close FASTA; -} - - - - - -sub change_format{ - #Change Bowtie format into blast format - my $file=shift @_; - open(FILE,"<$file")||die"can not open the bowtie result file:$!\n"; - #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n"; - - while(){ - chomp; - my @tmp=split("\t",$_); - #Clean the reads ID - my @tmp1=split(" ",$tmp[0]); - print "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t"; - if($tmp[1] eq "+"){ - my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]}; - my $seq_bg=$tmp[3] + 1; - print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n"; - } - if($tmp[1] eq "-"){ - my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3]; - my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1; - print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n"; - } - } - -# close BLASTOUT; - -} - - - diff -r 45de5e1ff487 -r 5691802f074b count_rfam_express.pl --- a/count_rfam_express.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1800 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","o=s","tag:s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; - -my $marks=defined $opts{'tag'} ? $opts{'tag'} : ""; - -if(!(defined $opts{'tag'})){ - my $line=`head -1 $filein`; - my @tmp=split/\t/,$line; - $tmp[0]=~/:([\d|_]+)_x(\d+)$/; - my @ss=split/_/,$1; - for (my $i=1;$i<=@ss;$i++) { - $marks .="Smp$i;"; - } -} - -my @marks=split/\;/,$marks; - -my %rfam_key; -while(){ - chomp; - if(/^(\S+)\s+(\S+)$/){ - $rfam_key{$1}=$2; - } -} - - -my %reads; -my %tags; -open IN,"<$filein"; -while (my $aline=) { - chomp $aline; - my @tmp=split/\t/,$aline; - $tmp[0]=~/:([\d|_]+)_x(\d+)$/; - - my @exp=split/_/,$1; - my @tag=split/\;/,$tmp[2]; - - if (defined $rfam_key{$tag[0]}) { - for (my $i=0;$i<@exp;$i++) { - $reads{$rfam_key{$tag[0]}}[$i]+=$exp[$i]; - $tags{$rfam_key{$tag[0]}}[$i]++ if($exp[$i]!=0); - } - }else{ - for (my $i=0;$i<@exp;$i++) { - $reads{other}[$i]+=$exp[$i]; - $tags{other}[$i]++ if($exp[$i]!=0); - } - } - -} -close IN; - -$"="\t"; ##### @array print in \t -open OUT,">$fileout"; -print OUT "####################################\n# small RNA expressed reads number #\n####################################\n"; -print OUT "#RNAname\t@marks\n"; -foreach my $key (keys %reads) { - print OUT $key; - for (my $i=0;$i<@{$reads{$key}} ;$i++) { - print OUT "\t",$reads{$key}[$i]; - } - print OUT "\n"; -} - -print OUT "\n\n####################################\n# small RNA expressed tags number #\n####################################\n"; -print OUT "#RNAname\t@marks\n"; - -foreach my $key (keys %tags) { - print OUT $key; - for (my $i=0;$i<@{$reads{$key}} ;$i++) { - if(defined $tags{$key}[$i]){print OUT "\t",$tags{$key}[$i];} - else{print OUT "\t0";} - } - print OUT "\n"; -} - -close OUT; -$"=" "; ##### @array print in \t - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -tag -o -options: --i input file# rfam bowtie bwt. format mapping result --tag [string] sample marks# eg. sampleA;sampleB;sampleC --o output file - --h help -USAGE -exit(1); -} - -__DATA__ -RF00635 lncRNA -RF01868 lncRNA -RF01869 lncRNA -RF01870 lncRNA -RF01871 lncRNA -RF01872 lncRNA -RF01873 lncRNA -RF01874 lncRNA -RF01875 lncRNA -RF01876 lncRNA -RF01877 lncRNA -RF01878 lncRNA -RF01879 lncRNA -RF01880 lncRNA -RF01881 lncRNA -RF01882 lncRNA -RF01883 lncRNA -RF01884 lncRNA -RF01885 lncRNA -RF01886 lncRNA -RF01887 lncRNA -RF01888 lncRNA -RF01889 lncRNA -RF01890 lncRNA -RF01891 lncRNA -RF01892 lncRNA -RF01893 lncRNA -RF01894 lncRNA -RF01904 lncRNA -RF01905 lncRNA -RF01906 lncRNA -RF01907 lncRNA -RF01908 lncRNA -RF01909 lncRNA -RF01928 lncRNA -RF01929 lncRNA -RF01930 lncRNA -RF01931 lncRNA -RF01932 lncRNA -RF01933 lncRNA -RF01934 lncRNA -RF01935 lncRNA -RF01946 lncRNA -RF01947 lncRNA -RF01948 lncRNA -RF01950 lncRNA -RF01951 lncRNA -RF01952 lncRNA -RF01953 lncRNA -RF01954 lncRNA -RF01955 lncRNA -RF01956 lncRNA -RF01957 lncRNA -RF01958 lncRNA -RF01961 lncRNA -RF01962 lncRNA -RF01963 lncRNA -RF01964 lncRNA -RF01965 lncRNA -RF01966 lncRNA -RF01967 lncRNA -RF01968 lncRNA -RF01969 lncRNA -RF01970 lncRNA -RF01971 lncRNA -RF01972 lncRNA -RF01973 lncRNA -RF01974 lncRNA -RF01975 lncRNA -RF01976 lncRNA -RF01977 lncRNA -RF01978 lncRNA -RF01979 lncRNA -RF01980 lncRNA -RF01981 lncRNA -RF01983 lncRNA -RF01984 lncRNA -RF01985 lncRNA -RF01986 lncRNA -RF01987 lncRNA -RF01992 lncRNA -RF02038 lncRNA -RF02039 lncRNA -RF02040 lncRNA -RF02041 lncRNA -RF02042 lncRNA -RF02043 lncRNA -RF02044 lncRNA -RF02045 lncRNA -RF02046 lncRNA -RF02047 lncRNA -RF02085 lncRNA -RF02086 lncRNA -RF02087 lncRNA -RF02089 lncRNA -RF02090 lncRNA -RF02091 lncRNA -RF02098 lncRNA -RF02101 lncRNA -RF02102 lncRNA -RF02103 lncRNA -RF02104 lncRNA -RF02105 lncRNA -RF02106 lncRNA -RF02107 lncRNA -RF02108 lncRNA -RF02109 lncRNA -RF02110 lncRNA -RF02112 lncRNA -RF02113 lncRNA -RF02114 lncRNA -RF02115 lncRNA -RF02116 lncRNA -RF02117 lncRNA -RF02118 lncRNA -RF02119 lncRNA -RF02120 lncRNA -RF02121 lncRNA -RF02122 lncRNA -RF02123 lncRNA -RF02124 lncRNA -RF02125 lncRNA -RF02126 lncRNA -RF02127 lncRNA -RF02128 lncRNA -RF02129 lncRNA -RF02130 lncRNA -RF02131 lncRNA -RF02132 lncRNA -RF02133 lncRNA -RF02134 lncRNA -RF02135 lncRNA -RF02136 lncRNA -RF02137 lncRNA -RF02138 lncRNA -RF02139 lncRNA -RF02140 lncRNA -RF02141 lncRNA -RF02142 lncRNA -RF02143 lncRNA -RF02145 lncRNA -RF02146 lncRNA -RF02147 lncRNA -RF02148 lncRNA -RF02149 lncRNA -RF02150 lncRNA -RF02152 lncRNA -RF02153 lncRNA -RF02154 lncRNA -RF02155 lncRNA -RF02156 lncRNA -RF02157 lncRNA -RF02158 lncRNA -RF02159 lncRNA -RF02160 lncRNA -RF02161 lncRNA -RF02164 lncRNA -RF02165 lncRNA -RF02166 lncRNA -RF02167 lncRNA -RF02168 lncRNA -RF02169 lncRNA -RF02170 lncRNA -RF02171 lncRNA -RF02172 lncRNA -RF02173 lncRNA -RF02174 lncRNA -RF02175 lncRNA -RF02176 lncRNA -RF02177 lncRNA -RF02178 lncRNA -RF02179 lncRNA -RF02180 lncRNA -RF02181 lncRNA -RF02182 lncRNA -RF02183 lncRNA -RF02184 lncRNA -RF02185 lncRNA -RF02186 lncRNA -RF02187 lncRNA -RF02188 lncRNA -RF02189 lncRNA -RF02190 lncRNA -RF02191 lncRNA -RF02192 lncRNA -RF02193 lncRNA -RF02195 lncRNA -RF02196 lncRNA -RF02197 lncRNA -RF02198 lncRNA -RF02199 lncRNA -RF02200 lncRNA -RF02201 lncRNA -RF02202 lncRNA -RF02203 lncRNA -RF02204 lncRNA -RF02205 lncRNA -RF02206 lncRNA -RF02207 lncRNA -RF02208 lncRNA -RF02209 lncRNA -RF02210 lncRNA -RF02211 lncRNA -RF02212 lncRNA -RF02213 lncRNA -RF02215 lncRNA -RF02216 lncRNA -RF02217 lncRNA -RF02218 lncRNA -RF02219 lncRNA -RF02220 lncRNA -RF02246 lncRNA -RF02247 lncRNA -RF02248 lncRNA -RF02249 lncRNA -RF02250 lncRNA -RF02251 lncRNA -RF02252 lncRNA -RF02255 lncRNA -RF02256 lncRNA -RF02257 lncRNA -RF02258 lncRNA -RF02259 lncRNA -RF02267 lncRNA -RF02272 lncRNA -RF00027 miRNA -RF00047 miRNA -RF00051 miRNA -RF00052 miRNA -RF00053 miRNA -RF00073 miRNA -RF00074 miRNA -RF00075 miRNA -RF00076 miRNA -RF00103 miRNA -RF00104 miRNA -RF00129 miRNA -RF00130 miRNA -RF00131 miRNA -RF00143 miRNA -RF00144 miRNA -RF00178 miRNA -RF00237 miRNA -RF00239 miRNA -RF00241 miRNA -RF00244 miRNA -RF00245 miRNA -RF00246 miRNA -RF00247 miRNA -RF00248 miRNA -RF00249 miRNA -RF00250 miRNA -RF00251 miRNA -RF00253 miRNA -RF00254 miRNA -RF00255 miRNA -RF00256 miRNA -RF00257 miRNA -RF00258 miRNA -RF00363 miRNA -RF00364 miRNA -RF00365 miRNA -RF00366 miRNA -RF00367 miRNA -RF00445 miRNA -RF00446 miRNA -RF00451 miRNA -RF00452 miRNA -RF00455 miRNA -RF00456 miRNA -RF00464 miRNA -RF00486 miRNA -RF00637 miRNA -RF00638 miRNA -RF00639 miRNA -RF00640 miRNA -RF00641 miRNA -RF00642 miRNA -RF00643 miRNA -RF00644 miRNA -RF00645 miRNA -RF00646 miRNA -RF00647 miRNA -RF00648 miRNA -RF00649 miRNA -RF00650 miRNA -RF00651 miRNA -RF00652 miRNA -RF00653 miRNA -RF00654 miRNA -RF00655 miRNA -RF00656 miRNA -RF00657 miRNA -RF00658 miRNA -RF00659 miRNA -RF00660 miRNA -RF00661 miRNA -RF00662 miRNA -RF00663 miRNA -RF00664 miRNA -RF00665 miRNA -RF00666 miRNA -RF00667 miRNA -RF00668 miRNA -RF00669 miRNA -RF00670 miRNA -RF00671 miRNA -RF00672 miRNA -RF00673 miRNA -RF00674 miRNA -RF00675 miRNA -RF00676 miRNA -RF00677 miRNA -RF00678 miRNA -RF00679 miRNA -RF00680 miRNA -RF00681 miRNA -RF00682 miRNA -RF00683 miRNA -RF00684 miRNA -RF00685 miRNA -RF00686 miRNA -RF00687 miRNA -RF00688 miRNA -RF00689 miRNA -RF00690 miRNA -RF00691 miRNA -RF00692 miRNA -RF00693 miRNA -RF00694 miRNA -RF00695 miRNA -RF00696 miRNA -RF00697 miRNA -RF00698 miRNA -RF00699 miRNA -RF00700 miRNA -RF00701 miRNA -RF00702 miRNA -RF00703 miRNA -RF00704 miRNA -RF00705 miRNA -RF00706 miRNA -RF00707 miRNA -RF00708 miRNA -RF00709 miRNA -RF00710 miRNA -RF00711 miRNA -RF00712 miRNA -RF00713 miRNA -RF00714 miRNA -RF00715 miRNA -RF00716 miRNA -RF00717 miRNA -RF00718 miRNA -RF00719 miRNA -RF00720 miRNA -RF00721 miRNA -RF00722 miRNA -RF00723 miRNA -RF00724 miRNA -RF00725 miRNA -RF00726 miRNA -RF00727 miRNA -RF00728 miRNA -RF00729 miRNA -RF00730 miRNA -RF00731 miRNA -RF00732 miRNA -RF00733 miRNA -RF00734 miRNA -RF00735 miRNA -RF00736 miRNA -RF00737 miRNA -RF00739 miRNA -RF00740 miRNA -RF00741 miRNA -RF00742 miRNA -RF00743 miRNA -RF00744 miRNA -RF00745 miRNA -RF00746 miRNA -RF00747 miRNA -RF00748 miRNA -RF00749 miRNA -RF00750 miRNA -RF00751 miRNA -RF00752 miRNA -RF00753 miRNA -RF00754 miRNA -RF00755 miRNA -RF00756 miRNA -RF00757 miRNA -RF00758 miRNA -RF00760 miRNA -RF00761 miRNA -RF00762 miRNA -RF00763 miRNA -RF00764 miRNA -RF00765 miRNA -RF00766 miRNA -RF00767 miRNA -RF00768 miRNA -RF00769 miRNA -RF00770 miRNA -RF00771 miRNA -RF00772 miRNA -RF00773 miRNA -RF00774 miRNA -RF00775 miRNA -RF00776 miRNA -RF00777 miRNA -RF00778 miRNA -RF00779 miRNA -RF00780 miRNA -RF00781 miRNA -RF00782 miRNA -RF00783 miRNA -RF00784 miRNA -RF00785 miRNA -RF00786 miRNA -RF00787 miRNA -RF00788 miRNA -RF00789 miRNA -RF00790 miRNA -RF00791 miRNA -RF00792 miRNA -RF00793 miRNA -RF00794 miRNA -RF00795 miRNA -RF00796 miRNA -RF00797 miRNA -RF00798 miRNA -RF00799 miRNA -RF00800 miRNA -RF00801 miRNA -RF00802 miRNA -RF00803 miRNA -RF00804 miRNA -RF00805 miRNA -RF00806 miRNA -RF00807 miRNA -RF00808 miRNA -RF00809 miRNA -RF00810 miRNA -RF00811 miRNA -RF00812 miRNA -RF00813 miRNA -RF00814 miRNA -RF00815 miRNA -RF00816 miRNA -RF00817 miRNA -RF00818 miRNA -RF00819 miRNA -RF00820 miRNA -RF00821 miRNA -RF00822 miRNA -RF00823 miRNA -RF00824 miRNA -RF00825 miRNA -RF00826 miRNA -RF00827 miRNA -RF00828 miRNA -RF00829 miRNA -RF00830 miRNA -RF00831 miRNA -RF00832 miRNA -RF00833 miRNA -RF00834 miRNA -RF00835 miRNA -RF00836 miRNA -RF00837 miRNA -RF00838 miRNA -RF00839 miRNA -RF00840 miRNA -RF00841 miRNA -RF00842 miRNA -RF00843 miRNA -RF00844 miRNA -RF00845 miRNA -RF00846 miRNA -RF00847 miRNA -RF00848 miRNA -RF00849 miRNA -RF00850 miRNA -RF00851 miRNA -RF00852 miRNA -RF00853 miRNA -RF00854 miRNA -RF00855 miRNA -RF00856 miRNA -RF00857 miRNA -RF00858 miRNA -RF00859 miRNA -RF00861 miRNA -RF00862 miRNA -RF00863 miRNA -RF00864 miRNA -RF00865 miRNA -RF00866 miRNA -RF00867 miRNA -RF00868 miRNA -RF00869 miRNA -RF00870 miRNA -RF00871 miRNA -RF00872 miRNA -RF00873 miRNA -RF00874 miRNA -RF00875 miRNA -RF00876 miRNA -RF00877 miRNA -RF00878 miRNA -RF00879 miRNA -RF00882 miRNA -RF00883 miRNA -RF00884 miRNA -RF00885 miRNA -RF00886 miRNA -RF00887 miRNA -RF00888 miRNA -RF00890 miRNA -RF00891 miRNA -RF00892 miRNA -RF00893 miRNA -RF00894 miRNA -RF00895 miRNA -RF00896 miRNA -RF00897 miRNA -RF00898 miRNA -RF00899 miRNA -RF00900 miRNA -RF00901 miRNA -RF00902 miRNA -RF00903 miRNA -RF00904 miRNA -RF00905 miRNA -RF00906 miRNA -RF00907 miRNA -RF00908 miRNA -RF00909 miRNA -RF00910 miRNA -RF00911 miRNA -RF00912 miRNA -RF00914 miRNA -RF00915 miRNA -RF00917 miRNA -RF00918 miRNA -RF00919 miRNA -RF00920 miRNA -RF00921 miRNA -RF00922 miRNA -RF00925 miRNA -RF00926 miRNA -RF00927 miRNA -RF00928 miRNA -RF00929 miRNA -RF00931 miRNA -RF00932 miRNA -RF00933 miRNA -RF00934 miRNA -RF00935 miRNA -RF00936 miRNA -RF00937 miRNA -RF00939 miRNA -RF00940 miRNA -RF00941 miRNA -RF00942 miRNA -RF00943 miRNA -RF00945 miRNA -RF00946 miRNA -RF00947 miRNA -RF00948 miRNA -RF00949 miRNA -RF00950 miRNA -RF00951 miRNA -RF00952 miRNA -RF00953 miRNA -RF00954 miRNA -RF00955 miRNA -RF00956 miRNA -RF00957 miRNA -RF00958 miRNA -RF00959 miRNA -RF00960 miRNA -RF00961 miRNA -RF00962 miRNA -RF00963 miRNA -RF00964 miRNA -RF00965 miRNA -RF00966 miRNA -RF00967 miRNA -RF00968 miRNA -RF00969 miRNA -RF00970 miRNA -RF00971 miRNA -RF00972 miRNA -RF00973 miRNA -RF00974 miRNA -RF00975 miRNA -RF00976 miRNA -RF00977 miRNA -RF00978 miRNA -RF00979 miRNA -RF00980 miRNA -RF00981 miRNA -RF00983 miRNA -RF00984 miRNA -RF00985 miRNA -RF00986 miRNA -RF00987 miRNA -RF00988 miRNA -RF00989 miRNA -RF00990 miRNA -RF00991 miRNA -RF00992 miRNA -RF00993 miRNA -RF00994 miRNA -RF00995 miRNA -RF00996 miRNA -RF00997 miRNA -RF00998 miRNA -RF00999 miRNA -RF01000 miRNA -RF01001 miRNA -RF01002 miRNA -RF01003 miRNA -RF01004 miRNA -RF01005 miRNA -RF01006 miRNA -RF01007 miRNA -RF01008 miRNA -RF01009 miRNA -RF01010 miRNA -RF01011 miRNA -RF01012 miRNA -RF01013 miRNA -RF01014 miRNA -RF01015 miRNA -RF01016 miRNA -RF01017 miRNA -RF01018 miRNA -RF01019 miRNA -RF01020 miRNA -RF01021 miRNA -RF01022 miRNA -RF01023 miRNA -RF01024 miRNA -RF01025 miRNA -RF01026 miRNA -RF01027 miRNA -RF01028 miRNA -RF01029 miRNA -RF01030 miRNA -RF01031 miRNA -RF01032 miRNA -RF01033 miRNA -RF01034 miRNA -RF01035 miRNA -RF01036 miRNA -RF01037 miRNA -RF01038 miRNA -RF01039 miRNA -RF01040 miRNA -RF01041 miRNA -RF01042 miRNA -RF01043 miRNA -RF01044 miRNA -RF01045 miRNA -RF01059 miRNA -RF01061 miRNA -RF01063 miRNA -RF01064 miRNA -RF01117 miRNA -RF01314 miRNA -RF01413 miRNA -RF01895 miRNA -RF01896 miRNA -RF01897 miRNA -RF01898 miRNA -RF01899 miRNA -RF01900 miRNA -RF01901 miRNA -RF01902 miRNA -RF01903 miRNA -RF01910 miRNA -RF01911 miRNA -RF01912 miRNA -RF01913 miRNA -RF01914 miRNA -RF01915 miRNA -RF01916 miRNA -RF01917 miRNA -RF01918 miRNA -RF01919 miRNA -RF01920 miRNA -RF01921 miRNA -RF01922 miRNA -RF01923 miRNA -RF01924 miRNA -RF01925 miRNA -RF01926 miRNA -RF01927 miRNA -RF01936 miRNA -RF01937 miRNA -RF01938 miRNA -RF01939 miRNA -RF01940 miRNA -RF01941 miRNA -RF01942 miRNA -RF01943 miRNA -RF01944 miRNA -RF01945 miRNA -RF01996 miRNA -RF01997 miRNA -RF02000 miRNA -RF02002 miRNA -RF02006 miRNA -RF02007 miRNA -RF02008 miRNA -RF02009 miRNA -RF02010 miRNA -RF02011 miRNA -RF02013 miRNA -RF02014 miRNA -RF02015 miRNA -RF02016 miRNA -RF02017 miRNA -RF02018 miRNA -RF02019 miRNA -RF02020 miRNA -RF02021 miRNA -RF02022 miRNA -RF02023 miRNA -RF02024 miRNA -RF02025 miRNA -RF02026 miRNA -RF02027 miRNA -RF02028 miRNA -RF02061 miRNA -RF02092 miRNA -RF02093 miRNA -RF02094 miRNA -RF02095 miRNA -RF02096 miRNA -RF02097 miRNA -RF02214 miRNA -RF02244 miRNA -RF02245 miRNA -RF02254 miRNA -RF00001 rRNA -RF00002 rRNA -RF01118 rRNA -RF01960 rRNA -RF00177 rRNA -RF01959 rRNA -RF00003 snRNA -RF00004 snRNA -RF00007 snRNA -RF00012 snRNA -RF00015 snRNA -RF00016 snRNA -RF00020 snRNA -RF00026 snRNA -RF00045 snRNA -RF00046 snRNA -RF00049 snRNA -RF00054 snRNA -RF00055 snRNA -RF00056 snRNA -RF00065 snRNA -RF00066 snRNA -RF00067 snRNA -RF00068 snRNA -RF00069 snRNA -RF00070 snRNA -RF00071 snRNA -RF00072 snRNA -RF00085 snRNA -RF00086 snRNA -RF00087 snRNA -RF00088 snRNA -RF00089 snRNA -RF00090 snRNA -RF00091 snRNA -RF00092 snRNA -RF00093 snRNA -RF00095 snRNA -RF00096 snRNA -RF00097 snRNA -RF00099 snRNA -RF00105 snRNA -RF00108 snRNA -RF00132 snRNA -RF00133 snRNA -RF00134 snRNA -RF00135 snRNA -RF00136 snRNA -RF00137 snRNA -RF00138 snRNA -RF00139 snRNA -RF00142 snRNA -RF00145 snRNA -RF00147 snRNA -RF00149 snRNA -RF00150 snRNA -RF00151 snRNA -RF00152 snRNA -RF00153 snRNA -RF00154 snRNA -RF00155 snRNA -RF00156 snRNA -RF00157 snRNA -RF00158 snRNA -RF00159 snRNA -RF00160 snRNA -RF00181 snRNA -RF00186 snRNA -RF00187 snRNA -RF00188 snRNA -RF00189 snRNA -RF00190 snRNA -RF00191 snRNA -RF00200 snRNA -RF00201 snRNA -RF00202 snRNA -RF00203 snRNA -RF00204 snRNA -RF00205 snRNA -RF00206 snRNA -RF00208 snRNA -RF00211 snRNA -RF00212 snRNA -RF00213 snRNA -RF00217 snRNA -RF00218 snRNA -RF00221 snRNA -RF00231 snRNA -RF00263 snRNA -RF00264 snRNA -RF00265 snRNA -RF00266 snRNA -RF00267 snRNA -RF00268 snRNA -RF00270 snRNA -RF00271 snRNA -RF00272 snRNA -RF00273 snRNA -RF00274 snRNA -RF00275 snRNA -RF00276 snRNA -RF00277 snRNA -RF00278 snRNA -RF00279 snRNA -RF00280 snRNA -RF00281 snRNA -RF00282 snRNA -RF00283 snRNA -RF00284 snRNA -RF00285 snRNA -RF00286 snRNA -RF00287 snRNA -RF00288 snRNA -RF00289 snRNA -RF00291 snRNA -RF00292 snRNA -RF00293 snRNA -RF00294 snRNA -RF00295 snRNA -RF00296 snRNA -RF00300 snRNA -RF00301 snRNA -RF00302 snRNA -RF00303 snRNA -RF00304 snRNA -RF00305 snRNA -RF00306 snRNA -RF00307 snRNA -RF00309 snRNA -RF00310 snRNA -RF00311 snRNA -RF00312 snRNA -RF00313 snRNA -RF00314 snRNA -RF00315 snRNA -RF00316 snRNA -RF00317 snRNA -RF00318 snRNA -RF00319 snRNA -RF00320 snRNA -RF00321 snRNA -RF00322 snRNA -RF00323 snRNA -RF00324 snRNA -RF00325 snRNA -RF00326 snRNA -RF00327 snRNA -RF00328 snRNA -RF00329 snRNA -RF00330 snRNA -RF00331 snRNA -RF00332 snRNA -RF00333 snRNA -RF00334 snRNA -RF00335 snRNA -RF00336 snRNA -RF00337 snRNA -RF00338 snRNA -RF00339 snRNA -RF00340 snRNA -RF00341 snRNA -RF00342 snRNA -RF00343 snRNA -RF00344 snRNA -RF00345 snRNA -RF00348 snRNA -RF00349 snRNA -RF00350 snRNA -RF00351 snRNA -RF00352 snRNA -RF00353 snRNA -RF00355 snRNA -RF00356 snRNA -RF00357 snRNA -RF00358 snRNA -RF00359 snRNA -RF00360 snRNA -RF00361 snRNA -RF00377 snRNA -RF00392 snRNA -RF00393 snRNA -RF00394 snRNA -RF00396 snRNA -RF00397 snRNA -RF00398 snRNA -RF00399 snRNA -RF00400 snRNA -RF00401 snRNA -RF00402 snRNA -RF00403 snRNA -RF00404 snRNA -RF00405 snRNA -RF00406 snRNA -RF00407 snRNA -RF00408 snRNA -RF00409 snRNA -RF00410 snRNA -RF00411 snRNA -RF00412 snRNA -RF00413 snRNA -RF00414 snRNA -RF00415 snRNA -RF00416 snRNA -RF00417 snRNA -RF00418 snRNA -RF00419 snRNA -RF00420 snRNA -RF00421 snRNA -RF00422 snRNA -RF00423 snRNA -RF00424 snRNA -RF00425 snRNA -RF00426 snRNA -RF00427 snRNA -RF00428 snRNA -RF00429 snRNA -RF00430 snRNA -RF00431 snRNA -RF00432 snRNA -RF00438 snRNA -RF00439 snRNA -RF00440 snRNA -RF00441 snRNA -RF00443 snRNA -RF00471 snRNA -RF00472 snRNA -RF00473 snRNA -RF00474 snRNA -RF00475 snRNA -RF00476 snRNA -RF00477 snRNA -RF00478 snRNA -RF00479 snRNA -RF00482 snRNA -RF00488 snRNA -RF00492 snRNA -RF00493 snRNA -RF00494 snRNA -RF00509 snRNA -RF00526 snRNA -RF00527 snRNA -RF00528 snRNA -RF00529 snRNA -RF00530 snRNA -RF00531 snRNA -RF00532 snRNA -RF00533 snRNA -RF00535 snRNA -RF00536 snRNA -RF00537 snRNA -RF00538 snRNA -RF00539 snRNA -RF00540 snRNA -RF00541 snRNA -RF00542 snRNA -RF00543 snRNA -RF00544 snRNA -RF00545 snRNA -RF00546 snRNA -RF00548 snRNA -RF00553 snRNA -RF00554 snRNA -RF00560 snRNA -RF00561 snRNA -RF00562 snRNA -RF00563 snRNA -RF00564 snRNA -RF00565 snRNA -RF00566 snRNA -RF00567 snRNA -RF00568 snRNA -RF00569 snRNA -RF00570 snRNA -RF00571 snRNA -RF00572 snRNA -RF00573 snRNA -RF00574 snRNA -RF00575 snRNA -RF00576 snRNA -RF00577 snRNA -RF00578 snRNA -RF00579 snRNA -RF00580 snRNA -RF00581 snRNA -RF00582 snRNA -RF00584 snRNA -RF00586 snRNA -RF00588 snRNA -RF00591 snRNA -RF00592 snRNA -RF00593 snRNA -RF00594 snRNA -RF00598 snRNA -RF00599 snRNA -RF00600 snRNA -RF00601 snRNA -RF00602 snRNA -RF00603 snRNA -RF00604 snRNA -RF00606 snRNA -RF00607 snRNA -RF00608 snRNA -RF00609 snRNA -RF00610 snRNA -RF00611 snRNA -RF00612 snRNA -RF00613 snRNA -RF00614 snRNA -RF00618 snRNA -RF00619 snRNA -RF01119 snRNA -RF01120 snRNA -RF01121 snRNA -RF01122 snRNA -RF01123 snRNA -RF01124 snRNA -RF01125 snRNA -RF01126 snRNA -RF01127 snRNA -RF01128 snRNA -RF01129 snRNA -RF01130 snRNA -RF01131 snRNA -RF01132 snRNA -RF01133 snRNA -RF01134 snRNA -RF01135 snRNA -RF01136 snRNA -RF01137 snRNA -RF01138 snRNA -RF01139 snRNA -RF01140 snRNA -RF01141 snRNA -RF01142 snRNA -RF01143 snRNA -RF01144 snRNA -RF01145 snRNA -RF01146 snRNA -RF01147 snRNA -RF01148 snRNA -RF01149 snRNA -RF01150 snRNA -RF01151 snRNA -RF01152 snRNA -RF01153 snRNA -RF01155 snRNA -RF01156 snRNA -RF01157 snRNA -RF01158 snRNA -RF01159 snRNA -RF01160 snRNA -RF01161 snRNA -RF01162 snRNA -RF01163 snRNA -RF01164 snRNA -RF01165 snRNA -RF01166 snRNA -RF01167 snRNA -RF01168 snRNA -RF01169 snRNA -RF01170 snRNA -RF01171 snRNA -RF01172 snRNA -RF01173 snRNA -RF01174 snRNA -RF01175 snRNA -RF01176 snRNA -RF01177 snRNA -RF01178 snRNA -RF01179 snRNA -RF01180 snRNA -RF01181 snRNA -RF01182 snRNA -RF01183 snRNA -RF01184 snRNA -RF01185 snRNA -RF01186 snRNA -RF01188 snRNA -RF01189 snRNA -RF01190 snRNA -RF01191 snRNA -RF01192 snRNA -RF01193 snRNA -RF01194 snRNA -RF01195 snRNA -RF01196 snRNA -RF01197 snRNA -RF01198 snRNA -RF01199 snRNA -RF01200 snRNA -RF01201 snRNA -RF01202 snRNA -RF01203 snRNA -RF01204 snRNA -RF01205 snRNA -RF01206 snRNA -RF01207 snRNA -RF01208 snRNA -RF01209 snRNA -RF01210 snRNA -RF01211 snRNA -RF01212 snRNA -RF01213 snRNA -RF01214 snRNA -RF01215 snRNA -RF01216 snRNA -RF01218 snRNA -RF01219 snRNA -RF01220 snRNA -RF01221 snRNA -RF01222 snRNA -RF01223 snRNA -RF01224 snRNA -RF01225 snRNA -RF01226 snRNA -RF01227 snRNA -RF01228 snRNA -RF01229 snRNA -RF01230 snRNA -RF01231 snRNA -RF01232 snRNA -RF01233 snRNA -RF01234 snRNA -RF01235 snRNA -RF01236 snRNA -RF01237 snRNA -RF01238 snRNA -RF01239 snRNA -RF01240 snRNA -RF01241 snRNA -RF01242 snRNA -RF01243 snRNA -RF01244 snRNA -RF01245 snRNA -RF01246 snRNA -RF01247 snRNA -RF01248 snRNA -RF01249 snRNA -RF01250 snRNA -RF01251 snRNA -RF01252 snRNA -RF01253 snRNA -RF01254 snRNA -RF01255 snRNA -RF01256 snRNA -RF01257 snRNA -RF01258 snRNA -RF01259 snRNA -RF01260 snRNA -RF01261 snRNA -RF01262 snRNA -RF01263 snRNA -RF01264 snRNA -RF01265 snRNA -RF01266 snRNA -RF01267 snRNA -RF01268 snRNA -RF01269 snRNA -RF01270 snRNA -RF01271 snRNA -RF01272 snRNA -RF01273 snRNA -RF01274 snRNA -RF01275 snRNA -RF01276 snRNA -RF01277 snRNA -RF01278 snRNA -RF01279 snRNA -RF01280 snRNA -RF01281 snRNA -RF01283 snRNA -RF01284 snRNA -RF01285 snRNA -RF01286 snRNA -RF01287 snRNA -RF01288 snRNA -RF01289 snRNA -RF01290 snRNA -RF01291 snRNA -RF01292 snRNA -RF01293 snRNA -RF01294 snRNA -RF01295 snRNA -RF01296 snRNA -RF01297 snRNA -RF01298 snRNA -RF01299 snRNA -RF01300 snRNA -RF01301 snRNA -RF01302 snRNA -RF01303 snRNA -RF01304 snRNA -RF01305 snRNA -RF01306 snRNA -RF01307 snRNA -RF01308 snRNA -RF01309 snRNA -RF01310 snRNA -RF01311 snRNA -RF01312 snRNA -RF01420 snRNA -RF01421 snRNA -RF01422 snRNA -RF01423 snRNA -RF01424 snRNA -RF01425 snRNA -RF01426 snRNA -RF01427 snRNA -RF01428 snRNA -RF01429 snRNA -RF01430 snRNA -RF01431 snRNA -RF01432 snRNA -RF01433 snRNA -RF01434 snRNA -RF01435 snRNA -RF01436 snRNA -RF01437 snRNA -RF01438 snRNA -RF01439 snRNA -RF01440 snRNA -RF01441 snRNA -RF01442 snRNA -RF01443 snRNA -RF01444 snRNA -RF01445 snRNA -RF01446 snRNA -RF01447 snRNA -RF01448 snRNA -RF01449 snRNA -RF01450 snRNA -RF01451 snRNA -RF01452 snRNA -RF01498 snRNA -RF01499 snRNA -RF01500 snRNA -RF01501 snRNA -RF01505 snRNA -RF01506 snRNA -RF01507 snRNA -RF01509 snRNA -RF01511 snRNA -RF01513 snRNA -RF01514 snRNA -RF01515 snRNA -RF01516 snRNA -RF01522 snRNA -RF01523 snRNA -RF01524 snRNA -RF01525 snRNA -RF01526 snRNA -RF01531 snRNA -RF01532 snRNA -RF01533 snRNA -RF01534 snRNA -RF01535 snRNA -RF01536 snRNA -RF01537 snRNA -RF01538 snRNA -RF01539 snRNA -RF01540 snRNA -RF01541 snRNA -RF01542 snRNA -RF01543 snRNA -RF01544 snRNA -RF01545 snRNA -RF01546 snRNA -RF01547 snRNA -RF01548 snRNA -RF01549 snRNA -RF01550 snRNA -RF01551 snRNA -RF01552 snRNA -RF01553 snRNA -RF01554 snRNA -RF01555 snRNA -RF01556 snRNA -RF01557 snRNA -RF01558 snRNA -RF01559 snRNA -RF01560 snRNA -RF01561 snRNA -RF01562 snRNA -RF01563 snRNA -RF01564 snRNA -RF01565 snRNA -RF01566 snRNA -RF01567 snRNA -RF01568 snRNA -RF01569 snRNA -RF01570 snRNA -RF01572 snRNA -RF01573 snRNA -RF01574 snRNA -RF01575 snRNA -RF01576 snRNA -RF01583 snRNA -RF01584 snRNA -RF01585 snRNA -RF01586 snRNA -RF01587 snRNA -RF01588 snRNA -RF01589 snRNA -RF01590 snRNA -RF01591 snRNA -RF01592 snRNA -RF01593 snRNA -RF01594 snRNA -RF01595 snRNA -RF01596 snRNA -RF01597 snRNA -RF01598 snRNA -RF01599 snRNA -RF01600 snRNA -RF01601 snRNA -RF01602 snRNA -RF01603 snRNA -RF01604 snRNA -RF01605 snRNA -RF01606 snRNA -RF01607 snRNA -RF01608 snRNA -RF01609 snRNA -RF01610 snRNA -RF01611 snRNA -RF01612 snRNA -RF01613 snRNA -RF01614 snRNA -RF01615 snRNA -RF01617 snRNA -RF01618 snRNA -RF01620 snRNA -RF01621 snRNA -RF01622 snRNA -RF01624 snRNA -RF01625 snRNA -RF01626 snRNA -RF01627 snRNA -RF01628 snRNA -RF01629 snRNA -RF01630 snRNA -RF01631 snRNA -RF01632 snRNA -RF01633 snRNA -RF01634 snRNA -RF01635 snRNA -RF01636 snRNA -RF01637 snRNA -RF01638 snRNA -RF01639 snRNA -RF01640 snRNA -RF01641 snRNA -RF01642 snRNA -RF01644 snRNA -RF01645 snRNA -RF01646 snRNA -RF01647 snRNA -RF01648 snRNA -RF01649 snRNA -RF01650 snRNA -RF01651 snRNA -RF01652 snRNA -RF01653 snRNA -RF01654 snRNA -RF01655 snRNA -RF01658 snRNA -RF01659 snRNA -RF01660 snRNA -RF01661 snRNA -RF01662 snRNA -RF01664 snRNA -RF01802 snRNA -RF01829 snRNA -RF01844 snRNA -RF01846 snRNA -RF01847 snRNA -RF01848 snRNA -RF01860 snRNA -RF01861 snRNA -RF01862 snRNA -RF01863 snRNA -RF01864 snRNA -RF01866 snRNA -RF02163 snRNA -RF00014 sRNA -RF00018 sRNA -RF00021 sRNA -RF00034 sRNA -RF00035 sRNA -RF00057 sRNA -RF00077 sRNA -RF00078 sRNA -RF00079 sRNA -RF00081 sRNA -RF00082 sRNA -RF00083 sRNA -RF00084 sRNA -RF00101 sRNA -RF00110 sRNA -RF00111 sRNA -RF00112 sRNA -RF00113 sRNA -RF00115 sRNA -RF00116 sRNA -RF00117 sRNA -RF00118 sRNA -RF00119 sRNA -RF00121 sRNA -RF00122 sRNA -RF00124 sRNA -RF00125 sRNA -RF00126 sRNA -RF00128 sRNA -RF00166 sRNA -RF00195 sRNA -RF00368 sRNA -RF00369 sRNA -RF00370 sRNA -RF00371 sRNA -RF00372 sRNA -RF00378 sRNA -RF00444 sRNA -RF00505 sRNA -RF00519 sRNA -RF00615 sRNA -RF00616 sRNA -RF01116 sRNA -RF01385 sRNA -RF01386 sRNA -RF01387 sRNA -RF01388 sRNA -RF01389 sRNA -RF01390 sRNA -RF01391 sRNA -RF01392 sRNA -RF01393 sRNA -RF01394 sRNA -RF01395 sRNA -RF01396 sRNA -RF01397 sRNA -RF01398 sRNA -RF01399 sRNA -RF01400 sRNA -RF01401 sRNA -RF01402 sRNA -RF01403 sRNA -RF01404 sRNA -RF01405 sRNA -RF01406 sRNA -RF01407 sRNA -RF01408 sRNA -RF01409 sRNA -RF01410 sRNA -RF01411 sRNA -RF01412 sRNA -RF01457 sRNA -RF01459 sRNA -RF01460 sRNA -RF01461 sRNA -RF01462 sRNA -RF01463 sRNA -RF01464 sRNA -RF01465 sRNA -RF01466 sRNA -RF01467 sRNA -RF01468 sRNA -RF01469 sRNA -RF01470 sRNA -RF01471 sRNA -RF01472 sRNA -RF01473 sRNA -RF01474 sRNA -RF01476 sRNA -RF01477 sRNA -RF01478 sRNA -RF01479 sRNA -RF01487 sRNA -RF01488 sRNA -RF01489 sRNA -RF01492 sRNA -RF01493 sRNA -RF01494 sRNA -RF01496 sRNA -RF01503 sRNA -RF01504 sRNA -RF01512 sRNA -RF01519 sRNA -RF01520 sRNA -RF01521 sRNA -RF01527 sRNA -RF01528 sRNA -RF01529 sRNA -RF01530 sRNA -RF01571 sRNA -RF01578 sRNA -RF01579 sRNA -RF01580 sRNA -RF01581 sRNA -RF01582 sRNA -RF01619 sRNA -RF01623 sRNA -RF01643 sRNA -RF01656 sRNA -RF01663 sRNA -RF01665 sRNA -RF01668 sRNA -RF01669 sRNA -RF01670 sRNA -RF01671 sRNA -RF01672 sRNA -RF01673 sRNA -RF01674 sRNA -RF01675 sRNA -RF01676 sRNA -RF01677 sRNA -RF01678 sRNA -RF01679 sRNA -RF01680 sRNA -RF01681 sRNA -RF01682 sRNA -RF01683 sRNA -RF01684 sRNA -RF01685 sRNA -RF01686 sRNA -RF01687 sRNA -RF01690 sRNA -RF01691 sRNA -RF01693 sRNA -RF01694 sRNA -RF01696 sRNA -RF01698 sRNA -RF01699 sRNA -RF01700 sRNA -RF01701 sRNA -RF01702 sRNA -RF01703 sRNA -RF01705 sRNA -RF01706 sRNA -RF01710 sRNA -RF01712 sRNA -RF01714 sRNA -RF01718 sRNA -RF01719 sRNA -RF01722 sRNA -RF01723 sRNA -RF01728 sRNA -RF01732 sRNA -RF01742 sRNA -RF01757 sRNA -RF01762 sRNA -RF01775 sRNA -RF01781 sRNA -RF01782 sRNA -RF01783 sRNA -RF01784 sRNA -RF01789 sRNA -RF01791 sRNA -RF01793 sRNA -RF01796 sRNA -RF01808 sRNA -RF01810 sRNA -RF01812 sRNA -RF01814 sRNA -RF01815 sRNA -RF01816 sRNA -RF01817 sRNA -RF01818 sRNA -RF01819 sRNA -RF01820 sRNA -RF01821 sRNA -RF01822 sRNA -RF01823 sRNA -RF01827 sRNA -RF01828 sRNA -RF01858 sRNA -RF01867 sRNA -RF02029 sRNA -RF02030 sRNA -RF02031 sRNA -RF02049 sRNA -RF02050 sRNA -RF02051 sRNA -RF02052 sRNA -RF02053 sRNA -RF02054 sRNA -RF02055 sRNA -RF02056 sRNA -RF02057 sRNA -RF02059 sRNA -RF02060 sRNA -RF02062 sRNA -RF02063 sRNA -RF02064 sRNA -RF02065 sRNA -RF02066 sRNA -RF02067 sRNA -RF02070 sRNA -RF02071 sRNA -RF02072 sRNA -RF02073 sRNA -RF02074 sRNA -RF02075 sRNA -RF02077 sRNA -RF02078 sRNA -RF02079 sRNA -RF02080 sRNA -RF02081 sRNA -RF02082 sRNA -RF02099 sRNA -RF02100 sRNA -RF02151 sRNA -RF02221 sRNA -RF02222 sRNA -RF02223 sRNA -RF02224 sRNA -RF02225 sRNA -RF02226 sRNA -RF02227 sRNA -RF02228 sRNA -RF02230 sRNA -RF02231 sRNA -RF02232 sRNA -RF02233 sRNA -RF02234 sRNA -RF02235 sRNA -RF02236 sRNA -RF02237 sRNA -RF02238 sRNA -RF02239 sRNA -RF02240 sRNA -RF02241 sRNA -RF02242 sRNA -RF02243 sRNA -RF02268 sRNA -RF02269 sRNA -RF00127 sRNA -RF01852 tRNA -RF00005 tRNA diff -r 45de5e1ff487 -r 5691802f074b filterReadsByLength.pl --- a/filterReadsByLength.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2010-01 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","min=i","max=i","o=s","mark:s","h"); -if (!(defined $opts{i} and defined $opts{o} and defined $opts{min} and defined $opts{max}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample"; -my @mark=split /,/,$mark; - - -open OUT,">$opts{o}"; -open IN,"<$opts{i}"; -my %hash;my %reads; -while (my $aline=) { - chomp $aline; - my $seq=; - chomp $seq; - - if($aline=~/:([\d|_]+)_x(\d+)$/){ - my @ss=split/_/,$1; - for (my $i=0;$i<@ss;$i++) { - $hash{length($seq)}[$i]++ if($ss[$i]>0); - $hash{length($seq)}[$i] +=0 if($ss[$i]>0); - $reads{length($seq)}[$i]+=$ss[$i]; - } - } - #else{$reads{length($seq)}+=1;} - if (length ($seq)>=$opts{'min'} && length ($seq) <=$opts{'max'}) { - print OUT "$aline\n$seq\n"; - } -} -close IN; -close OUT; - -my $dir=dirname($opts{'o'}); -chdir $dir; -my $lengthfile=$dir."/reads_length_distribution.txt"; -open OUT, ">$lengthfile"; -open R,">$dir/length_distribution.R"; - -print OUT "Tags length\t@mark\n"; - -my $samNo=@mark; -my $avalue=""; -my @length=sort{$a<=>$b} keys %hash; -foreach (@length) { - print OUT $_,"\t@{$hash{$_}}\n"; - my $vv=join ", ",@{$hash{$_}}; - $avalue .="$vv,"; -} -$avalue =~s/,$//; -my $lengths=join ",",@length; -my $marks=join "\",\"",@mark; - -print R "a<-c($avalue) -b<-matrix(a,ncol=$samNo,byrow=T) -cl<-colors() -names=c($lengths) -legends=c(\"$marks\") -png(\"Tags_length.png\",width=800,height=600) -barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") -abline(h=0) -dev.off() - -"; -$avalue=""; -print OUT "\nReads length\t@mark\n"; -foreach (@length) { - print OUT $_,"\t@{$reads{$_}}\n"; - my $vv=join ", ", @{$reads{$_}}; - $avalue .= "$vv,"; -} -$avalue =~s/,$//; - -print R "a<-c($avalue)\n -b<-matrix(a,ncol=$samNo,byrow=T) - -png(\"Reads_length.png\",width=800,height=600) -barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") -abline(h=0) -dev.off() - -"; -close OUT; -close R; - -system ("R CMD BATCH $dir/length_distribution.R"); - -#system ("rm $dir/length_distribution.R"); -#system ("rm $dir/length_distribution.Rout"); -#system ("rm $dir/.RData"); -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -min -max -mark -options: - --i input file --o output file --min reads min length. --max reads max length. --mark string #sample name eg: samA,samB,samC --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b html.pl --- a/html.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,269 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-5-29 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","format=s","o=s","h"); -if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments -&usage; -} -my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath); -my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir); -open IN,"<$opts{i}"; -$config=; chomp $config; -$prepath=; chomp $prepath; -$rfampath=;chomp $rfampath; -$knownpath=; chomp $knownpath; -$genomepath=; chomp $genomepath; -$novelpath=; chomp $novelpath; -close IN; -my @tmp=split/\//,$prepath; -$predir=$tmp[-1]; -@tmp=split/\//,$rfampath; -$rfamdir=$tmp[-1]; -@tmp=split/\//,$knownpath; -$knowndir=$tmp[-1]; -@tmp=split/\//,$genomepath; -$genomedir=$tmp[-1]; -@tmp=split/\//,$novelpath; -$noveldir=$tmp[-1]; - -my $dir=dirname($opts{'o'}); - -open OUT ,">$opts{'o'}"; -print OUT "\n \n Analysis Report \n - \n

\n \n Small RNA Analysis Report\n \n

-

1. Sequence No. and quality

-

1.1 Sequece No.

-"; - -### raw data no -open IN,"<$config"; -my @files;my @marks; my @rawNo; -while (my $aline=) { - chomp $aline; - my @tmp=split/\t/,$aline; - push @files,$tmp[0]; - - my $no=`less $tmp[0] |wc -l `; - chomp $no; - if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { - $no=$no/4; - } - else{ - $no=$no/2; - } - push @rawNo,$no; - - push @marks,$tmp[1]; -} -close IN; - -### preprocess -unless ($prepath=~/\/$/) { - $prepath .="/"; -} - -my @trimNo;my @collapse; -my $collapsefile=$prepath."collapse_reads.fa"; -open IN,"<$collapsefile"; -while (my $aline=) { - chomp $aline; - ; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $trimNo[$i] +=$lng[$i]; - $collapse[$i] ++; - } - } -} -close IN; - -my @cleanR;my @cleanT; -my $clean=$prepath."collapse_reads_19_28.fa"; -open IN,"<$clean"; -while (my $aline=) { - chomp $aline; - ; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $cleanR[$i] +=$lng[$i]; - $cleanT[$i] ++; - } - } -} -close IN; - -print OUT " - - -"; -foreach (@marks) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@rawNo) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@trimNo) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@collapse) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@cleanR) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@cleanT) { - print OUT "\n"; -} -print OUT "\n
  $_
Raw Reads No. $_
Reads No. After Trimed 3\' adapter $_
Unique Tags No. $_
Clean Reads No. $_
Clean Tags No. $_
"; -print OUT "

-Note:
-The raw data file path is: $files[0]
-"; -for (my $i=1;$i<@files;$i++) { - print OUT "           $files[$i]
"; -} -print OUT "The collapsed file path is: $collapsefile
-The clean data file path is: $clean
-

-

1. Sequence length count

-

1.1 Reads length

-"; - -print OUT "\"Reads_length.png\" -

1.2 Tags length count

-\"Tags_length.png\" -

Note:
The sequence length data: length file -

-"; - -#### rfam -unless ($rfampath=~/\/$/) { - $rfampath .="/"; -} -print OUT "

2. Rfam non-miRNA annotation

-

2.1 Reads count

- - -"; - -my @rfamR; my @rfamT; -my $tag=1; -open IN,"<$dir/rfam_non-miRNA_annotation.txt"; -while (my $aline=) { - chomp $aline; - $tag=0 if($aline=~/tags\s+number/); - next if($aline=~/^\#/); - next if($aline=~/^\s*$/); - my @tmp=split/\s+/,$aline; - if($tag == 1){push @rfamR,[@tmp];} - else{push @rfamT,[@tmp];} -} -close IN; - - -print OUT "\n"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@rfamR;$i++) { - print OUT " - - - "; - for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { - print OUT "\n"; - } -} - -print OUT "\n
RNA Name $_
$rfamR[$i][0] $rfamR[$i][$j]
-

2.2 Tags count

- - - \n"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@rfamT;$i++) { - print OUT " - - - "; - for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { - print OUT "\n"; - } -} -print OUT "\n
RNA Name $_
$rfamT[$i][0] $rfamT[$i][$j]
-

Note:
The rfam mapping results is: $rfampath"; -print OUT "rfam_mapped.bwt

-

3. MicroRNA result

-

3.1 known microRNA

-

The known microRNA express list: known_microRNA_express.txt
- The known microRNA alngment file: known_microRNA_express.aln
- The known moRs file: known_microRNA_express.moRs
- The known microRNA mature sequence file: known_microRNA_mature.fa
- The knowm microRNA precursor sequence file: known_microRNA_precursor.fa -

- -

3.2 novel microRNA

-

The novel microRNA prediction file: microRNA_prediction.mrd
- The novel microRNA express list: novel_microRNA_express.txt
- The novel microRNA mature sequence file: novel_microRNA_mature.fa
- The novel microRNA precursor sequence file: novel_microRNA_precursor.fa -

-"; - - - -print OUT " - - -"; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -o -options: --o output file --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b miRNA_Express_and_sequence.pl --- a/miRNA_Express_and_sequence.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,173 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-6-4 -#Modified: -#Description: solexa miRNA express and sequence -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h"); -if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'list'}; -my $out=$opts{'fa'}; -my $preout=$opts{'pre'}; - -=cut -my %hash_pri; -open PRI,"<$opts{p}"; -while (my $aline=) { - chomp $aline; - if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;} -} -close PRI; -=cut - -open IN,"<$filein"; #input file -open OUT,">$fileout"; #output file -open FA ,">$out"; -open PRE,">$preout"; - -print OUT "#ID\tcoordinate\tpos1\tpos2"; -my @marks=split/\,/,$opts{'tag'}; -foreach (@marks) { - print OUT "\t",$_,"_matureExp"; -} -foreach (@marks) { - print OUT "\t",$_,"_starExp"; -} -foreach (@marks) { - print OUT "\t",$_,"_totalExp"; -} - -print OUT "\n"; - -my (%uniq_id,$novel); -while (my $aline=) { - chomp $aline; - until ($aline =~ /^score\s+[-\d\.]+/){ - $aline = ; - if (eof) {last;} - } - if (eof) {last;} -########## miRNA ID ################ - $novel++; -########### annotate#################### - do {$aline=;} until($aline=~/flank_first_end/) ; - chomp $aline; - my @flank1=split/\t/,$aline; - do {$aline=;} until($aline=~/flank_second_beg/) ; - chomp $aline; - my @flank2=split/\t/,$aline; -# -########## mature start loop pre #### - do {$aline=;} until($aline=~/mature_beg/) ; - chomp $aline; - my @start=split/\t/,$aline; -# $start[1] -=$flank1[1]; - do {$aline=;} until($aline=~/mature_end/) ; - chomp $aline; - my @end=split/\t/,$aline; -# $end[1] -=$flank1[1]; - do {$aline=;} until($aline=~/mature_seq/) ; - chomp $aline; - my @arr1=split/\t/,$aline; - do {$aline=;} until($aline=~/pre_seq/) ; - chomp $aline; - my @arr2=split/\t/,$aline; - do {$aline=;} until($aline=~/pri_id/) ; - chomp $aline; - my @pri_id=split/\t/,$aline; - do {$aline=;} until($aline=~/pri_seq/) ; - chomp $aline; - my @pri_seq=split/\t/,$aline; - do {$aline=;} until($aline=~/star_beg/) ; - chomp $aline; - my @star_start=split/\t/,$aline; -# $star_start[1] -=$flank1[1]; - do {$aline=;} until($aline=~/star_end/) ; - chomp $aline; - my @star_end=split/\t/,$aline; -# $star_end[1] -=$flank1[1]; - do {$aline=;} until($aline=~/star_seq/) ; - chomp $aline; - my @arr3=split/\t/,$aline; - print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t"; - #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t"; - print FA ">miR-c-$novel\n$arr1[1]\n"; - print PRE ">miR-c-$novel\n$pri_seq[1]\n"; -########## reads count ############# - ; - my @count1;my @count2;my @count3;my @count4; - $aline=; - do { - chomp $aline; - my @reads=split/\t/,$aline; - my @pos=(); - $reads[5]=~/(\d+)\.\.(\d+)/; -# $pos[0] =$1-$flank1[1]; -# $pos[1] =$2-$flank1[1]; - $pos[0]=$1; - $pos[1]=$2; - $reads[0]=~/:([\d|_]+)_x(\d+)$/; - my @ss=split/_/,$1; - for (my $i=0;$i<@ss ;$i++) { - if (!(defined $count3[$i])) { - $count3[$i]=0; - } - if (!(defined $count4[$i])) { - $count4[$i]=0; - } - $count2[$i]+=$ss[$i]; - - } -# $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 ); -# $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 ); -# $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1); -# $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1); - if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 ) - { - for (my $i=0;$i<@ss;$i++) { - $count3[$i]+=$ss[$i]; - } - } - if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){ - for (my $i=0;$i<@ss;$i++) { - $count4[$i]+=$ss[$i]; - } - } - $aline=; - chomp $aline; - } until(length $aline < 1) ; - $"="\t"; - print OUT "@count3\t@count4\t@count2\n"; - $"=" "; -} - -close IN; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -list -fa -pre -tag -options: --i input file,predictions file --list output file miRNA list file --fa output file ,miRNA sequence fasta file. --pre output file, miRNA precursor fasta file. --tag string, sample names# eg: samA,samB,samC --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b miRPlant.xml --- a/miRPlant.xml Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ - - tool for plant microRNA analisis - - - SCRIPT_PATH - bowtie - R - fastx_toolkit - ViennaRNA - - - - - miRPlant.pl - ## Change this to accommodate the number of threads you have available. - -t \${GALAXY_SLOTS:-4} - ## Do or not delet rfam mapped tags - #if $params.delet_rfam == "yes": - -D - #end if - -path \$SCRIPT_PATH - - #for $j, $s in enumerate( $series ) - ##rank_of_series=$j - -i ${s.input} - -tag ${s.tag} - #end for - - -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 45de5e1ff487 -r 5691802f074b precursors.pl --- a/precursors.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,789 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use RNA; - -my %opts; -GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h"); -if (!(defined $opts{map} and defined $opts{g} and defined $opts{o} and defined $opts{s} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'map'}; -my $faout=$opts{'o'}; -my $strout=$opts{'s'}; -my $genome= $opts{'g'}; - -my $maxd=defined $opts{'d'} ? $opts{'d'} : 200; -my $flank=defined $opts{'f'}? $opts{'f'} : 10; - -my $MAX_ENERGY=-18; -if (defined $opts{'e'}) {$MAX_ENERGY=$opts{'e'};} -my $MAX_UNPAIR=5; -my $MIN_PAIR=15; -my $MAX_SIZEDIFF=4; -my $MAX_BULGE=2; -my $ASYMMETRY=5; -my $MIN_UNPAIR=0; -my $MIN_SPACE=5; -my $MAX_SPACE=$maxd; -my $FLANK=$flank; - -######### load in genome sequences start ######## -my %genome; -my %lng; -my $name; -open IN,"<$genome"; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - if ($aline=~/^>(\S+)/) { - $name=$1; - next; - } - $genome{$name} .=$aline; -} -close IN; -foreach my $key (keys %genome) { - $lng{$key}=length($genome{$key}); -} -####### load in genome sequences end ########## - -my %breaks; ### reads number bigger than 3 -open IN,"<$filein"; #input file -while (my $aline=) { - chomp $aline; - my @tmp=split/\t/,$aline; - $tmp[0]=~/_x(\d+)$/; - my $no=$1; - next if($no<3); - #my $trand=&find_strand($tmp[9]); - #my @pos=split/\.\./,$tmp[5]; - my $end=$tmp[3]+length($tmp[4])-1; - if($tmp[1] eq "-"){$tmp[4]=revcom($tmp[4]);} - push @{$breaks{$tmp[2]}{$tmp[1]}},[$tmp[3],$end,$no,$tmp[4]]; ### 0 base -} -close IN; - -my %cites; ### peaks -foreach my $chr (keys %breaks) { - foreach my $strand (keys %{$breaks{$chr}}) { - my @array=@{$breaks{$chr}{$strand}}; - @array=sort{$a->[0]<=>$b->[0]} @array; - for (my $i=0;$i<@array;$i++) { - my $start=$array[$i][0];my $end=$array[$i][1]; - my @subarray=(); - push @subarray,$array[$i]; - - for (my $j=$i+1;$j<@array;$j++) { - if ($start<$array[$j][1] && $end>$array[$j][0]) { - push @subarray,$array[$j]; - ($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]); - } - else{ - $i=$j; - &find_cites(\@subarray,$chr,$strand); - last; - } - } - } - } -} - -open FA,">$faout"; #output file -open STR,">$strout"; -foreach my $chr (keys %cites) { - foreach my $strand (keys %{$cites{$chr}}) { - - my @array2=@{$cites{$chr}{$strand}}; - @array2=sort{$a->[0]<=>$b->[0]} @array2; - &excise(\@array2,$chr,$strand); - } -} -close FA; -close STR; -sub oneCiteDn{ - my ($array,$a,$chr,$strand)=@_; - - my $ss=$$array[$a][0]-$flank; - $ss=0 if($ss<0); - my $ee=$$array[$a][1]+$maxd+$flank; - $ee=$lng{$chr} if($ee>$lng{$chr}); - - my $seq=substr($genome{$chr},$ss,$ee-$ss+1); - if($strand eq "-"){$seq=revcom($seq);} - - my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee); - return $val; -} -sub oneCiteUp{ - my ($array,$a,$chr,$strand)=@_; - - my $ss=$$array[$a][0]-$maxd-$flank; - $ss=0 if($ss<0); - my $ee=$$array[$a][1]+$flank; - $ee=$lng{$chr} if($ee>$lng{$chr}); - - my $seq=substr($genome{$chr},$ss,$ee-$ss+1); - if($strand eq "-"){$seq=revcom($seq);} - - my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee); - return $val; - -} - -sub twoCites{ - my ($array,$a,$b,$chr,$strand)=@_; - - my $ss=$$array[$a][0]-$flank; - $ss=0 if($ss<0); - my $ee=$$array[$b][1]+$flank; - $ee=$lng{$chr} if($ee>$lng{$chr}); - - my $seq=substr($genome{$chr},$ss,$ee-$ss+1); - if($strand eq "-"){$seq=revcom($seq);} - -# my( $str,$mfe)=RNA::fold($seq); -# return 0 if($mfe>$MAX_ENERGY); ### minimum mfe - my $val=&ffw2($seq,$$array[$a][3],$$array[$b][3],$chr,$strand,$ss,$ee); - - return $val; - -} -sub excise{ - my ($cluster,$chr,$strand)=@_; - - my $last_pos=0; - for (my $i=0;$i<@{$cluster};$i++) { - next if($$cluster[$i][0]<$last_pos); - my $ok=0; - for (my $j=$i+1;$j<@{$cluster} ;$j++) { - if($$cluster[$j][0]-$$cluster[$i][1]>$maxd){ - $i=$j; - last; - }else{ - $ok=&twoCites($cluster,$i,$j,$chr,$strand); - if($ok){ $last_pos=$$cluster[$j][1]+$flank; $i=$j; last;} - } - } - next if($ok); - - $ok=&oneCiteDn($cluster,$i,$chr,$strand); - if($ok){$last_pos=$$cluster[$i][1]+$maxd+$flank; next;} - $ok=&oneCiteUp($cluster,$i,$chr,$strand); - if($ok){$last_pos=$$cluster[$i][1]+$flank;next;} - } - - -} - -sub ffw2{ - my ($seq,$tag1,$tag2,$chr,$strand,$ss,$ee)=@_; - - my $N_count=$seq=~tr/N//; ## precursor sequence has not more than 5 Ns - if ($N_count > 5) { - return 0; - } - - my $seq_length=length $seq; - # position tag1 and tag2 - my $tag1_beg=index($seq,$tag1,0)+1; - if ($tag1_beg < 1) { - warn "[ffw2] coordinate error.\n"; -# $fold->{reason}="coordinate error"; - return 0; - } - my $tag2_beg=index($seq,$tag2,0)+1; - if ($tag2_beg < 1) { - warn "[ffw2] coordinate error.\n"; -# $fold->{reason}="coordinate error"; - return 0; - } - if ($tag2_beg < $tag1_beg) { - # swap tag1 and tag2 - ($tag1,$tag2)=($tag2,$tag1); - ($tag1_beg,$tag2_beg)=($tag2_beg,$tag1_beg); - } - my $tag1_end=$tag1_beg+length($tag1)-1; - my $tag2_end=$tag2_beg+length($tag2)-1; - # re-clipping - my $beg=$tag1_beg-$FLANK; $beg=1 if $beg < 1; - my $end=$tag2_end+$FLANK; $end=$seq_length if $end > $seq_length; - $seq=substr($seq,$beg-1,$end-$beg+1); - $seq_length=length $seq; - # re-reposition - $tag1_beg=index($seq,$tag1,0)+1; - if ($tag1_beg < 1) { - warn "[ffw2] coordinate error.\n"; -# $fold->{reason}="coordinate error"; - return 0; - } - - $tag2_beg=index($seq,$tag2,0)+1; - if ($tag2_beg < 1) { - warn "[ffw2] coordinate error.\n"; -# $fold->{reason}="coordinate error"; - return 0; - } - $tag1_end=$tag1_beg+length($tag1)-1; - $tag2_end=$tag2_beg+length($tag2)-1; - - # fold - my ($struct,$mfe)=RNA::fold($seq); - $mfe=sprintf "%.2f", $mfe; - if ($mfe > $MAX_ENERGY) {return 0;} - - # tag1 - my $tag1_length=$tag1_end-$tag1_beg+1; - my $tag1_struct=substr($struct,$tag1_beg-1,$tag1_length); - my $tag1_arm=which_arm($tag1_struct); - my $tag1_unpair=$tag1_struct=~tr/.//; - my $tag1_pair=$tag1_length-$tag1_unpair; - my $tag1_max_bulge=biggest_bulge($tag1_struct); - if ($tag1_arm ne "5p") { return 0;} # tag not in stem -# if ($tag1_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag1_unpair ($MAX_UNPAIR)"; return $pass} - if ($tag1_pair < $MIN_PAIR) {return 0;} - if ($tag1_max_bulge > $MAX_BULGE) {return 0;} - - # tag2 - my $tag2_length=$tag2_end-$tag2_beg+1; - my $tag2_struct=substr($struct,$tag2_beg-1,$tag2_length); - my $tag2_arm=which_arm($tag2_struct); - my $tag2_unpair=$tag2_struct=~tr/.//; - my $tag2_pair=$tag2_length-$tag2_unpair; - my $tag2_max_bulge=biggest_bulge($tag2_struct); - if ($tag2_arm ne "3p") {return 0;} # star not in stem -# if ($tag2_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag2_unpair ($MAX_UNPAIR)"; return $pass} - if ($tag2_pair < $MIN_PAIR) {return 0;} - if ($tag2_max_bulge > $MAX_BULGE) {return 0;} - - # space size between miR and miR* - my $space=$tag2_beg-$tag1_end-1; - if ($space < $MIN_SPACE) {return 0;} - if ($space > $MAX_SPACE) {return 0;} - - # size diff of miR and miR* - my $size_diff=abs($tag1_length-$tag2_length); - if ($size_diff > $MAX_SIZEDIFF) {return 0;} - - # build base pairing table - my %pairtable; - &parse_struct($struct,\%pairtable); # coords count from 1 - - my $asy1=get_asy(\%pairtable,$tag1_beg,$tag1_end); - my $asy2=get_asy(\%pairtable,$tag2_beg,$tag2_end); - my $asy=($asy1 < $asy2) ? $asy1 : $asy2; - if ($asy > $ASYMMETRY) {return 0} - - # duplex fold, determine whether two matures like a miR/miR* ike duplex - my ($like_mir_duplex1,$duplex_pair,$overhang1,$overhang2)=likeMirDuplex1($tag1,$tag2); - # parse hairpin, determine whether two matures form miR/miR* duplex in hairpin context - my ($like_mir_duplex2,$duplex_pair2,$overhang_b,$overhang_t)=likeMirDuplex2(\%pairtable,$tag1_beg,$tag1_end,$tag2_beg,$tag2_end); - if ($like_mir_duplex1==0 && $like_mir_duplex2==0) { - return 0; - } - - print FA ">$chr:$strand:$ss..$ee\n$seq\n"; - print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n"; - - return 1; -} - -sub ffw1{ - my ($seq,$tag,$chr,$strand,$ss,$ee)=@_; - my $pass=0; - - my $N_count=$seq=~tr/N//; - if ($N_count > 5) { - return 0; - } - - my $seq_length=length $seq; - my $tag_length=length $tag; - - # position - my $tag_beg=index($seq,$tag,0)+1; - if ($tag_beg < 1) { - warn "[ffw1] coordinate error.\n"; - return $pass; - } - my $tag_end=$tag_beg+length($tag)-1; - - - # define candidate precursor by hybrid short arm to long arm, not solid enough - my($beg,$end)=define_precursor($seq,$tag); - if (not defined $beg) { - return $pass; - } - if (not defined $end) { - return $pass; - } - $seq=substr($seq,$beg-1,$end-$beg+1); - $seq_length=length $seq; - - - # fold - my ($struct,$mfe)=RNA::fold($seq); - $mfe=sprintf "%.2f",$mfe; - if ($mfe > $MAX_ENERGY) { - $pass=0; - return $pass; - } - - # reposition - $tag_beg=index($seq,$tag,0)+1; - if ($tag_beg < 1) { - warn "[ffw1] coordinate error.\n"; - return 0; - } - $tag_end=$tag_beg+length($tag)-1; - - my $tag_struct=substr($struct,$tag_beg-1,$tag_length); - my $tag_arm=which_arm($tag_struct); - my $tag_unpair=$tag_struct=~tr/.//; - my $tag_pair=$tag_length-$tag_unpair; - my $tag_max_bulge=biggest_bulge($tag_struct); - if ($tag_arm eq "-") { return $pass;} -# if ($tag_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag_unpair ($MAX_UNPAIR)"; return $pass} - if ($tag_pair < $MIN_PAIR) { return $pass;} - if ($tag_max_bulge > $MAX_BULGE) {return $pass;} - - # build base pairing table - my %pairtable; - &parse_struct($struct,\%pairtable); # coords count from 1 - - # get star - my ($star_beg,$star_end)=get_star(\%pairtable,$tag_beg,$tag_end); - my $star=substr($seq,$star_beg-1,$star_end-$star_beg+1); - my $star_length=$star_end-$star_beg+1; - my $star_struct=substr($struct,$star_beg-1,$star_end-$star_beg+1); - my $star_arm=which_arm($star_struct); - my $star_unpair=$star_struct=~tr/.//; - my $star_pair=$star_length-$star_unpair; - my $star_max_bulge=biggest_bulge($star_struct); - if ($star_arm eq "-") { return $pass;} -# if ($star_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$star_unpair ($MAX_UNPAIR)"; return $pass} - if ($star_pair < $MIN_PAIR) {return $pass;} - if ($star_max_bulge > $MAX_BULGE) {return $pass;} - - if ($tag_arm eq $star_arm) {return $pass;} - - # space size between miR and miR* - my $space; - if ($tag_beg < $star_beg) { - $space=$star_beg-$tag_end-1; - } - else { - $space=$tag_beg-$star_end-1; - } - if ($space < $MIN_SPACE) { return $pass;} - if ($space > $MAX_SPACE) { return $pass;} - - # size diff - my $size_diff=abs($tag_length-$star_length); - if ($size_diff > $MAX_SIZEDIFF) { return $pass;} - - # asymmetry - my $asy=get_asy(\%pairtable,$tag_beg,$tag_end); - if ($asy > $ASYMMETRY) {return $pass;} - - $pass=1; - print FA ">$chr:$strand:$ss..$ee\n$seq\n"; - print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n"; - return $pass; - -} -sub get_star { - my($table,$beg,$end)=@_; - - my ($s1,$e1,$s2,$e2); # s1 pair to s2, e1 pair to e2 - foreach my $i ($beg..$end) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - $s1=$i; - $s2=$j; - last; - } - } - foreach my $i (reverse ($beg..$end)) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - $e1=$i; - $e2=$j; - last; - } - } -# print "$s1,$e1 $s2,$e2\n"; - - # correct terminus - my $off1=$s1-$beg; - my $off2=$end-$e1; - $s2+=$off1; - $s2+=2; # 081009 - $e2-=$off2; $e2=1 if $e2 < 1; - $e2+=2; $e2=1 if $e2 < 1; # 081009 - ($s2,$e2)=($e2,$s2) if ($s2 > $e2); - return ($s2,$e2); -} - -sub define_precursor { - my $seq=shift; - my $tag=shift; - - my $seq_length=length $seq; - my $tag_length=length $tag; - my $tag_beg=index($seq,$tag,0)+1; - my $tag_end=$tag_beg+$tag_length-1; - - # split the candidate region into short arm and long arm - my $tag_arm; - my ($larm,$larm_beg,$larm_end); - my ($sarm,$sarm_beg,$sarm_end); - if ($tag_beg-1 < $seq_length-$tag_end) { # on 5' arm - $sarm=substr($seq,0,$tag_end); - $larm=substr($seq,$tag_end); - $sarm_beg=1; - $sarm_end=$tag_end; - $larm_beg=$tag_end+1; - $larm_end=$seq_length; - $tag_arm="5p"; - } - else { - $larm=substr($seq,0,$tag_beg-1); # on 3' arm - $sarm=substr($seq,$tag_beg-1); - $larm_beg=1; - $larm_end=$tag_beg-1; - $sarm_beg=$tag_beg; - $sarm_end=$seq_length; - $tag_arm="3p"; - } - -# print "$sarm_beg,$sarm_end $sarm\n"; -# print "$larm_beg,$larm_end $larm\n"; - - # clipping short arm - if ($tag_arm eq "5p") { - $sarm_beg=$tag_beg-$flank; $sarm_beg=1 if $sarm_beg < 1; - $sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1); - } - else { - $sarm_end=$tag_end+$flank; $sarm_end=$seq_length if $sarm_end > $seq_length; - $sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1); - } -# print "$sarm_beg,$sarm_end $sarm\n"; -# print "$larm_beg,$larm_end $larm\n"; - - # define the precursor by hybriding short arm to long arm - my $duplex=RNA::duplexfold($sarm,$larm); - my $struct=$duplex->{structure}; - my $energy=sprintf "%.2f", $duplex->{energy}; - my ($str1,$str2)=split(/&/,$struct); - my $pair=$str1=~tr/(//; -# print "pair=$pair\n"; - my $beg1=$duplex->{i}+1-length($str1); - my $end1=$duplex->{i}; - my $beg2=$duplex->{j}; - my $end2=$duplex->{j}+length($str2)-1; -# print "$beg1:$end1 $beg2:$end2\n"; - # transform coordinates - $beg1=$beg1+$sarm_beg-1; - $end1=$end1+$sarm_beg-1; - $beg2=$beg2+$larm_beg-1; - $end2=$end2+$larm_beg-1; -# print "$beg1:$end1 $beg2:$end2\n"; - - my $off5p=$beg1-$sarm_beg; - my $off3p=$sarm_end-$end1; - $beg2-=$off3p; $beg2=1 if $beg2 < 1; - $end2+=$off5p; $end2=$seq_length if $end2 > $seq_length; - -# print "$beg1:$end1 $beg2:$end2\n"; - - my $beg=$sarm_beg < $beg2 ? $sarm_beg : $beg2; - my $end=$sarm_end > $end2 ? $sarm_end : $end2; - - return if $pair < $MIN_PAIR; -# print "$beg,$end\n"; - return ($beg,$end); -} - - -# duplex fold, judge whether two short seqs like a miRNA/miRNA* duplex -sub likeMirDuplex1 { - my $seq1=shift; - my $seq2=shift; - my $like_mir_duplex=1; - - my $length1=length $seq1; - my $length2=length $seq2; - my $duplex=RNA::duplexfold($seq1, $seq2); - my $duplex_struct=$duplex->{structure}; - my $duplex_energy=sprintf "%.2f", $duplex->{energy}; - my ($str1,$str2)=split(/&/,$duplex_struct); - my $beg1=$duplex->{i}+1-length($str1); - my $end1=$duplex->{i}; - my $beg2=$duplex->{j}; - my $end2=$duplex->{j}+length($str2)-1; - - # revise beg1, end1, beg2, end2 - $str1=~/^(\.*)/; - $beg1+=length($1); - $str1=~/(\.*)$/; - $end1-=length($1); - $str2=~/^(\.*)/; - $beg2+=length($1); - $str2=~/(\.*)$/; - $end2-=length($1); - - my $pair_num=$str1=~tr/(//; - my $overhang1=($length2-$end2)-($beg1-1); # 3' overhang at hairpin bottom - my $overhang2=($length1-$end1)-($beg2-1); # 3' overhang at hairpin neck -# print $pair_num,"\n"; -# print $overhang1,"\n"; -# print $overhang2,"\n"; - if ($pair_num < 13) { - $like_mir_duplex=0; - } - if ($overhang1 < 0 || $overhang2 < 0 ) { - $like_mir_duplex=0; - } - if ($overhang1 > 4 || $overhang2 > 4) { - $like_mir_duplex=0; - } - return ($like_mir_duplex,$pair_num,$overhang1,$overhang2); -} - -# judge whether two matures form miR/miR* duplex, in hairpin context -sub likeMirDuplex2 { - my ($table,$beg1,$end1,$beg2,$end2)=@_; - my $like_mir_duplex=1; - -# s1 e1 -# 5 ----------------------------3 -# | | |||| ||| | -#3 -------------------------------5 -# e2 s2 - - my $pair_num=0; - my $overhang1=0; - my $overhang2=0; - my ($s1,$e1,$s2,$e2); - foreach my $i ($beg1..$end1) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - if ($j <= $end2 && $j >= $beg2) { - $s1=$i; - $e2=$j; - last; - } - } - } - foreach my $i (reverse ($beg1..$end1)) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - if ($j <= $end2 && $j >= $beg2) { - $e1=$i; - $s2=$j; - last; - } - } - } - -# print "$beg1,$end1 $s1,$e1\n"; -# print "$beg2,$end2 $s2,$e2\n"; - - foreach my $i ($beg1..$end1) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - if ($j <= $end2 && $j >= $beg2) { - ++$pair_num; - } - } - } - if (defined $s1 && defined $e2) { - $overhang1=($end2-$e2)-($s1-$beg1); - } - if (defined $e1 && defined $s2) { - $overhang2=($end1-$e1)-($s2-$beg2); - } - - if ($pair_num < 13) { - $like_mir_duplex=0; - } - if ($overhang1 < 0 && $overhang2 < 0) { - $like_mir_duplex=0; - } - return ($like_mir_duplex,$pair_num,$overhang1,$overhang2); -} -sub parse_struct { - my $struct=shift; - my $table=shift; - - my @t=split('',$struct); - my @lbs; # left brackets - foreach my $k (0..$#t) { - if ($t[$k] eq "(") { - push @lbs, $k+1; - } - elsif ($t[$k] eq ")") { - my $lb=pop @lbs; - my $rb=$k+1; - $table->{$lb}=$rb; - $table->{$rb}=$lb; - } - } - if (@lbs) { - warn "unbalanced RNA struct.\n"; - } -} -sub which_arm { - my $substruct=shift; - my $arm; - if ($substruct=~/\(/ && $substruct=~/\)/) { - $arm="-"; - } - elsif ($substruct=~/\(/) { - $arm="5p"; - } - else { - $arm="3p"; - } - return $arm; -} -sub biggest_bulge { - my $struct=shift; - my $bulge_size=0; - my $max_bulge=0; - while ($struct=~/(\.+)/g) { - $bulge_size=length $1; - if ($bulge_size > $max_bulge) { - $max_bulge=$bulge_size; - } - } - return $max_bulge; -} -sub get_asy { - my($table,$a1,$a2)=@_; - my ($pre_i,$pre_j); - my $asymmetry=0; - foreach my $i ($a1..$a2) { - if (defined $table->{$i}) { - my $j=$table->{$i}; - if (defined $pre_i && defined $pre_j) { - my $diff=($i-$pre_i)+($j-$pre_j); - $asymmetry += abs($diff); - } - $pre_i=$i; - $pre_j=$j; - } - } - return $asymmetry; -} - -sub peaks{ - my @cluster=@{$_[0]}; - - return if(@cluster<1); - - my $max=0; my $index=-1; - for (my $i=0;$i<@cluster;$i++) { - if($cluster[$i][2]>$max){ - $max=$cluster[$i][2]; - $index=$i; - } - } -# &excise(\@cluster,$index,$_[1],$_[2]); - return($index); -} - -sub find_cites{ - my @tmp=@{$_[0]}; - my $i=&peaks(\@tmp); - - my $start=$tmp[$i][0]; - my $total=0; my $node5=0; - for (my $j=0;$j<@tmp ;$j++) { - $total+=$tmp[$j][2]; - $node5 +=$tmp[$j][2] if($tmp[$j][0]-$start<=2 && $tmp[$j][0]-$start>=-2); - } - push @{$cites{$_[1]}{$_[2]}},$tmp[$i] if($node5/$total>0.80 && $tmp[$i][2]/$node5>0.5); -} - -sub newpos{ - my ($a,$b,$c,$d)=@_; - my $s= $a>$c ? $c : $a; - my $e=$b>$d ? $b : $d; - return($s,$e); -} - -sub rev{ - - my($sequence)=@_; - - my $rev=reverse $sequence; - - return $rev; -} - -sub com{ - - my($sequence)=@_; - - $sequence=~tr/acgtuACGTU/TGCAATGCAA/; - - return $sequence; -} - -sub revcom{ - - my($sequence)=@_; - - my $revcom=rev(com($sequence)); - - return $revcom; -} - -sub find_strand{ - - #A subroutine to find the strand, parsing different blast formats - my($other)=@_; - - my $strand="+"; - - if($other=~/-/){ - $strand="-"; - } - - if($other=~/minus/i){ - $strand="-"; - } - - return($strand); -} -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -map -g -d -f -o -s -e -options: - -map input file# align result # bst. format - -g input file # genome sequence fasta format - -d Maximal space between miRNA and miRNA* (200) - -f Flank sequence length of miRNA precursor (10) - -o output file# percursor fasta file - -s output file# precursor structure file - -e Maximal free energy allowed for a miRNA precursor (-18 kcal/mol) - - -h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b quantify.pl --- a/quantify.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,495 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use File::Path; -use strict; -use File::Basename; -#use Getopt::Std; -use Getopt::Long; -use RNA; - -my %opts; -GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","time:s","h"); -if (!(defined $opts{r} and defined $opts{p} and defined $opts{m} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $read=$opts{'r'}; -my $pre=$opts{'p'}; -my $mature=$opts{'m'}; - -my $dir=$opts{'o'}; -unless ($dir=~/\/$/) {$dir .="/";} -if (not -d $dir) { - mkdir $dir; -} - -my $threads=defined $opts{'t'} ? $opts{'t'} : 1; -my $mismatch=defined $opts{'mis'} ? $opts{'mis'} : 0; - -my $upstream = 2; -my $downstream = 5; - -$upstream = $opts{'e'} if(defined $opts{'e'}); -$downstream = $opts{'f'} if(defined $opts{'f'}); - -my $marks=defined $opts{'tag'} ? $opts{'tag'} : ""; - -my $time=Time(); -if (defined $opts{'time'}) { $time=$opts{'time'};} - -my $tmpdir="${dir}/miRNA_Express_${time}"; -if(not -d $tmpdir){ - mkdir($tmpdir); -} -chdir $tmpdir; - -`cp $pre ./`; -my $pre_file_name=basename($pre); - -&mapping(); # matures align to precursors && reads align to precursors; - -my %pre_mature; # $pre_mature{pre_id}{matre_ID}{"mature"}[0]->start; $pre_mature{pre_id}{matre_ID}{"mature"}[1]->end; -&maturePosOnPre(); # acknowledge mature positions on precursor - -my %pre_read; -&readPosOnPre(); # acknowledge reads positions on precursors - -if(!(defined $opts{'tag'})){ - foreach my $key (keys %pre_read) { - $pre_read{$key}[0][0]=~/:([\d|_]+)_x(\d+)$/; - my @ss=split/_/,$1; - for (my $i=1;$i<=@ss;$i++) { - $marks .="Smp$i;"; - } - last; - } -} - -my %pre;## read in precursor sequences #$pre{pre_id}="CGTA...." -&attachPre(); - -my $preno=scalar (keys %pre); -print "Total Precursor Number is $preno !!!!\n"; - -my %struc; #mature star loop; $struc{$key}{"struc"}=$str; $struc{$key}{"mfe"}=$mfe; -&structure(); - - -##### analysis and print out && moRs -my $aln=$dir."known_microRNA_express.aln"; -my $list=$dir."known_microRNA_express.txt"; -my $moRs=$dir."known_microRNA_express.moRs"; - -system("ln $mature $dir/known_microRNA_mature.fa "); -system("ln $pre $dir/known_microRNA_precursor.fa "); - -open ALN,">$aln"; -open LIST,">$list"; -open MORS,">$moRs"; - -$"="\t"; ##### @array print in \t - -my @marks=split/\;/,$marks; -#print LIST "#matueID\tpreID\tpos1\tpos2\tmatureExp\tstarExp\ttotalExp\n"; -print LIST "#matueID\tpreID\tpos1\tpos2"; -for (my $i=0;$i<@marks;$i++) { - print LIST "\t",$marks[$i],"_matureExp"; -} -for (my $i=0;$i<@marks;$i++) { - print LIST "\t",$marks[$i],"_starExp"; -} -for (my $i=0;$i<@marks;$i++) { - print LIST "\t",$marks[$i],"_totalExp"; -} -print LIST "\n"; -print ALN "#>precursor ID \n#precursor sequence\n#precursor structure (mfe)\n#RNA_seq\t@marks\ttotal\n"; -print MORS "#>precursor ID\tstrand\texpress_reads\texpress_reads\/total_reads\tblock_number\tprecursor_sequence\n#\tblock_start\tblock_end\t@marks\ttotal\ttag_number\tsequence\n"; -my %moRs; - -foreach my $key (keys %pre) { - print ALN ">$key\n$pre{$key}\n$struc{$key}{struc} ($struc{$key}{mfe})\n"; - next if(! (exists $pre_read{$key})); - my @array=@{$pre_read{$key}}; - @array=sort{$a->[3]<=> $b->[3]} @array; - - my $length=length($pre{$key}); - - my $maxline=-1;my $max=0; ### storage the maxinum express read line - my $totalReadsNo=0; - my @not_over=(); ### new read format better for moRs analysis - -####print out Aln file start - for (my $i=0;$i<@array;$i++) { - my $maps=$array[$i][3]+1; - my $mape=$array[$i][3]+length($array[$i][4]); - my $str=""; - $str .= "." x ($maps-1); - $str .=$array[$i][4]; - $str .="." x ($length-$mape); - $str .=" "; - - $array[$i][0]=~/:([\d|_]+)_x(\d+)$/; - my @sample=split /\_/,$1; - my $total=$2; - print ALN $str,"@sample","\t",$total,"\n"; - - if($total>$max){$max=$total; $maxline=$i;} - $totalReadsNo+=$total; - - push @not_over,[$key,$maps,$mape,$array[$i][0],$total,"+"]; - } -####print out Aln file end - -#### express list start - my ($ms,$me,$ss,$se); - if (!(exists($pre_mature{$key}))) { - $ms=$array[$maxline][3]+1; - $me=$array[$maxline][3]+length($array[$maxline][4]); - ($ss,$se)=&other_pair($ms,$me,$struc{$key}{'struc'}); - - my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array); - print LIST "$key\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n"; - } - else{ - foreach my $maID (keys %{$pre_mature{$key}}) { - $ms=$pre_mature{$key}{$maID}{"mature"}[0]; - $me=$pre_mature{$key}{$maID}{"mature"}[1]; - $ss=$pre_mature{$key}{$maID}{"star"}[0]; - $se=$pre_mature{$key}{$maID}{"star"}[1]; - my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array); - print LIST "$maID\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n"; - } - } -#### express list end - -#### analysis moRs start - my @result; my @m_texp;my $m_texp=0; ### moRs informations - - while (@not_over>0) { - my @over=@not_over; - @not_over=(); - -#丰度最高tag - my $m_max=0;my $m_maxline=-1;my $m_start=0;my $m_end=0;my $m_exp=0;my @m_exp;my $m_no=1; - for (my $i=0;$i<@over;$i++) { - my @m_array=@{$over[$i]}; - if ($m_max<$m_array[4]) { - $m_max=$m_array[4]; - $m_maxline=$i; - } - } - $m_start=$over[$m_maxline][1]; - $m_end=$over[$m_maxline][2]; - $m_exp=$m_max; - $over[$m_maxline][3]=~/:([\d|_]+)_x(\d+)$/; - my @m_nums=split/_/,$1; - for (my $j=0;$j<@m_nums;$j++) { - $m_exp[$j]=$m_nums[$j]; - } - -#统计以丰度最高tag为坐标的reads, 两端位置差异不超过3nt - for (my $i=0;$i<@over;$i++) { - next if($i==$m_maxline); - my @m_array=@{$over[$i]}; - if (abs($m_array[1]-$m_start)<=3 && abs($m_array[2]-$m_end)<=3) { - $m_exp+=$m_array[4]; - $m_no++; - $m_array[3]=~/:([\d|_]+)_x(\d+)$/; - my @m_nums=split/_/,$1; - for (my $j=0;$j<@m_nums;$j++) { - $m_exp[$j] +=$m_nums[$j]; - } - } - elsif($m_array[1]>=$m_end || $m_array[2]<=$m_start){push @not_over,[@{$over[$i]}];} #去除跨越block的reads - } - if($m_exp>5){### 5个reads - $m_texp+=$m_exp; - for (my $j=0;$j<@m_exp;$j++) { - $m_texp[$j]+=$m_exp[$j]; - } - my $string=&subseq($pre{$key},$m_start,$m_end,"+"); - push @result,"\t$m_start\t$m_end\t@m_exp\t$m_exp\t$m_no\t$string" ; - } - } - - my $str=scalar @result; - my $percent=sprintf("%.2f",$m_texp/$totalReadsNo); - $str=">$key\t+\t$m_texp\t$percent\t".$str."\t$pre{$key}"; - @{$moRs{$str}}=@result; - -#### analysis moRs end -} - -##### moRs print out start -foreach my $key (keys %moRs) { - my @tmp=split/\t/,$key; - next if ($tmp[4]<=2); - next if($tmp[3]<0.95); - my @over; - for (my $i=0;$i<@{$moRs{$key}};$i++) { - my @arrayi=split/\t/,$moRs{$key}[$i]; - for (my $j=0;$j<@{$moRs{$key}};$j++) { - next if($i==$j); - my @arrayj=split/\t/,$moRs{$key}[$j]; - if ((($arrayj[1]-$arrayi[2]>=0 && $arrayj[1]-$arrayi[2] <=3) || ($arrayj[1]-$arrayi[2]>=18 && $arrayj[1]-$arrayi[2] <=25) )||(($arrayi[1]-$arrayj[2]>=0 && $arrayi[1]-$arrayj[2] <=3)||($arrayi[1]-$arrayj[2]>=18 && $arrayi[1]-$arrayj[2] <=25))) { - push @over,$moRs{$key}[$i]; - } - } - } - if (@over>0) { - print MORS "$key\n"; - foreach (@{$moRs{$key}}) { - print MORS "$_\n"; - } - } -} -###### moRs print out end -close ALN; -close LIST; -close MORS; - -$"=" ";##### reset - - -################### Sub programs ################# -sub express{ - my ($ms,$me,$ss,$se,$read)=@_; - my (@mexp,@sexp,@texp); - $$read[0][0]=~/:([_|\d]+)_x(\d+)$/; - my @numsample=split/_/,$1; - for (my $i=0;$i<@numsample;$i++) { - $mexp[$i]=0; - $sexp[$i]=0; - $texp[$i]=0; - } - - for (my $i=0;$i<@{$read};$i++) { - my $start=$$read[$i][3]+1; - my $end=$$read[$i][3]+length($$read[$i][4]); - $$read[$i][0]=~/:([_|\d]+)_x(\d+)$/; - my $expresses=$1; - my @nums=split/_/,$expresses; - - for (my $j=0;$j<@nums;$j++) { - $texp[$j]+=$nums[$j]; - } - if ($start>=$ms && $end<=$me) { - for (my $j=0;$j<@nums;$j++) { - $mexp[$j]+=$nums[$j]; - } - } - if ($start>=$ss && $end<=$se) { - for (my $j=0;$j<@nums;$j++) { - $sexp[$j]+=$nums[$j]; - } - } - } - return(\@mexp,\@sexp,\@texp); -} - -sub structure{ - foreach my $key (keys %pre_mature) { - if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";} - my ($str,$mfe)=RNA::fold($pre{$key}); - $struc{$key}{"struc"}=$str; - $struc{$key}{"mfe"}=sprintf ("%.2f",$mfe); - - foreach my $id (keys %{$pre_mature{$key}}) { - ($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str); - } -=cut -##### Nucleotide complementary - my @tmp=split//,$str; - my %a2b; - my @bps; - for (my $i=0;$i<@tmp;$i++) { - if ($tmp[$i] eq "("){push @bps,$i+1 ; next;} - if ($tmp[$i] eq ")") { - my $up=pop @bps; - $a2b{$i+1}=$up; - $a2b{$up}=$i+1; - } - } - -##### search star position - foreach my $id (keys %{$pre_mature{$key}}) { - my $n=0; - for (my $i=$pre_mature{$key}{$id}{"mature"}[0];$i<=$pre_mature{$key}{$id}{"mature"}[1] ; $i++) { - if (defined $a2b{$i}) { - my $a=$i; my $b=$a2b{$i}; - if($a>$b){ - $pre_mature{$key}{$id}{"star"}[0]=$b-$n+2; - $pre_mature{$key}{$id}{"star"}[1]=$b-$n+2+($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]); - } - if($a<$b{ - $pre_mature{$key}{$id}{"star"}[1]=$b+$n+2; - $pre_mature{$key}{$id}{"star"}[0]=$b+$n+2-($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]); - } - last; - } - $n++; - } - } -=cut - } -} -sub other_pair{ - my ($start,$end,$structure)=@_; - ##### Nucleotide complementary - my @tmp=split//,$structure; - my %a2b; my @bps; - for (my $i=0;$i<@tmp;$i++) { - if ($tmp[$i] eq "("){push @bps,$i+1 ; next;} - if ($tmp[$i] eq ")") { - my $up=pop @bps; - $a2b{$i+1}=$up; - $a2b{$up}=$i+1; - } - } -##### search star position - my $n=0;my $startpos; my $endpos; - for (my $i=$start;$i<=$end ; $i++) { - if (defined $a2b{$i}) { - my $a=$i; my $b=$a2b{$i}; -# if($a>$b){ -# $startpos=$b-$n+2; -# $endpos=$b-$n+2+($end-$start); -# } -# if($a<$b){ - $endpos=$b+$n+2; - if($endpos>length($structure)){$endpos=length($structure);} - $startpos=$b+$n+2-($end-$start); - if($startpos<1){$startpos=1;} -# } - last; - } - $n++; - } - return ($startpos,$endpos); -} -sub attachPre{ - open IN, "<$pre_file_name"; - my $name; - while (my $aline=) { - chomp $aline; - if ($aline=~/^>(\S+)/) { - $name=$1; - next; - } - $pre{$name} .=$aline; - } - close IN; -} -sub readPosOnPre{ - open IN,") { - chomp $aline; - my @tmp=split/\t/,$aline; - my $id=lc($tmp[2]); - push @{$pre_read{$tmp[2]}},[@tmp]; - } - close IN; -} -sub maturePosOnPre{ - open IN,") { - chomp $aline; - my @tmp=split/\t/,$aline; - my $mm=$tmp[0]; -# $mm=~s/\-3P|\-5P//i; - $mm=lc($mm); - my $pm=$tmp[2]; - $pm=lc($pm); - -# next if ($mm ne $pm);### stringent mapping let7a only allowed to map pre-let7a - next if($mm!~/$pm/); -# print "$tmp[2]\t$tmp[0]\n"; -# $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]-$upstream; -# $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=0 if($pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]<0); -# $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4])-1+$downstream; - $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]+1; - $pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4]); - } - close IN; -} -sub mapping{ - my $err; -## build bowtie index - print STDERR "building bowtie index\n"; - $err = `bowtie-build $pre_file_name miRNA_precursor`; - -## map mature sequences against precursors - print STDERR "mapping mature sequences against index\n"; - $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature mature_mapped.bwt`; - -## map reads against precursors - print STDERR "mapping read sequences against index\n"; - $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa read_mapped.bwt `; - -} - -sub subseq{ - my $seq=shift; - my $beg=shift; - my $end=shift; - my $strand=shift; - - my $subseq=substr($seq,$beg-1,$end-$beg+1); - if ($strand eq "-") { - $subseq=revcom($subseq); - } - return uc $subseq; -} - -sub revcom{ - my $seq=shift; - $seq=~tr/ATCGatcg/TAGCtagc/; - $seq=reverse $seq; - return uc $seq; -} - -sub Time{ - my $time=time(); - my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; - $month++; - $year+=1900; - if (length($sec) == 1) {$sec = "0"."$sec";} - if (length($min) == 1) {$min = "0"."$min";} - if (length($hour) == 1) {$hour = "0"."$hour";} - if (length($day) == 1) {$day = "0"."$day";} - if (length($month) == 1) {$month = "0"."$month";} - #print "$year-$month-$day $hour:$min:$sec\n"; - return("$year-$month-$day-$hour-$min-$sec"); -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -r -p -m -mis -t -e -f -tag -o -time -mandatory parameters: --p precursor.fa miRNA precursor sequences from miRBase # must be absolute path --m mature.fa miRNA sequences from miRBase # must be absolute path --r reads.fa your read sequences #must be absolute path - --o output directory - -options: --mis [int] number of allowed mismatches when mapping reads to precursors, default 0 --t [int] threads number,default 1 --e [int] number of nucleotides upstream of the mature sequence to consider, default 2 --f [int] number of nucleotides downstream of the mature sequence to consider, default 5 --tag [string] sample marks# eg. sampleA;sampleB;sampleC --time sting #make directory time,default is the local time --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b rfam.pl --- a/rfam.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $dir=$opts{'o'}; -unless ($dir=~/\/$/) {$dir.="/";} -my $rfam=$opts{'ref'}; -my $mis=defined $opts{'v'}? $opts{'v'} : 0; -my $index=defined $opts{'index'} ? $opts{'index'} : ""; -my $threads=defined $opts{'p'} ? $opts{'p'} : 1; - -if (not -d $dir) { - mkdir $dir; -} - - -my $time=Time(); -if (defined $opts{'time'}) { - $time=$opts{'time'}; -} -my $mapdir=$dir."/rfam_match_".$time; -if(not -d $mapdir){ - mkdir $mapdir; -} -chdir $mapdir; -###check genome index -if (-s $index.".1.ebwt") { -}else{ - &checkACGT($rfam); - `bowtie-build $rfam`; - $index="$rfam"; -} -### genome mapping -`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt`; - -sub checkACGT{ - my $string; - open IN,"<$rfam"; - while (my $aline=) { - if ($aline!~/^>/) { - $aline=~s/U/T/gi; - } - $string .=$aline; - } - close IN; - $rfam=basename($rfam); - open OUT, ">$rfam"; - print OUT $string; - close OUT; -} - -sub Time{ - my $time=time(); - my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; - $month++; - $year+=1900; - if (length($sec) == 1) {$sec = "0"."$sec";} - if (length($min) == 1) {$min = "0"."$min";} - if (length($hour) == 1) {$hour = "0"."$hour";} - if (length($day) == 1) {$day = "0"."$day";} - if (length($month) == 1) {$month = "0"."$month";} - #print "$year-$month-$day $hour:$min:$sec\n"; - return("$year-$month-$day-$hour-$min-$sec"); -} -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file# input reads fasta/fastq file --ref input file# rfam file, which do not contain miRNAs --index file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null --v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; - --p/--threads number of alignment threads to launch (default: 1) - --o output directory --time sting #make directory time,default is the local time --h help -USAGE -exit(1); -} - diff -r 45de5e1ff487 -r 5691802f074b tool_dependencies.xml --- a/tool_dependencies.xml Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - - - - - - - - - - $REPOSITORY_INSTALL_DIR - - - - - - - - http://web.mit.edu/seven/src/ViennaRNA-1.5beta.tar.gz - ./configure --prefix=$INSTALL_DIR --datadir=$INSTALL_DIR - make - make install - - $INSTALL_DIR/bin - - - - -