view miRNA_Express_and_sequence.pl @ 20:d88fc3c0084e draft

Uploaded
author big-tiandm
date Fri, 25 Jul 2014 05:32:24 -0400
parents d1cc2e6ecf90
children
line wrap: on
line source

#!/usr/bin/perl -w
#Filename:
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2014-6-4
#Modified:
#Description: solexa miRNA express and sequence
my $version=1.00;

use strict;
use Getopt::Long;

my %opts;
GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
&usage;
}

my $filein=$opts{'i'};
my $fileout=$opts{'list'};
my $out=$opts{'fa'};
my $preout=$opts{'pre'};

=cut
my %hash_pri;
open PRI,"<$opts{p}";
while (my $aline=<PRI>) {
	chomp $aline;
	if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
}
close PRI;
=cut

open IN,"<$filein"; #input file  
open OUT,">$fileout"; #output file  
open FA ,">$out";
open PRE,">$preout";

print OUT "#ID\tcoordinate\tpos1\tpos2";
my @marks=split/\,/,$opts{'tag'};
foreach  (@marks) {
	print OUT "\t",$_,"_matureExp";
}
foreach  (@marks) {
	print OUT "\t",$_,"_starExp";
}
foreach  (@marks) {
	print OUT "\t",$_,"_totalExp";
}

print OUT "\n";

my (%uniq_id,$novel);
while (my $aline=<IN>) {
	chomp $aline;
	until ($aline =~ /^score\s+[-\d\.]+/){
		$aline = <IN>;
		if (eof) {last;}
	}
	if (eof) {last;}
########## miRNA ID ################
	$novel++;
########### annotate####################
	do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
	chomp $aline;
	my @flank1=split/\t/,$aline;
	do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
	chomp $aline;
	my @flank2=split/\t/,$aline;
#		
########## mature start loop pre ####
	do {$aline=<IN>;} until($aline=~/mature_beg/) ;
	chomp $aline;
	my @start=split/\t/,$aline;
#	$start[1] -=$flank1[1];
	do {$aline=<IN>;} until($aline=~/mature_end/) ;
	chomp $aline;
	my @end=split/\t/,$aline;
#	$end[1] -=$flank1[1];
	do {$aline=<IN>;} until($aline=~/mature_seq/) ;
	chomp $aline;
	my @arr1=split/\t/,$aline;
	do {$aline=<IN>;} until($aline=~/pre_seq/) ;
	chomp $aline;
	my @arr2=split/\t/,$aline;
	do {$aline=<IN>;} until($aline=~/pri_id/) ;
	chomp $aline;
	my @pri_id=split/\t/,$aline;
	do {$aline=<IN>;} until($aline=~/pri_seq/) ;
	chomp $aline;
	my @pri_seq=split/\t/,$aline;
	do {$aline=<IN>;} until($aline=~/star_beg/) ;
	chomp $aline;
	my @star_start=split/\t/,$aline;
#	$star_start[1] -=$flank1[1];
	do {$aline=<IN>;} until($aline=~/star_end/) ;
	chomp $aline;
	my @star_end=split/\t/,$aline;
#	$star_end[1] -=$flank1[1];
	do {$aline=<IN>;} until($aline=~/star_seq/) ;
	chomp $aline;
	my @arr3=split/\t/,$aline;
	print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
	#print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
	print FA ">miR-c-$novel\n$arr1[1]\n";
	print PRE ">miR-c-$novel\n$pri_seq[1]\n";
########## reads count #############
	<IN>;
	my @count1;my @count2;my @count3;my @count4;
	$aline=<IN>;
	do {
		chomp $aline;
		my @reads=split/\t/,$aline;
		my @pos=();
		$reads[5]=~/(\d+)\.\.(\d+)/;
#		$pos[0] =$1-$flank1[1];
#		$pos[1] =$2-$flank1[1];
		$pos[0]=$1;
		$pos[1]=$2;
		$reads[0]=~/:([\d|_]+)_x(\d+)$/;
		my @ss=split/_/,$1;
		for (my $i=0;$i<@ss ;$i++) {
			if (!(defined $count3[$i])) {
				$count3[$i]=0;
			}
			if (!(defined $count4[$i])) {
				$count4[$i]=0;
			}
			$count2[$i]+=$ss[$i];

		}
#		$count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
#		$count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
#		$count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
#		$count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
		if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
		{
			for (my $i=0;$i<@ss;$i++) {
				$count3[$i]+=$ss[$i];
			}
		}
		if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
			for (my $i=0;$i<@ss;$i++) {
				$count4[$i]+=$ss[$i];
			}
		} 
		$aline=<IN>;
		chomp $aline;
	} until(length $aline < 1) ;
	$"="\t";
	print OUT "@count3\t@count4\t@count2\n";
	$"=" ";
}

close IN;
close OUT;

sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i -list -fa -pre -tag
options:
-i input file,predictions file
-list output file miRNA list file
-fa output file ,miRNA sequence fasta file.
-pre output file, miRNA precursor fasta file.
-tag string, sample names# eg: samA,samB,samC
-h help
USAGE
exit(1);
}