view rfam.pl @ 42:3b9a6381e6bf draft

Uploaded
author big-tiandm
date Tue, 28 Oct 2014 01:35:17 -0400
parents 9ae0d25e4169
children 0c4e11018934
line wrap: on
line source

#!/usr/bin/perl -w
#Filename:
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2013/7/19
#Modified:
#Description: 
my $version=1.00;

use strict;
use Getopt::Long;
use File::Basename;

my %opts;
GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h");
if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
&usage;
}

my $filein=$opts{'i'};
my $dir=$opts{'o'};
unless ($dir=~/\/$/) {$dir.="/";}
my $rfam=$opts{'ref'};
my $mis=defined $opts{'v'}? $opts{'v'} : 0;
my $index=defined $opts{'index'} ? $opts{'index'} : "";
my $threads=defined $opts{'p'} ? $opts{'p'} : 1;

if (not -d $dir) {
	mkdir $dir;
}


my $time=Time();
if (defined $opts{'time'}) {
	$time=$opts{'time'};
}
my $mapdir=$dir."/rfam_match_".$time;
if(not -d $mapdir){
	mkdir $mapdir;
}
chdir $mapdir;
###check genome index
if (-s $index.".1.ebwt") {
}else{
	&checkACGT($rfam);
	`bowtie-build $rfam $rfam`;
	$index="$rfam";
}
### genome mapping
`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt`;

sub checkACGT{
	my $string;
	open IN,"<$rfam";
	while (my $aline=<IN>) {
		if ($aline!~/^>/) {
			$aline=~s/U/T/gi;
		}
		$string .=$aline;	
	}
	close IN;
	$rfam=basename($rfam);
	open OUT, ">$rfam";
	print OUT $string;
	close OUT;
}

sub Time{
        my $time=time();
        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
        $month++;
        $year+=1900;
        if (length($sec) == 1) {$sec = "0"."$sec";}
        if (length($min) == 1) {$min = "0"."$min";}
        if (length($hour) == 1) {$hour = "0"."$hour";}
        if (length($day) == 1) {$day = "0"."$day";}
        if (length($month) == 1) {$month = "0"."$month";}
        #print "$year-$month-$day $hour:$min:$sec\n";
        return("$year-$month-$day-$hour-$min-$sec");
}
sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i -o
options:
-i input file# input reads fasta/fastq file
-ref input file# rfam file, which do not contain miRNAs
-index file-prefix #(must be indexed by bowtie-build) The parameter
                string must be the prefix of the bowtie index. For instance, if
                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
                the prefix is 'h_sapiens_37_asm'.##can be null
-v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;

-p/--threads <int> number of alignment threads to launch (default: 1)

-o output directory 
-time sting #make directory time,default is the local time
-h help
USAGE
exit(1);
}