diff rfam.pl @ 23:cad6a1dfb174 draft

Uploaded
author big-tiandm
date Wed, 05 Nov 2014 21:11:49 -0500
parents 9dcffd531c76
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rfam.pl	Wed Nov 05 21:11:49 2014 -0500
@@ -0,0 +1,85 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+unless ($fileout=~/\/$/) {$fileout.="/";}
+my $rfam=$opts{'ref'};
+my $mis=defined $opts{'v'}? $opts{'v'} : 0;
+my $index=defined $opts{'index'} ? $opts{'index'} : "";
+my $threads=defined $opts{'p'} ? $opts{'p'} : 1;
+
+
+#my $time=time();
+
+#my $mapdir=$fileout."/rfam_match_".$time;
+my $mapdir=$fileout."/rfam_match";
+mkdir $mapdir;
+chdir $mapdir;
+###check genome index
+if (-s $index.".1.ebwt") {
+}else{
+	&checkACGT($rfam);
+	`bowtie-build $rfam rfam`;
+	$index="rfam";
+}
+
+#chdir "rfam_match_1397022331";
+### genome mapping
+`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`;
+
+sub checkACGT{
+	my $string;
+	open IN,"<$rfam";
+	while (my $aline=<IN>) {
+		if ($aline!~/^>/) {
+			$aline=~s/U/T/gi;
+		}
+		$string .=$aline;	
+	}
+	close IN;
+	$rfam=basename($rfam);
+	open OUT, ">$rfam";
+	print OUT $string;
+	close OUT;
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file# input reads fasta/fastq file
+-ref input file# rfam file, which do not contain miRNAs
+-index file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+-v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;
+
+-p/--threads <int> number of alignment threads to launch (default: 1)
+
+-o output directory 
+
+-h help
+USAGE
+exit(1);
+}
+