annotate rfam.pl @ 18:22d79320085c draft

Uploaded
author big-tiandm
date Thu, 30 Oct 2014 21:31:55 -0400
parents 318617877a10
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Tian Dongmei
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2013/7/19
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
7 #Description:
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
9
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
10 use strict;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
11 use Getopt::Long;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
12 use File::Basename;
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
13
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main program
#
# 1. Parse the command line (-i and -o are mandatory, -h prints the help).
# 2. Create <output dir>/rfam_match and make it the working directory, so all
#    result files (rfam_mapped.fa, rfam_not_mapped.fa, rfam_mapped.bwt,
#    run.log and, if built here, the "rfam" bowtie index) end up there.
# 3. Use the bowtie index given with -index if "<prefix>.1.ebwt" exists and is
#    non-empty; otherwise convert the -ref FASTA with checkACGT() and build an
#    index named "rfam" with bowtie-build.
# 4. Map the reads with bowtie (FASTA input, at most one hit per read).
# ---------------------------------------------------------------------------
use File::Spec;    # core module; used to anchor relative paths before chdir

my %opts;
GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h");
if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
    usage();
}

my $filein=$opts{'i'};
my $fileout=$opts{'o'};
unless ($fileout=~/\/$/) {$fileout.="/";}
# NOTE: $rfam must stay a file-level lexical; checkACGT() updates it.
my $rfam=$opts{'ref'};
my $mis=defined $opts{'v'}? $opts{'v'} : 0;
my $index=defined $opts{'index'} ? $opts{'index'} : "";
my $threads=defined $opts{'p'} ? $opts{'p'} : 1;

# Remember where we started: the script changes directory below, which would
# otherwise silently break relative paths given on the command line.
my $start_dir = File::Spec->rel2abs(File::Spec->curdir());

# Quote a string for safe interpolation into a /bin/sh command line
# (paths may contain spaces or shell metacharacters).
my $shell_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
};

# Resolve a path after the chdir: keep it untouched when "$path$suffix" is
# visible from the work directory (the historic behaviour); otherwise fall
# back to the same path anchored at the start directory, if that one exists.
my $resolve = sub {
    my ($path, $suffix) = @_;
    return $path if !defined $path or !length $path or -e $path.$suffix;
    my $anchored = File::Spec->rel2abs($path, $start_dir);
    return -e $anchored.$suffix ? $anchored : $path;
};

#my $time=time();

#my $mapdir=$fileout."/rfam_match_".$time;
# $fileout already ends with "/", so no extra separator is needed.
my $mapdir=$fileout."rfam_match";
# The directory may be left over from an earlier run; only create it if absent.
unless (-d $mapdir) {
    mkdir $mapdir or die "Cannot create work directory '$mapdir': $!\n";
}
chdir $mapdir or die "Cannot change into work directory '$mapdir': $!\n";

$filein = $resolve->($filein, '');
$rfam   = $resolve->($rfam, '');
$index  = $resolve->($index, '.1.ebwt');

###check genome index
unless (-s $index.".1.ebwt") {
    # No usable index: one has to be built from the reference FASTA.
    unless (defined $rfam and length $rfam) {
        die "No usable bowtie index (-index) found and no reference FASTA (-ref) given\n";
    }
    # Writes a U->T converted copy into the work directory and sets $rfam
    # to that copy's file name.
    checkACGT($rfam);
    my $quoted_rfam = $shell_quote->($rfam);
    # Backticks keep bowtie-build's (verbose) stdout off our own stdout.
    `bowtie-build $quoted_rfam rfam`;
    die "bowtie-build failed for '$rfam' (status $?)\n" if $? != 0;
    $index="rfam";
}

#chdir "rfam_match_1397022331";
### genome mapping
my ($quoted_index, $quoted_reads) = map { $shell_quote->($_) } ($index, $filein);
`bowtie -v $mis -f -p $threads -k 1 $quoted_index $quoted_reads --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`;
# Report a failed mapping instead of ignoring it; this is only a warning so
# that callers which tolerated a failing bowtie run keep working.
warn "bowtie exited with status $?; see $mapdir/run.log\n" if $? != 0;
0
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
46
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
# checkACGT($fasta_path)
#
# Despite its name this routine does not validate anything: it rewrites a
# FASTA file from the RNA to the DNA alphabet so that bowtie-build can index
# it. Every "U"/"u" on a sequence line becomes "T"; header lines (starting
# with ">") are copied unchanged.
#
# Parameters:
#   $fasta_path - FASTA file to convert. When omitted, the file-level $rfam
#                 is used, which is what the routine always read historically.
#
# Side effects:
#   * writes the converted copy to basename($fasta_path) in the current
#     working directory;
#   * sets the file-level $rfam to that base name, so the caller can hand it
#     straight to bowtie-build.
#
# Dies when no path is available or when a file cannot be opened or written.
sub checkACGT{
    my ($source) = @_;
    $source = $rfam unless defined $source;
    die "checkACGT: no reference FASTA file given\n"
        unless defined $source and length $source;

    open(my $in, '<', $source)
        or die "checkACGT: cannot open '$source' for reading: $!\n";
    # The whole file is collected before anything is written: source and
    # target can be the same file when $source is already a bare file name
    # in the current directory.
    my $converted = '';
    while (my $line = <$in>) {
        $line =~ tr/Uu/TT/ unless $line =~ /^>/;
        $converted .= $line;
    }
    close $in;

    $rfam = basename($source);
    open(my $out, '>', $rfam)
        or die "checkACGT: cannot open '$rfam' for writing: $!\n";
    print {$out} $converted;
    # Buffered write errors (e.g. disk full) only surface at close.
    close($out) or die "checkACGT: cannot write '$rfam': $!\n";
    return;
}
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
62
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
# usage()
#
# Prints the command-line help (including the script version and name) to
# STDOUT and terminates the program with exit status 1. Never returns.
#
# The text is kept in line with the GetOptions() specification of this
# script: there is no --threads long option, and the reads must be FASTA
# because bowtie is always started with -f.
sub usage{
    print <<"USAGE";
Version $version
Usage:
$0 -i <reads.fa> -o <output dir> [-ref <rfam.fa>] [-index <prefix>] [-v <int>] [-p <int>]
options:
-i input file# input reads fasta file
-ref input file# rfam file, which does not contain miRNAs;
     needed when no usable -index is given
-index file-prefix #(must be indexed by bowtie-build) The parameter
     string must be the prefix of the bowtie index. For instance, if
     the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
     the prefix is 'h_sapiens_37_asm'.##can be null

-v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;

-p <int> number of alignment threads to launch (default: 1)

-o output directory

-h help
USAGE
    exit(1);
}
07745c0958dd Uploaded
big-tiandm
parents:
diff changeset
85