annotate matching.pl @ 37:9ae0d25e4169 draft

Uploaded
author big-tiandm
date Thu, 31 Jul 2014 03:08:35 -0400
parents 7321a6f82492
children 0c4e11018934
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
#!/usr/bin/perl -w
#Filename:
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2013/7/19
#Modified:
#Description: Align a FASTA file of reads to a genome with bowtie, building
#             the bowtie index first when no usable index prefix is given.
#             Results are written to <outdir>/genome_match_<time>/.
my $version = '1.00';    # kept as a string so the help text shows "1.00", not "1"

use strict;
use warnings;
use File::Spec;
use Getopt::Long;

my %opts;
GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","time:s","h");

# -i and -o are mandatory; -h asks for the help text.
if (defined $opts{h} or not (defined $opts{i} and defined $opts{o})) {
    # Tell usage() which exit status is intended: 0 for an explicit -h,
    # 1 for missing arguments.
    usage(defined $opts{h} ? 0 : 1);
}

# The script changes into the output directory before running bowtie, so every
# user-supplied path is made absolute first; otherwise relative paths would be
# resolved against the output directory instead of the invocation directory.
my $filein  = File::Spec->rel2abs($opts{'i'});
my $fileout = File::Spec->rel2abs($opts{'o'});
my $genome  = defined $opts{'g'} ? File::Spec->rel2abs($opts{'g'}) : undef;
my $index   = (defined $opts{'index'} and length $opts{'index'})
            ? File::Spec->rel2abs($opts{'index'})
            : "";

my $mis     = defined $opts{'v'} ? $opts{'v'} : 0;     # allowed mismatches (bowtie -v)
my $hits    = defined $opts{'r'} ? $opts{'r'} : 25;    # multi-mapping limit (bowtie -m)
my $threads = defined $opts{'p'} ? $opts{'p'} : 1;     # alignment threads (bowtie -p)

# -r is declared as a string option, so check it before handing it to bowtie.
die "-r must be a non-negative integer, got '$hits'\n" unless $hits =~ /\A\d+\z/;

# Directory suffix: current local time unless the caller supplied -time.
my $time = defined $opts{'time'} ? $opts{'time'} : Time();

my $mapdir = File::Spec->catdir($fileout, "genome_match_" . $time);
if (not -d $mapdir) {
    mkdir $mapdir or die "Cannot create directory $mapdir: $!\n";
}
chdir $mapdir or die "Cannot change into directory $mapdir: $!\n";

###check genome index
# An index is considered usable when "<prefix>.1.ebwt" exists and is non-empty.
# Otherwise the index is built from the genome, using the genome path itself as
# the index prefix.
unless (length $index and -s $index . ".1.ebwt") {
    defined $genome
        or die "No usable bowtie index found; -g <genome file> is required to build one\n";
    # bowtie-build's standard output is not needed, so it is discarded.
    run_to_file(File::Spec->devnull(), 'bowtie-build', $genome, $genome);
    $index = $genome;
}

### genome mapping
# Alignments go to genome_mapped.bwt; bowtie itself writes the three FASTA
# files named by --al, --un and --max into the current directory.
run_to_file(
    'genome_mapped.bwt',
    'bowtie',
    '-v', $mis, '-f', '-p', $threads, '-m', $hits,
    '-a', '--best', '--strata',
    $index, $filein,
    '--al',  'genome_mapped.fa',
    '--un',  'genome_not_mapped.fa',
    '--max', 'genome_mapped_Mlimit.fa',
);

#`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`;

# Run an external command without a shell, with its standard output sent to
# a file, and die if the command cannot be started or exits unsuccessfully.
#
# Arguments:
#   $outfile - file that receives the command's standard output (truncated)
#   @cmd     - program name followed by its arguments, passed verbatim
sub run_to_file {
    my ($outfile, @cmd) = @_;

    my $pid = fork();
    die "Cannot fork to run $cmd[0]: $!\n" unless defined $pid;

    if ($pid == 0) {
        # Child process: redirect stdout, then replace ourselves with the command.
        open STDOUT, '>', $outfile or die "Cannot write $outfile: $!\n";
        exec { $cmd[0] } @cmd or die "Cannot exec $cmd[0]: $!\n";
    }

    waitpid $pid, 0;
    die "$cmd[0] failed (wait status $?)\n" if $? != 0;
    return;
}
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
50
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
# Build the time stamp used to name the per-run output directory.
#
# Arguments:
#   $epoch - optional time in seconds since the epoch; defaults to the
#            current time when omitted.
# Returns:
#   The local time as "YYYY-MM-DD-hh-mm-ss"; month, day, hour, minute and
#   second are zero-padded to two digits.
sub Time{
    my ($epoch) = @_;
    $epoch = time() unless defined $epoch;

    # localtime() reports the month as 0-11 and the year as an offset from 1900.
    my ($sec, $min, $hour, $day, $month, $year) = (localtime($epoch))[0 .. 5];

    return sprintf('%d-%02d-%02d-%02d-%02d-%02d',
        $year + 1900, $month + 1, $day, $hour, $min, $sec);
}
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
64
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
# Print the command-line help text to standard output and terminate the script.
#
# Arguments:
#   $status - optional process exit status; defaults to 1 when omitted.
# Does not return.
sub usage{
    my ($status) = @_;
    $status = 1 unless defined $status;

    print <<"USAGE";
Version $version
Usage:
  $0 -i <reads.fa> -o <outdir> [-g <genome.fa>] [-index <prefix>] [options]
options:
  -i      input file   # input reads FASTA file (bowtie is run with -f)
  -g      input file   # genome FASTA file; needed when -index is not given
                       # or its '.1.ebwt' file is missing, in which case the
                       # index is built with bowtie-build
  -index  file-prefix  # (must be indexed by bowtie-build) The parameter
                       # string must be the prefix of the bowtie index. For
                       # instance, if the first indexed file is called
                       # 'h_sapiens_37_asm.1.ebwt' then the prefix is
                       # 'h_sapiens_37_asm'. Can be null.
  -v <int>             report end-to-end hits w/ <=v mismatches; ignore
                       qualities, default 0

  -p <int>             number of alignment threads to launch (default: 1)

  -r <int>             a read is allowed to map up to this number of
                       positions in the genome, default is 25

  -o      output directory; results are written to
                       <outdir>/genome_match_<time>/

  -time <string>       value used for <time> in the output directory name;
                       default is the current local time formatted as
                       YYYY-MM-DD-hh-mm-ss

  -h      help
USAGE
    exit($status);
}
7321a6f82492 Uploaded
big-tiandm
parents:
diff changeset
90