0
|
1 #!/usr/bin/perl
|
|
2
|
|
3 if(@ARGV<9) { print "usage: $0 [options] -L=<read_length> -i=<input_file> -U=<EUMA_prefix(fullpath, before .gEUMA or .iEUMA)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>\n\nOrder of arguments can be interchangeable.\n\n"; exit; }
|
|
4
|
|
5
|
|
6
|
|
7 ## options ##
|
|
8 ($fastoption)= grep /^-f=/, @ARGV;
|
|
9 if(!defined($fastoption)){$fastoption='q'; }
|
|
10 else { $fastoption =~s/^-f=//; if($fastoption !~/^[fq]$/) { die "wrong file type (-f).\n"; } }
|
|
11
|
|
12 ($coding_option)= grep /^-c=/, @ARGV;
|
|
13 if(!defined($coding_option)){$coding_option='n'; }
|
|
14 else { $coding_option =~s/^-c=//; if($coding_option !~/^[nc]$/) { die "wrong coding option (-c).\n"; } }
|
|
15
|
|
16 ($datatype)= grep /^-d=/, @ARGV;
|
|
17 if(!defined($datatype)){$datatype='R'; }
|
|
18 else { $datatype =~s/^-d=//; if($datatype !~/^[RE]$/) { die "wrong data type (-d).\n"; } }
|
|
19
|
|
20 ($parallel)= grep /^-p=/, @ARGV;
|
|
21 if(!defined($parallel)){$parallel=1; }
|
|
22 else { $parallel =~s/^-p=//; if($parallel !~/^[\d]+$/) { die "wrong parallel (multi-thread) option (-p).\n"; } }
|
|
23
|
|
24 ($EUMAcut)= grep /^-t=/, @ARGV;
|
|
25 if(!defined($EUMAcut)){$EUMAcut=50; }
|
|
26 else { $EUMAcut =~s/^-t=//; if($EUMAcut !~/^[\d\.]+$/) { die "wrong EUMAcut (-t).\n"; } }
|
|
27
|
|
28 ($StrandSpecificity)= grep /^--str=/, @ARGV;
|
|
29 if(!defined($StrandSpecificity)){ $StrandSpecificity='N'; }
|
|
30 else { $StrandSpecificity =~s/^--str=//; if($StrandSpecificity !~/^[SRN]+$/) { die "wrong strand specificity (--str).\n"; } }
|
|
31
|
|
32 ($get_gReadcount)= grep /^--gReadcount/, @ARGV;
|
|
33 if(!defined($get_gReadcount)){ $get_gReadcount=0; }
|
|
34 else { $get_gReadcount=1; }
|
|
35
|
|
36 ($mm)= grep /^--mm=/, @ARGV;
|
|
37 if(!defined($mm)){ $mm=0; }
|
|
38 else { $mm =~s/^--mm=//; if($mm !~/^[\d\.]+$/) { die "ERROR: wrong number of mismatches (--mm).\n"; } }
|
|
39
|
|
40 ($noNIR)= grep /^--noNIR/, @ARGV;
|
|
41 if(!defined($noNIR)){ $noNIR=0; }
|
|
42 else { $noNIR=1; }
|
|
43
|
|
44 ($noNEUMA)= grep /^--noNEUMA/, @ARGV;
|
|
45 if(!defined($noNEUMA)){ $noNEUMA=0; }
|
|
46 else { $noNEUMA=1; }
|
|
47
|
|
48 if($noNIR==1 && $noNEUMA==0) { die "ERROR: --noNIR must be used together with --noNEUMA.\n"; }
|
|
49
|
|
50
|
|
51
|
|
52
|
|
53 ## required arguments ##
|
|
54
|
|
55 ($READ_LENGTH)= grep /^-L=/, @ARGV;
|
|
56 if(!defined($READ_LENGTH)){ die "READ_LENGTH must be specified (-L).\n"; }
|
|
57 else { $READ_LENGTH =~s/^-L=//; if($READ_LENGTH !~/^[\d]+$/) { die "wrong READ_LENGTH (-L).\n"; } }
|
|
58
|
|
59 ($input_fa)= grep /^-i=/, @ARGV;
|
|
60 if(!defined($input_fa)){ die "input_fa must be specified (-i).\n"; }
|
|
61 else { $input_fa =~s/^-i=//; if(!-e $input_fa) { die "wrong input_file (-i).\n"; } }
|
|
62
|
|
63 ($EUMAprefix)= grep /^-U=/, @ARGV;
|
|
64 if(!defined($EUMAprefix)){ die "EUMAprefix must be specified (-U).\n"; }
|
|
65 else { $EUMAprefix =~s/^-U=//; if(!-e $EUMAprefix.".gEUMA" || !-e $EUMAprefix.".iEUMA") { die "wrong EUMAprefix (-U).\n"; } }
|
|
66
|
|
67 ($gene2NM_file)= grep /^--g2m=/, @ARGV;
|
|
68 if(!defined($gene2NM_file)){ die "gene2NM_file must be specified (--g2m).\n"; }
|
|
69 else { $gene2NM_file =~s/^--g2m=//; if(!-e $gene2NM_file) { die "wrong gene2NM_file (--g2m).\n"; } }
|
|
70
|
|
71 ($gene2symbol_file)= grep /^--g2s=/, @ARGV;
|
|
72 if(!defined($gene2symbol_file)){ die "gene2symbol_file must be specified (--g2s).\n"; }
|
|
73 else { $gene2symbol_file =~s/^--g2s=//; if(!-e $gene2symbol_file) { die "wrong gene2symbol_file (--g2s).\n"; } }
|
|
74
|
|
75 ($bowtie_dir)= grep /^-b=/, @ARGV;
|
|
76 if(!defined($bowtie_dir)){ die "bowtie_dir must be specified (-b).\n"; }
|
|
77 else { $bowtie_dir =~s/^-b=//; if(!-d $bowtie_dir || !-e "$bowtie_dir/bowtie") { die "wrong bowtie_dir(please avoid using '~') (-b).\n"; } }
|
|
78
|
|
79 ($bowtieindex)= grep /^--bi=/, @ARGV;
|
|
80 if(!defined($bowtieindex)){ die "bowtieindex must be specified (--bi).\n"; }
|
|
81 else { $bowtieindex =~s/^--bi=//; if(!-e "$bowtieindex.1.ebwt") { die "wrong bowtieindex (--bi).\n"; } }
|
|
82
|
|
83 ($basedir)= grep /^-o=/, @ARGV;
|
|
84 if(!defined($basedir)){ die "output dir must be specified (-o).\n"; }
|
|
85 else { $basedir =~s/^-o=//; if($basedir=~/~/) { die "wrong base_dir(please avoid using '~') (-o).\n"; } }
|
|
86
|
|
87 ($sample)= grep /^-s=/, @ARGV;
|
|
88 if(!defined($sample)){ die "sample name must be specified (-s).\n"; }
|
|
89 else { $sample =~s/^-s=//; }
|
|
90
|
|
91
|
|
92 ##############
|
|
93
|
|
94
|
|
95 my $coding='';
|
|
96 if($coding_option eq 'n') {$coding='';}
|
|
97 elsif($coding_option eq 'c') {$coding='-C';}
|
|
98 else { die "Error: invalid coding option.\n"; }
|
|
99
|
|
100 if($datatype eq 'R') { $mapping_stat_column = 1; }
|
|
101 elsif($datatype eq 'E') { $mapping_stat_column = 3; }
|
|
102 else { die "Error: wrong datatype.\n"; }
|
|
103
|
|
104 if($StrandSpecificity eq 'S') { $bowtie_strand_option = "--norc"; }
|
|
105 elsif($StrandSpecificity eq 'R') { $bowtie_strand_option = "--nofc"; }
|
|
106 else { $bowtie_strand_option = ""; }
|
|
107
|
|
108
|
|
109 ($scriptdir) = $0 =~ /(.+)\/[^\/]+$/;
|
|
110
|
|
111 $bowtieout_dir = "$basedir/bowtieout";
|
|
112 $readcount_dir = "$basedir/readcount";
|
|
113 $FVKMdir = "$basedir/FVKM";
|
|
114 $LVKMdir = "$basedir/LVKM";
|
|
115
|
|
116 $bowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.bowtieout";
|
|
117 $bestbowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.best.bowtieout";
|
|
118 $mapping_stat_file = "$basedir/mapping_stat.$sample.single";
|
|
119
|
|
120 `mkdir -p $bowtieout_dir`;
|
|
121 `mkdir -p $readcount_dir`;
|
|
122 `mkdir -p $FVKMdir`;
|
|
123 `mkdir -p $LVKMdir`;
|
|
124
|
|
125 `dos2unix $gene2NM_file`;
|
|
126 `dos2unix $gene2symbol_file`;
|
|
127
|
|
128
|
|
129 #$perlcommand = "perl -I $scriptdir";
|
|
130 $perlcommand = "perl";
|
|
131 $bestbowtieout_script = "$scriptdir/filter.best.from.bowtieout.3.pl";
|
|
132 $bowtieout2mappingstat_script = "$scriptdir/bowtieout2mappingstat.3.pl";
|
|
133 $bowtieout2readcount_script = "$scriptdir/bowtie2genecount.11.pl";
|
|
134 $NIR2LVKM_script = " $scriptdir/NIR2LVKM_SE.pl";
|
|
135
|
|
136 =cut
|
|
137 print STDERR "Mapping reads using bowtie...\n";
|
|
138 `$bowtie_dir/bowtie -$fastoption $coding -a -v $mm --suppress 5,6,7 -p $parallel -m 100 $bowtie_strand_option $bowtieindex $input_fa > $bowtieoutfile`;
|
|
139
|
|
140
|
|
141
|
|
142 print STDERR "Filtering best-matching alignments...\n";
|
|
143 if($mm > 0) {
|
|
144 system("$perlcommand $bestbowtieout_script -d $datatype --rm $bowtieoutfile 0");
|
|
145 }
|
|
146 else {
|
|
147 system("mv $bowtieoutfile $bestbowtieoutfile");
|
|
148 }
|
|
149
|
|
150 print STDERR "Computing the mapping stat...\n";
|
|
151 system("$perlcommand $bowtieout2mappingstat_script -d $datatype $bestbowtieoutfile $sample 0 > $mapping_stat_file");
|
|
152 print STDERR "Mapping stat :\n";
|
|
153 system("cat $mapping_stat_file");
|
|
154
|
|
155 if($noNIR==0){
|
|
156 system("$perlcommand $bowtieout2readcount_script -d $datatype --gNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gNIR");
|
|
157 system("$perlcommand $bowtieout2readcount_script -d $datatype --iNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.iNIR");
|
|
158 }
|
|
159 =cut
|
|
160 #if($get_gReadcount==1) {
|
|
161 system("$perlcommand $bowtieout2readcount_script -d $datatype -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gReadcount");
|
|
162 #}
|
|
163
|
|
164
|
|
165 =cut
|
|
166
|
|
167 if($noNEUMA==0){
|
|
168 print STDERR "Computing FVKM and LVKM...\n";
|
|
169 `$perlcommand $NIR2LVKM_script $sample $readcount_dir mm$mm $EUMAprefix $FVKMdir $LVKMdir $EUMAcut $mapping_stat_column $mapping_stat_file 0 2 $gene2NM_file $gene2symbol_file $datatype $scriptdir`;
|
|
170 }
|
|
171
|
|
172 print STDERR "Done.\n";
|
|
173
|
|
174
|
|
175
|
|
176
|