annotate NEUMA-1.2.1/auto_NEUMA_PE.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
1 #!/usr/bin/perl
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
2
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
# Show the usage summary and stop when fewer than eight command-line
# arguments were supplied. The text is written to STDOUT and the script
# leaves through a plain exit.
my $usage_text = "usage: $0 [options] -L=<read_length> -D=<maxdist> -1=<input_file1(mate1)> -2=<input_file2(mate2)> -U=<Utable_prefix(fullpath, before .gU.table or .iU.table)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>\n\nOrder of arguments can be interchangeable.\n\n";
if (scalar(@ARGV) < 8) {
    print $usage_text;
    exit;
}
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
4
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
5
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
## options ##
# Optional settings are read from @ARGV as "-x=value" / "--name=value"
# tokens (the first matching token wins); boolean switches are recognised
# by prefix. The results are stored in package globals because the rest of
# the script reads them by name.

# Return the value of the first argument that starts with $prefix (prefix
# removed), or $default when there is no such argument. Dies with $error
# when a supplied value does not match the pattern $valid.
sub _option_value {
    my ($prefix, $default, $valid, $error) = @_;
    my ($arg) = grep { index($_, $prefix) == 0 } @ARGV;
    return $default if !defined $arg;
    my $value = substr($arg, length $prefix);
    die $error if $value !~ $valid;
    return $value;
}

# Return 1 when at least one argument starts with $prefix, 0 otherwise.
sub _option_flag {
    my ($prefix) = @_;
    return (grep { index($_, $prefix) == 0 } @ARGV) ? 1 : 0;
}

# -f : input read file type, 'q' (default) or 'f'; handed to bowtie as -q / -f.
$fastoption = _option_value('-f=', 'q', qr/^[fq]$/, "ERROR: wrong file type (-f).\n");

# -c : coding option, 'n' (default) or 'c'.
$coding_option = _option_value('-c=', 'n', qr/^[nc]$/, "ERROR: wrong coding option (-c).\n");

# -d : data type, 'R' (default) or 'E'.
$datatype = _option_value('-d=', 'R', qr/^[RE]$/, "ERROR: wrong data type (-d).\n");

# -p : number of bowtie threads (default 1).
$parallel = _option_value('-p=', 1, qr/^\d+$/, "ERROR: wrong parallel (multi-thread) option (-p).\n");

# -t : EUMA cutoff (default 50). Must be a well-formed non-negative decimal;
# strings such as ".." or "1.2.3" are rejected.
$EUMAcut = _option_value('-t=', 50, qr/^(?:\d+(?:\.\d*)?|\.\d+)$/, "ERROR: wrong EUMAcut (-t).\n");

# --str : strand specificity, exactly 'S' or 'N' (default 'N'). Multi-letter
# values are rejected because only the exact value 'S' enables the
# strand-specific bowtie option later on.
$StrandSpecificity = _option_value('--str=', 'N', qr/^[SN]$/, "ERROR: wrong strand specificity (--str).\n");

# --skip_init / --only_init : skip, or run only, the initial mapping stage.
$skip_init = _option_flag('--skip_init');
$only_init = _option_flag('--only_init');

if($only_init==1 && $skip_init==1) { die "ERROR: --only_init and --skip_init cannot be used simultaneously.\n"; }

# --noNIR / --noNEUMA : switch off the NIR and the EUMA/FVKM/LVKM stages.
$noNIR   = _option_flag('--noNIR');
$noNEUMA = _option_flag('--noNEUMA');

if($noNIR==1 && $noNEUMA==0) { die "ERROR: --noNIR must be used together with --noNEUMA.\n"; }

# --gReadcount : additionally compute total gene read counts.
$get_gReadcount = _option_flag('--gReadcount');

# --mm : number of mismatches handed to bowtie's -v (default 0). It must be
# a whole number; it is also embedded in the output file names.
$mm = _option_value('--mm=', 0, qr/^\d+$/, "ERROR: wrong number of mismatches (--mm).\n");
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
58
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
59
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
60
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
## required arguments ##
# Mandatory settings are read from @ARGV as "-x=value" / "--name=value"
# tokens (the first matching token wins) and stored in package globals.

# Return the text after $prefix from the first argument that starts with
# $prefix, or undef when no argument does.
sub _argument_value {
    my ($prefix) = @_;
    my ($arg) = grep { index($_, $prefix) == 0 } @ARGV;
    return defined $arg ? substr($arg, length $prefix) : undef;
}

# Same as _argument_value, but dies with $missing_error when the argument
# is absent.
sub _required_value {
    my ($prefix, $missing_error) = @_;
    my $value = _argument_value($prefix);
    die $missing_error if !defined $value;
    return $value;
}

# -L : read length, digits only.
$READ_LENGTH = _required_value('-L=', "ERROR: READ_LENGTH must be specified (-L).\n");
if ($READ_LENGTH !~ /^[\d]+$/) { die "ERROR: wrong READ_LENGTH (-L).\n"; }

# -D : maximum distance, digits only (handed to bowtie as --maxins).
$MAXDIST = _required_value('-D=', "ERROR: MAXDIST must be specified (-D).\n");
if ($MAXDIST !~ /^[\d]+$/) { die "ERROR: wrong MAXDIST (-D).\n"; }

# -1 / -2 : the two mate files; both must exist.
$input_file1 = _required_value('-1=', "ERROR: input_file1 must be specified (-1).\n");
if (!-e $input_file1) { die "ERROR: wrong input_file1 (-1).\n"; }

$input_file2 = _required_value('-2=', "ERROR: input_file2 must be specified (-2).\n");
if (!-e $input_file2) { die "ERROR: wrong input_file2 (-2).\n"; }

# -U, --g2m and --g2s are mandatory unless --only_init is in effect. A value
# that is supplied is always validated; a missing value is only an error
# when the later stages will run. (Previously a missing value in
# --only_init mode fell into the validation branch and was reported as
# "wrong ...", so --only_init could not be used without these arguments.)
$Utable_prefix = _argument_value('-U=');
if (defined $Utable_prefix) {
    if (!-e "${Utable_prefix}.gU.table" || !-e "${Utable_prefix}.iU.table") { die "ERROR: wrong Utable_prefix (-U).\n"; }
}
elsif ($only_init==0) { die "ERROR: Utable_prefix must be specified (-U).\n"; }

$gene2NM_file = _argument_value('--g2m=');
if (defined $gene2NM_file) {
    if (!-e $gene2NM_file) { die "ERROR: wrong gene2NM_file (--g2m).\n"; }
}
elsif ($only_init==0) { die "ERROR: gene2NM_file must be specified (--g2m).\n"; }

$gene2symbol_file = _argument_value('--g2s=');
if (defined $gene2symbol_file) {
    if (!-e $gene2symbol_file) { die "ERROR: wrong gene2symbol_file (--g2s).\n"; }
}
elsif ($only_init==0) { die "ERROR: gene2symbol_file must be specified (--g2s).\n"; }

# -b : directory that must contain the "bowtie" executable.
$bowtie_dir = _required_value('-b=', "ERROR: bowtie_dir must be specified (-b).\n");
if (!-d $bowtie_dir || !-e "$bowtie_dir/bowtie") { die "ERROR: wrong bowtie_dir(please avoid using '~') (-b).\n"; }

# --bi : bowtie index prefix; "<prefix>.1.ebwt" must exist.
$bowtieindex = _required_value('--bi=', "ERROR: bowtieindex must be specified (--bi).\n");
if (!-e "$bowtieindex.1.ebwt") { die "ERROR: wrong bowtieindex (--bi).\n"; }

# -o : output base directory; a '~' anywhere in the path is refused.
$basedir = _required_value('-o=', "ERROR: output dir must be specified (-o).\n");
if ($basedir =~ /~/) { die "ERROR: wrong base_dir(please avoid using '~') (-o).\n"; }

# -s : sample name, used as part of every output file name.
$sample = _required_value('-s=', "ERROR: sample name must be specified (-s).\n");
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
106
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
107
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
108 ##############
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
109
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
110
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
use File::Basename qw(dirname);

# The two U tables share the prefix given with -U.
$gUtable_file = $Utable_prefix.".gU.table";
$iUtable_file = $Utable_prefix.".iU.table";

# Extra switch inserted into the bowtie command line: nothing for coding
# option 'n', "-C" for coding option 'c'.
my $coding='';
if    ($coding_option eq 'n') { $coding = ''; }
elsif ($coding_option eq 'c') { $coding = '-C'; }
else  { die "ERROR: Error: invalid coding option.\n"; }

# Column number handed to the NIR2LVKM helper: 4 for data type 'R',
# 6 for data type 'E'.
if    ($datatype eq 'R') { $mapping_stat_column = 4; }
elsif ($datatype eq 'E') { $mapping_stat_column = 6; }
else  { die "ERROR: Error: wrong datatype.\n"; }

# Strand-specific runs add "--norc" to the bowtie command line.
if ($StrandSpecificity eq 'S') { $bowtie_strand_option = "--norc"; }
else                           { $bowtie_strand_option = ""; }

# Directory containing this script; the helper scripts are looked up there.
# dirname() yields "." when the script is started without a directory part
# (e.g. "perl auto_NEUMA_PE.pl ..."), where the former pattern match left
# $scriptdir undefined and the helpers were searched for under "/".
$scriptdir = dirname($0);

# Output sub-directories below the base directory given with -o.
$bowtieoutdir  = "$basedir/bowtieout";
$readcount_dir = "$basedir/readcount";
$lendis_dir    = "$basedir/insertlendis";
$EUMAdir       = "$basedir/EUMA";
$FVKM_dir      = "$basedir/FVKM";
$LVKM_dir      = "$basedir/LVKM";

$mapping_stat_file = "$basedir/mapping_stat.$sample";

# Last path component of the bowtie index prefix; it becomes part of the
# output file names.
($midrefix) = $bowtieindex =~ m{([^/]+)$};
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
138
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
139
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
# Create every output sub-directory that does not exist yet. The creation
# result is verified so that an unwritable or malformed output path stops
# the run here instead of surfacing later as failed redirections.
for my $outdir ($bowtieoutdir, $readcount_dir, $lendis_dir, $EUMAdir, $FVKM_dir, $LVKM_dir) {
    next if -d $outdir;
    system("mkdir -p $outdir");
    if (!-d $outdir) { die "ERROR: cannot create output directory $outdir.\n"; }
}
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
146
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
147
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
148 #`dos2unix $gene2NM_file`;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
149 #`dos2unix $gene2symbol_file`;
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
150
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
151
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
152
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
# Command used to launch the helper scripts. The disabled variant would
# also add the script directory to perl's module search path.
#$perlcommand = "perl -I $scriptdir";
$perlcommand = 'perl';

# Helper scripts for the mapping stage, all located in $scriptdir.
($bestbowtieout_script,
 $bowtie2insertlendis_script,
 $bowtieout2mappingstat_script,
 $bowtieout2readcount_script) = map { "$scriptdir/$_" } qw(
    filter.best.from.bowtieout.3.pl
    bowtieout2insertlendis.PE.pl
    bowtieout2mappingstat.3.pl
    bowtie2genecount.11.pl
);

# Helper scripts for the EUMA / FVKM / LVKM stage, also in $scriptdir.
($calc_gEUMA_script,
 $calc_iEUMA_script,
 $NIR2LVKM_script) = map { "$scriptdir/$_" } qw(
    calculate_gEUMA.2.pl
    calculate_iEUMA.2.pl
    NIR2LVKM.pl
);
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
163
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
164
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
# Run one pipeline step through the shell and abort the whole run when the
# step cannot be started, is killed by a signal, or exits with a non-zero
# status. Without this check a failed mapping or helper run would be
# followed by further steps working on missing or truncated files.
sub _run_step {
    my ($command) = @_;
    my $status = system($command);
    return if $status == 0;
    if ($status == -1) {
        die "ERROR: could not execute: $command ($!)\n";
    }
    if ($status & 127) {
        die sprintf("ERROR: command killed by signal %d: %s\n", $status & 127, $command);
    }
    die sprintf("ERROR: command exited with status %d: %s\n", $status >> 8, $command);
}

# File names shared by the steps below.
my $run_tag        = "$midrefix.maxins$MAXDIST.mm$mm";
my $raw_bowtieout  = "$bowtieoutdir/$sample.$run_tag.bowtieout";
my $best_bowtieout = "$bowtieoutdir/$sample.$run_tag.best.bowtieout";
my $count_prefix   = "$readcount_dir/$sample.$run_tag";
my $euma_prefix    = "$EUMAdir/$sample.$run_tag";
# NOTE(review): the insert-length file name carries a fixed "mm0" regardless
# of --mm. It is written and read under the same name, so it is kept as is
# to stay compatible with existing output directories (--skip_init).
my $insertlendis   = "$lendis_dir/$sample.$midrefix.maxins$MAXDIST.mm0.i.insertlendis";

system("date");

if ($skip_init==0) {
    print STDERR "Mapping reads using bowtie...\n";
    _run_step("$bowtie_dir/bowtie -$fastoption $coding --minins 0 --maxins $MAXDIST -v $mm -a --suppress 5,6,7 -p $parallel $bowtie_strand_option $bowtieindex -1 $input_file1 -2 $input_file2 > $raw_bowtieout");

    print STDERR "Filtering best-matching alignments...\n";
    if ($mm > 0) {
        # presumably the filter script writes the ".best.bowtieout" file that
        # the following steps read - confirm against the helper script
        _run_step("$perlcommand $bestbowtieout_script -d $datatype --rm $raw_bowtieout 1");
    }
    else {
        # With zero mismatches the raw output is simply renamed to the
        # ".best" name instead of being filtered.
        _run_step("mv $raw_bowtieout $best_bowtieout");
    }

    print STDERR "Computing fragment length distribution...\n";
    _run_step("$perlcommand $bowtie2insertlendis_script $best_bowtieout $READ_LENGTH > $insertlendis");
}

if ($only_init==0) {

    print STDERR "Computing the mapping stat...\n";
    _run_step("$perlcommand $bowtieout2mappingstat_script -d $datatype -m $MAXDIST -l $READ_LENGTH $best_bowtieout $sample 1 > $mapping_stat_file");
    print STDERR "Mapping stat :\n";
    system("cat $mapping_stat_file");    # informational only, status not checked

    if ($noNIR==0) {
        print STDERR "Computing NIRs...\n";
        _run_step("$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH --gNIR -f -s $sample -v $gene2NM_file $best_bowtieout 1 > $count_prefix.gNIR");
        _run_step("$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH --iNIR -f -s $sample -v $gene2NM_file $best_bowtieout 1 > $count_prefix.iNIR");
    }

    if ($get_gReadcount==1) {
        print STDERR "Computing total gene read counts (gReadcount)...\n";
        my $gReadcount_command = "$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH -f -s $sample -v $gene2NM_file $best_bowtieout 1 > $count_prefix.gReadcount";
        # The command line is echoed to STDERR before it is executed.
        print STDERR "$gReadcount_command\n";
        _run_step($gReadcount_command);
    }

    if ($noNEUMA==0) {
        print STDERR "Computing gEUMA and iEUMA...\n";
        _run_step("$perlcommand $calc_gEUMA_script $gUtable_file $insertlendis $READ_LENGTH > $euma_prefix.gEUMA");
        _run_step("$perlcommand $calc_iEUMA_script $iUtable_file $insertlendis $READ_LENGTH > $euma_prefix.iEUMA");

        print STDERR "Computing FVKM and LVKM...\n";
        _run_step("$perlcommand $NIR2LVKM_script $sample $readcount_dir $run_tag $EUMAdir $FVKM_dir $LVKM_dir $EUMAcut $mapping_stat_column $mapping_stat_file 1 2 $gene2NM_file $gene2symbol_file $datatype $scriptdir");

    }
}

print STDERR "Done.\n";

system("date");
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
217
c44c43d185ef NEUMA-1.2.1 Uploaded
chawhwa
parents:
diff changeset
218