Mercurial > repos > chawhwa > neuma
diff NEUMA-1.2.1/auto_NEUMA_SE.pl @ 0:c44c43d185ef draft default tip
NEUMA-1.2.1 Uploaded
author | chawhwa |
---|---|
date | Thu, 08 Aug 2013 00:46:13 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NEUMA-1.2.1/auto_NEUMA_SE.pl Thu Aug 08 00:46:13 2013 -0400 @@ -0,0 +1,176 @@ +#!/usr/bin/perl + +if(@ARGV<9) { print "usage: $0 [options] -L=<read_length> -i=<input_file> -U=<EUMA_prefix(fullpath, before .gEUMA or .iEUMA)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>\n\nOrder of arguments can be interchangeable.\n\n"; exit; } + + + +## options ## +($fastoption)= grep /^-f=/, @ARGV; +if(!defined($fastoption)){$fastoption='q'; } +else { $fastoption =~s/^-f=//; if($fastoption !~/^[fq]$/) { die "wrong file type (-f).\n"; } } + +($coding_option)= grep /^-c=/, @ARGV; +if(!defined($coding_option)){$coding_option='n'; } +else { $coding_option =~s/^-c=//; if($coding_option !~/^[nc]$/) { die "wrong coding option (-c).\n"; } } + +($datatype)= grep /^-d=/, @ARGV; +if(!defined($datatype)){$datatype='R'; } +else { $datatype =~s/^-d=//; if($datatype !~/^[RE]$/) { die "wrong data type (-d).\n"; } } + +($parallel)= grep /^-p=/, @ARGV; +if(!defined($parallel)){$parallel=1; } +else { $parallel =~s/^-p=//; if($parallel !~/^[\d]+$/) { die "wrong parallel (multi-thread) option (-p).\n"; } } + +($EUMAcut)= grep /^-t=/, @ARGV; +if(!defined($EUMAcut)){$EUMAcut=50; } +else { $EUMAcut =~s/^-t=//; if($EUMAcut !~/^[\d\.]+$/) { die "wrong EUMAcut (-t).\n"; } } + +($StrandSpecificity)= grep /^--str=/, @ARGV; +if(!defined($StrandSpecificity)){ $StrandSpecificity='N'; } +else { $StrandSpecificity =~s/^--str=//; if($StrandSpecificity !~/^[SRN]+$/) { die "wrong strand specificity (--str).\n"; } } + +($get_gReadcount)= grep /^--gReadcount/, @ARGV; +if(!defined($get_gReadcount)){ $get_gReadcount=0; } +else { $get_gReadcount=1; } + +($mm)= grep /^--mm=/, @ARGV; +if(!defined($mm)){ $mm=0; } +else { $mm =~s/^--mm=//; if($mm !~/^[\d\.]+$/) { die "ERROR: wrong number of mismatches (--mm).\n"; } } + +($noNIR)= grep /^--noNIR/, @ARGV; +if(!defined($noNIR)){ $noNIR=0; } +else { $noNIR=1; } + +($noNEUMA)= grep /^--noNEUMA/, @ARGV; +if(!defined($noNEUMA)){ $noNEUMA=0; } +else { $noNEUMA=1; } + +if($noNIR==1 && $noNEUMA==0) { die "ERROR: --noNIR must be used together with --noNEUMA.\n"; } + + + + +## required arguments ## + +($READ_LENGTH)= grep /^-L=/, @ARGV; +if(!defined($READ_LENGTH)){ die "READ_LENGTH must be specified (-L).\n"; } +else { $READ_LENGTH =~s/^-L=//; if($READ_LENGTH !~/^[\d]+$/) { die "wrong READ_LENGTH (-L).\n"; } } + +($input_fa)= grep /^-i=/, @ARGV; +if(!defined($input_fa)){ die "input_fa must be specified (-i).\n"; } +else { $input_fa =~s/^-i=//; if(!-e $input_fa) { die "wrong input_file (-i).\n"; } } + +($EUMAprefix)= grep /^-U=/, @ARGV; +if(!defined($EUMAprefix)){ die "EUMAprefix must be specified (-U).\n"; } +else { $EUMAprefix =~s/^-U=//; if(!-e $EUMAprefix.".gEUMA" || !-e $EUMAprefix.".iEUMA") { die "wrong EUMAprefix (-U).\n"; } } + +($gene2NM_file)= grep /^--g2m=/, @ARGV; +if(!defined($gene2NM_file)){ die "gene2NM_file must be specified (--g2m).\n"; } +else { $gene2NM_file =~s/^--g2m=//; if(!-e $gene2NM_file) { die "wrong gene2NM_file (--g2m).\n"; } } + +($gene2symbol_file)= grep /^--g2s=/, @ARGV; +if(!defined($gene2symbol_file)){ die "gene2symbol_file must be specified (--g2s).\n"; } +else { $gene2symbol_file =~s/^--g2s=//; if(!-e $gene2symbol_file) { die "wrong gene2symbol_file (--g2s).\n"; } } + +($bowtie_dir)= grep /^-b=/, @ARGV; +if(!defined($bowtie_dir)){ die "bowtie_dir must be specified (-b).\n"; } +else { $bowtie_dir =~s/^-b=//; if(!-d $bowtie_dir || !-e "$bowtie_dir/bowtie") { die "wrong bowtie_dir(please avoid using '~') (-b).\n"; } } + +($bowtieindex)= grep /^--bi=/, @ARGV; +if(!defined($bowtieindex)){ die "bowtieindex must be specified (--bi).\n"; } +else { $bowtieindex =~s/^--bi=//; if(!-e "$bowtieindex.1.ebwt") { die "wrong bowtieindex (--bi).\n"; } } + +($basedir)= grep /^-o=/, @ARGV; +if(!defined($basedir)){ die "output dir must be specified (-o).\n"; } +else { $basedir =~s/^-o=//; if($basedir=~/~/) { die "wrong base_dir(please avoid using '~') (-o).\n"; } } + +($sample)= grep /^-s=/, @ARGV; +if(!defined($sample)){ die "sample name must be specified (-s).\n"; } +else { $sample =~s/^-s=//; } + + +############## + + +my $coding=''; +if($coding_option eq 'n') {$coding='';} +elsif($coding_option eq 'c') {$coding='-C';} +else { die "Error: invalid coding option.\n"; } + +if($datatype eq 'R') { $mapping_stat_column = 1; } +elsif($datatype eq 'E') { $mapping_stat_column = 3; } +else { die "Error: wrong datatype.\n"; } + +if($StrandSpecificity eq 'S') { $bowtie_strand_option = "--norc"; } +elsif($StrandSpecificity eq 'R') { $bowtie_strand_option = "--nofc"; } +else { $bowtie_strand_option = ""; } + + +($scriptdir) = $0 =~ /(.+)\/[^\/]+$/; + +$bowtieout_dir = "$basedir/bowtieout"; +$readcount_dir = "$basedir/readcount"; +$FVKMdir = "$basedir/FVKM"; +$LVKMdir = "$basedir/LVKM"; + +$bowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.bowtieout"; +$bestbowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.best.bowtieout"; +$mapping_stat_file = "$basedir/mapping_stat.$sample.single"; + +`mkdir -p $bowtieout_dir`; +`mkdir -p $readcount_dir`; +`mkdir -p $FVKMdir`; +`mkdir -p $LVKMdir`; + +`dos2unix $gene2NM_file`; +`dos2unix $gene2symbol_file`; + + +#$perlcommand = "perl -I $scriptdir"; +$perlcommand = "perl"; +$bestbowtieout_script = "$scriptdir/filter.best.from.bowtieout.3.pl"; +$bowtieout2mappingstat_script = "$scriptdir/bowtieout2mappingstat.3.pl"; +$bowtieout2readcount_script = "$scriptdir/bowtie2genecount.11.pl"; +$NIR2LVKM_script = " $scriptdir/NIR2LVKM_SE.pl"; + +=cut +print STDERR "Mapping reads using bowtie...\n"; +`$bowtie_dir/bowtie -$fastoption $coding -a -v $mm --suppress 5,6,7 -p $parallel -m 100 $bowtie_strand_option $bowtieindex $input_fa > $bowtieoutfile`; + + + +print STDERR "Filtering best-matching alignments...\n"; +if($mm > 0) { + system("$perlcommand $bestbowtieout_script -d $datatype --rm $bowtieoutfile 0"); +} +else { + system("mv $bowtieoutfile $bestbowtieoutfile"); +} + +print STDERR "Computing the mapping stat...\n"; +system("$perlcommand $bowtieout2mappingstat_script -d $datatype $bestbowtieoutfile $sample 0 > $mapping_stat_file"); +print STDERR "Mapping stat :\n"; +system("cat $mapping_stat_file"); + +if($noNIR==0){ + system("$perlcommand $bowtieout2readcount_script -d $datatype --gNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gNIR"); + system("$perlcommand $bowtieout2readcount_script -d $datatype --iNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.iNIR"); +} +=cut +#if($get_gReadcount==1) { + system("$perlcommand $bowtieout2readcount_script -d $datatype -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gReadcount"); +#} + + +=cut + +if($noNEUMA==0){ + print STDERR "Computing FVKM and LVKM...\n"; + `$perlcommand $NIR2LVKM_script $sample $readcount_dir mm$mm $EUMAprefix $FVKMdir $LVKMdir $EUMAcut $mapping_stat_column $mapping_stat_file 0 2 $gene2NM_file $gene2symbol_file $datatype $scriptdir`; +} + +print STDERR "Done.\n"; + + + +