comparison rsem/rsem-wrapper-1.1.17.pl @ 0:4edac0183857

Initial commit from tarball version 1.17
author victor
date Mon, 05 Mar 2012 11:12:34 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4edac0183857
1 #!/usr/bin/perl
2
3
4 use Data::Dumper;
5 use Getopt::Long;
6 use Pod::Usage;
7
8
9
10 #pod2usage(-verbose => 1) if ($help == 1);
11 #if (@ARGV == 0) {
12 # pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2);
13 #}
14
15 my $rsem_version = "/opt/rsem-1.1.17";
16 my $minL = 1;
17 my $maxL = 1000;
18 my $NMB = 1024;
19
20 # Extra file output #beta
21 # --isoformfile $isoforms
22 # --thetafile $theta
23 # --cntfile $cnt
24 # --modelfile $model
25 # --bamfile $bam_res
26
27 GetOptions(
28 "log=s" => \$log,
29 "bam_genome=s" => \$bam_genome,
30 "bamtype=s" => \$bamtype,
31 "isoformfile=s" => \$isoforms,
32 "reference=s" => \$dbref,
33 "sampling-for-bam=s" => \$samplingbam,
34 "thetafile=s" => \$theta,
35 "cntfile=s" => \$cnt,
36 "modelfile=s" => \$model,
37 "bamfile=s" => \$bamfile,
38 "output=s" => \$output,
39 "single_fasta=s" => \$single_fasta,
40 "fasta1=s" => \$fasta1,
41 "fasta2=s" => \$fasta2,
42 "single_fastq=s" => \$single_fastq,
43 "fastq1=s" => \$fastq1,
44 "fastq2=s" => \$fastq2,
45 "no-qualities" => \$no_qual,
46 "paired-end" => \$paired_end,
47 "sam" => \$is_sam,
48 "bam" => \$is_bam,
49 "sam-header-info=s" => \$fn_list,
50 "tag=s" => \$tagName,
51 "seed-length=i" => \$L,
52 "bowtie-path=s" => \$bowtie_path,
53 "bowtie-n=i" => \$C,
54 "bowtie-e=i" => \$E,
55 "bowtie-m=i" => \$maxHits,
56 "phred33-quals" => \$phred33,
57 "phred64-quals" => \$phred64,
58 "solexa-quals" => \$solexa,
59 "forward-prob=f" => \$probF,
60 "fragment-length-min=i" => \$minL,
61 "fragment-length-max=i" => \$maxL,
62 "fragment-length-mean=f" => \$mean,
63 "fragment-length-sd=f" => \$sd,
64 "estimate-rspd=s" => \$estRSPD,
65 "num-rspd-bins=i" => \$B,
66 "p|num-threads=i" => \$nThreads,
67 "output-genome-bam" => \$genBamF,
68 "calc-ci=s" => \$calcCI,
69 "ci-memory=i" => \$NMB,
70 "time" => \$mTime,
71 "q|quiet" => \$quiet,
72 ) or pod2usage( -exitval => 2, -verbose => 2 );
73
74 #check parameters and options
75
76 if ($is_sam || $is_bam) {
77 pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 4);
78 pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam == 1&& $is_bam == 1);
79 pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_path ne "" || $C != 2 || $E != 99999999 || $maxHits != 200 || $phred33 || $phred64 || $solexa);
80 }
81 #else {
82 # pod2usage(-msg => "from rsem-wraper->Invalid number of arguments!", -exitval => 2, -verbose => 2)
83 # if (!$paired_end && scalar(@ARGV) != 1 || $paired_end && scalar(@ARGV) != 1);
84 # pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-suqls can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1);
85 # podwusage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne "");
86 #}
87
88 pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1);
89 pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1);
90 pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
91 pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
92 pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
93
94 # IO Redirection to log file
95 use IO::Handle;
96 open OUTPUT, '>', $log or die "cant open file $log $!\n";;
97 open ERROR, '>>', $log or die "cant open file $log $!\n";
98 STDOUT->fdopen( \*OUTPUT, 'w' ) or die "cant open file $!\n";
99 STDERR->fdopen( \*ERROR, 'w' ) or die "cant open file $!\n";
100 #
101
102 my @options;
103
104 # generates new output called sample_name.genome.bam
105 # with alignments
106 # mapped to genomic coordinates and annotated with their posterior
107 # probabilities. In addition, RSEM will call samtools (included in
108 # RSEM package) to sort and index the bam file.
109 # 'sample_name.genome.sorted.bam' and
110 # 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off)
111
112 if ($bamtype eq "yes") {
113 my $bam_genome_par = "--output-genome-bam";
114 push @options, $bam_genome_par;
115 }
116 if ($samplingbam eq "yes") {
117 my $samplingbam = "--sampling-for-bam";
118 push @options, $samplingbam;
119 }
120 if ($estRSPD eq "yes") {
121 my $rspd = "--estimate-rspd";
122 push @options, $rspd;
123 }
124 $probF = "--forward-prob $probF";
125 push @options, $probF;
126
127 if ($calcCI eq "yes") {
128 my $calcCI = "--calc-ci";
129 push @options, $calcCI;
130 my $cimem = "--ci-memory $NMB";
131 push @options, $cimem;
132 }
133 if ($tagName) {
134 my $tagName = "--tag $tagName";
135 push @options, $tagName;
136 }
137 if ($L) {
138 my $L = "--seed-length $L";
139 push @options, $L;
140 }
141 if ($C) {
142 my $C = "--bowtie-n $C";
143 push @options, $C;
144 }
145 if ($E) {
146 my $E = "--bowtie-e $E";
147 push @options, $E;
148 }
149 if ($maxHits) {
150 my $maxHits = "--bowtie-m $maxHits";
151 push @options, $maxHits;
152 }
153 if ($minL != 1) {
154 my $minL = "--fragment-length-min $minL";
155 push @options, $minL;
156 }
157 if ($maxL != 1000) {
158 my $maxL = "--fragment-length-max $maxL";
159 push @options, $maxL;
160 }
161 if ($mean) {
162 my $mean = "--fragment-length-mean $mean";
163 push @options, $mean;
164 }
165 if ($sd) {
166 my $sd = "--fragment-length-sd $sd";
167 push @options, $sd;
168 }
169 my $options= join(" ", @options);
170
171 #BUILD COMMAND BASED ON PARSED OPTIONS
172 if ($no_qual) {
173 #reads are in fasta file format
174 if ($paired_end) { # reads are in paired end
175 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities --paired-end -p $nThreads $options $fasta1 $fasta2 $dbref $output";
176 print "RSEM Parameters used by Galaxy:\n$cmd\n";
177 system($cmd);
178 }
179 #run single end with one fasta file
180 else {
181 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities -p $nThreads $options $single_fasta $dbref $output";
182 print "RSEM Parameters used by Galaxy:\n$cmd\n";
183 system($cmd);
184 }
185 }
186 else {
187 # reads are in fastq file format
188 # type of fastq file?
189 my $fastqtype;
190 if ($phred33) {
191 $fastqtype = "--phred33-quals";
192 }
193 elsif ($phred64) {
194 $fastqtype = "--phred64-quals";
195 }
196 elsif ($solexa) {
197 $fastqtype = "--solexa-quals";
198 }
199 if ($paired_end) {
200 #reads in paired end
201 #run paired end with two fasq files
202 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --paired-end -p $nThreads $options $fastqtype $fastq1 $fastq2 $dbref $output";
203 print "RSEM Parameters used by Galaxy:\n$cmd\n";
204 system($cmd);
205 }
206 else {
207 my $cmd = "$rsem_version/rsem-calculate-expression --quiet -p $nThreads $options $fastqtype $single_fastq $dbref $output";
208 print "RSEM Parameters used by Galaxy:\n$cmd\n";
209 system($cmd);
210 }
211 }
212
213 #Rename files for galaxy
214 my $mv_genes = "mv $output.genes.results $output";
215 my $mv_isoforms = "mv $output.isoforms.results $isoforms";
216
217 #print "bamtype-parameter=$bamtype\n";
218 my $mv_bam_transcript;
219 my $mv_bam_genome;
220 if ($bamtype eq "yes") {
221 $mv_bam_genome = "mv $output.genome.sorted.bam $bam_genome";
222 system($mv_bam_genome);
223 }
224
225 $mv_bam_transcript = "mv $output.transcript.sorted.bam $bamfile";
226
227 my @rsem_dir = split(/\//, $output);
228 my $short_output = $rsem_dir[-1];
229 my $mv_theta = "mv $output.stat/$short_output.theta $theta";
230 my $mv_cnt = "mv $output.stat/$short_output.cnt $cnt";
231 my $mv_model = "mv $output.stat/$short_output.model $model";
232 system($mv_genes);
233 system($mv_isoforms);
234 system($mv_bam_transcript);
235 #system($mv_theta);
236 #system($mv_cnt);
237 #system($mv_model);
238 #print "LOG $mv\n";