Mercurial > repos > victor > rsem
comparison rsem/rsem-wrapper-1.1.17.pl @ 0:4edac0183857
Initial commit from tarball version 1.17
author | victor |
---|---|
date | Mon, 05 Mar 2012 11:12:34 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4edac0183857 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 | |
4 use Data::Dumper; | |
5 use Getopt::Long; | |
6 use Pod::Usage; | |
7 | |
8 | |
9 | |
10 #pod2usage(-verbose => 1) if ($help == 1); | |
11 #if (@ARGV == 0) { | |
12 # pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2); | |
13 #} | |
14 | |
15 my $rsem_version = "/opt/rsem-1.1.17"; | |
16 my $minL = 1; | |
17 my $maxL = 1000; | |
18 my $NMB = 1024; | |
19 | |
20 # Extra file output #beta | |
21 # --isoformfile $isoforms | |
22 # --thetafile $theta | |
23 # --cntfile $cnt | |
24 # --modelfile $model | |
25 # --bamfile $bam_res | |
26 | |
27 GetOptions( | |
28 "log=s" => \$log, | |
29 "bam_genome=s" => \$bam_genome, | |
30 "bamtype=s" => \$bamtype, | |
31 "isoformfile=s" => \$isoforms, | |
32 "reference=s" => \$dbref, | |
33 "sampling-for-bam=s" => \$samplingbam, | |
34 "thetafile=s" => \$theta, | |
35 "cntfile=s" => \$cnt, | |
36 "modelfile=s" => \$model, | |
37 "bamfile=s" => \$bamfile, | |
38 "output=s" => \$output, | |
39 "single_fasta=s" => \$single_fasta, | |
40 "fasta1=s" => \$fasta1, | |
41 "fasta2=s" => \$fasta2, | |
42 "single_fastq=s" => \$single_fastq, | |
43 "fastq1=s" => \$fastq1, | |
44 "fastq2=s" => \$fastq2, | |
45 "no-qualities" => \$no_qual, | |
46 "paired-end" => \$paired_end, | |
47 "sam" => \$is_sam, | |
48 "bam" => \$is_bam, | |
49 "sam-header-info=s" => \$fn_list, | |
50 "tag=s" => \$tagName, | |
51 "seed-length=i" => \$L, | |
52 "bowtie-path=s" => \$bowtie_path, | |
53 "bowtie-n=i" => \$C, | |
54 "bowtie-e=i" => \$E, | |
55 "bowtie-m=i" => \$maxHits, | |
56 "phred33-quals" => \$phred33, | |
57 "phred64-quals" => \$phred64, | |
58 "solexa-quals" => \$solexa, | |
59 "forward-prob=f" => \$probF, | |
60 "fragment-length-min=i" => \$minL, | |
61 "fragment-length-max=i" => \$maxL, | |
62 "fragment-length-mean=f" => \$mean, | |
63 "fragment-length-sd=f" => \$sd, | |
64 "estimate-rspd=s" => \$estRSPD, | |
65 "num-rspd-bins=i" => \$B, | |
66 "p|num-threads=i" => \$nThreads, | |
67 "output-genome-bam" => \$genBamF, | |
68 "calc-ci=s" => \$calcCI, | |
69 "ci-memory=i" => \$NMB, | |
70 "time" => \$mTime, | |
71 "q|quiet" => \$quiet, | |
72 ) or pod2usage( -exitval => 2, -verbose => 2 ); | |
73 | |
74 #check parameters and options | |
75 | |
76 if ($is_sam || $is_bam) { | |
77 pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 4); | |
78 pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam == 1&& $is_bam == 1); | |
79 pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_path ne "" || $C != 2 || $E != 99999999 || $maxHits != 200 || $phred33 || $phred64 || $solexa); | |
80 } | |
81 #else { | |
82 # pod2usage(-msg => "from rsem-wraper->Invalid number of arguments!", -exitval => 2, -verbose => 2) | |
83 # if (!$paired_end && scalar(@ARGV) != 1 || $paired_end && scalar(@ARGV) != 1); | |
84 # pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-suqls can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1); | |
85 # podwusage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne ""); | |
86 #} | |
87 | |
88 pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1); | |
89 pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1); | |
90 pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL); | |
91 pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1); | |
92 pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1); | |
93 | |
94 # IO Redirection to log file | |
95 use IO::Handle; | |
96 open OUTPUT, '>', $log or die "cant open file $log $!\n";; | |
97 open ERROR, '>>', $log or die "cant open file $log $!\n"; | |
98 STDOUT->fdopen( \*OUTPUT, 'w' ) or die "cant open file $!\n"; | |
99 STDERR->fdopen( \*ERROR, 'w' ) or die "cant open file $!\n"; | |
100 # | |
101 | |
102 my @options; | |
103 | |
104 # generates new output called sample_name.genome.bam | |
105 # with alignments | |
106 # mapped to genomic coordinates and annotated with their posterior | |
107 # probabilities. In addition, RSEM will call samtools (included in | |
108 # RSEM package) to sort and index the bam file. | |
109 # 'sample_name.genome.sorted.bam' and | |
110 # 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off) | |
111 | |
112 if ($bamtype eq "yes") { | |
113 my $bam_genome_par = "--output-genome-bam"; | |
114 push @options, $bam_genome_par; | |
115 } | |
116 if ($samplingbam eq "yes") { | |
117 my $samplingbam = "--sampling-for-bam"; | |
118 push @options, $samplingbam; | |
119 } | |
120 if ($estRSPD eq "yes") { | |
121 my $rspd = "--estimate-rspd"; | |
122 push @options, $rspd; | |
123 } | |
124 $probF = "--forward-prob $probF"; | |
125 push @options, $probF; | |
126 | |
127 if ($calcCI eq "yes") { | |
128 my $calcCI = "--calc-ci"; | |
129 push @options, $calcCI; | |
130 my $cimem = "--ci-memory $NMB"; | |
131 push @options, $cimem; | |
132 } | |
133 if ($tagName) { | |
134 my $tagName = "--tag $tagName"; | |
135 push @options, $tagName; | |
136 } | |
137 if ($L) { | |
138 my $L = "--seed-length $L"; | |
139 push @options, $L; | |
140 } | |
141 if ($C) { | |
142 my $C = "--bowtie-n $C"; | |
143 push @options, $C; | |
144 } | |
145 if ($E) { | |
146 my $E = "--bowtie-e $E"; | |
147 push @options, $E; | |
148 } | |
149 if ($maxHits) { | |
150 my $maxHits = "--bowtie-m $maxHits"; | |
151 push @options, $maxHits; | |
152 } | |
153 if ($minL != 1) { | |
154 my $minL = "--fragment-length-min $minL"; | |
155 push @options, $minL; | |
156 } | |
157 if ($maxL != 1000) { | |
158 my $maxL = "--fragment-length-max $maxL"; | |
159 push @options, $maxL; | |
160 } | |
161 if ($mean) { | |
162 my $mean = "--fragment-length-mean $mean"; | |
163 push @options, $mean; | |
164 } | |
165 if ($sd) { | |
166 my $sd = "--fragment-length-sd $sd"; | |
167 push @options, $sd; | |
168 } | |
169 my $options= join(" ", @options); | |
170 | |
171 #BUILD COMMAND BASED ON PARSED OPTIONS | |
172 if ($no_qual) { | |
173 #reads are in fasta file format | |
174 if ($paired_end) { # reads are in paired end | |
175 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities --paired-end -p $nThreads $options $fasta1 $fasta2 $dbref $output"; | |
176 print "RSEM Parameters used by Galaxy:\n$cmd\n"; | |
177 system($cmd); | |
178 } | |
179 #run single end with one fasta file | |
180 else { | |
181 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities -p $nThreads $options $single_fasta $dbref $output"; | |
182 print "RSEM Parameters used by Galaxy:\n$cmd\n"; | |
183 system($cmd); | |
184 } | |
185 } | |
186 else { | |
187 # reads are in fastq file format | |
188 # type of fastq file? | |
189 my $fastqtype; | |
190 if ($phred33) { | |
191 $fastqtype = "--phred33-quals"; | |
192 } | |
193 elsif ($phred64) { | |
194 $fastqtype = "--phred64-quals"; | |
195 } | |
196 elsif ($solexa) { | |
197 $fastqtype = "--solexa-quals"; | |
198 } | |
199 if ($paired_end) { | |
200 #reads in paired end | |
201 #run paired end with two fasq files | |
202 my $cmd = "$rsem_version/rsem-calculate-expression --quiet --paired-end -p $nThreads $options $fastqtype $fastq1 $fastq2 $dbref $output"; | |
203 print "RSEM Parameters used by Galaxy:\n$cmd\n"; | |
204 system($cmd); | |
205 } | |
206 else { | |
207 my $cmd = "$rsem_version/rsem-calculate-expression --quiet -p $nThreads $options $fastqtype $single_fastq $dbref $output"; | |
208 print "RSEM Parameters used by Galaxy:\n$cmd\n"; | |
209 system($cmd); | |
210 } | |
211 } | |
212 | |
213 #Rename files for galaxy | |
214 my $mv_genes = "mv $output.genes.results $output"; | |
215 my $mv_isoforms = "mv $output.isoforms.results $isoforms"; | |
216 | |
217 #print "bamtype-parameter=$bamtype\n"; | |
218 my $mv_bam_transcript; | |
219 my $mv_bam_genome; | |
220 if ($bamtype eq "yes") { | |
221 $mv_bam_genome = "mv $output.genome.sorted.bam $bam_genome"; | |
222 system($mv_bam_genome); | |
223 } | |
224 | |
225 $mv_bam_transcript = "mv $output.transcript.sorted.bam $bamfile"; | |
226 | |
227 my @rsem_dir = split(/\//, $output); | |
228 my $short_output = $rsem_dir[-1]; | |
229 my $mv_theta = "mv $output.stat/$short_output.theta $theta"; | |
230 my $mv_cnt = "mv $output.stat/$short_output.cnt $cnt"; | |
231 my $mv_model = "mv $output.stat/$short_output.model $model"; | |
232 system($mv_genes); | |
233 system($mv_isoforms); | |
234 system($mv_bam_transcript); | |
235 #system($mv_theta); | |
236 #system($mv_cnt); | |
237 #system($mv_model); | |
238 #print "LOG $mv\n"; |