annotate cuffdiff_wrapper.py @ 1:ebb9a992508d

Remove duplicate cds_diff_output option
author Jim Johnson <jj@umn.edu>
date Mon, 04 Feb 2013 21:23:20 -0600
parents da7241f92ecf
children fdf01b3c1841
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
3 # Wrapper supports Cuffdiff versions v1.3.0-v2.0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
4
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
6
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
7 def group_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
8 groups = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
9 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
10 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
11 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
12 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
13 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
14 elif arg[0] is ",":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
15 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
16 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
17 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
18 flist.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
19 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
20
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
21 setattr(parser.values, option.dest, groups)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
22
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
23 def label_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
24 labels = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
25 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
26 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
27 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
28 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
29 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
30 labels.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
31
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
32 setattr(parser.values, option.dest, labels)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
33
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
34 def stop_err( msg ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
35 sys.stderr.write( "%s\n" % msg )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
36 sys.exit()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
37
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
38 # Copied from sam_to_bam.py:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
39 def check_seq_file( dbkey, cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
40 seq_path = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
41 for line in open( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
42 line = line.rstrip( '\r\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
43 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
44 fields = line.split( '\t' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
45 if len( fields ) < 3:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
46 continue
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
47 if fields[1] == dbkey:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
48 seq_path = fields[2].strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
49 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
50 return seq_path
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
51
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
52 def __main__():
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
53 #Parse Command Line
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
54 parser = optparse.OptionParser()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
55
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
56 # Cuffdiff options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
57 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
58 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
59 parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
60 For, example, for paired end runs with fragments selected at 300bp, \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
61 where each end is 50bp, you should set -r to be 200. The default is 45bp.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
65
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
66 # Advanced Options:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
67 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
68 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
69
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
70 # Wrapper / Galaxy options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
71 parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, groupN_rep2" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
72 parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
73 parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
74 parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
75
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
76 # Label options
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
77 parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.")
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
78
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
79 # Normalization options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
80 parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
81
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
82 # Bias correction options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
83 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
84 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
85 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
86 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
87
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
88 # Outputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
89 parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
90 parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
91 parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
92 parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
93 parser.add_option( "--isoforms_read_group_tracking_output", dest="isoforms_read_group_tracking_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
94 parser.add_option( "--genes_read_group_tracking_output", dest="genes_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
95 parser.add_option( "--cds_read_group_tracking_output", dest="cds_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
96 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
97 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
98 parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
99 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
100 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
101 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
102 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
103 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
104 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
105 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
106 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
107 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
108 parser.add_option( "--run_info_output", dest="run_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
109 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
110 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
111 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
112
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
113 (options, args) = parser.parse_args()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
114
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
115 # output version # of tool
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
116 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
117 tmp = tempfile.NamedTemporaryFile().name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
118 tmp_stdout = open( tmp, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
119 proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
120 tmp_stdout.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
121 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
122 stdout = None
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
123 for line in open( tmp_stdout.name, 'rb' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
124 if line.lower().find( 'cuffdiff v' ) >= 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
125 stdout = line.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
126 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
127 if stdout:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
128 sys.stdout.write( '%s\n' % stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
129 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
130 raise Exception
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
131 except:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
132 sys.stdout.write( 'Could not determine Cuffdiff version\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
133
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
134 # Make temp directory for output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
135 tmp_output_dir = tempfile.mkdtemp()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
136 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
137 if not os.path.exists( cuffdatadir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
138 os.makedirs( cuffdatadir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
139
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
140
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
141 # If doing bias correction, set/link to sequence file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
142 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
143 if options.ref_file != 'None':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
144 # Sequence data from history.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
145 # Create symbolic link to ref_file so that index will be created in working directory.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
146 seq_path = os.path.join( cuffdatadir, "ref.fa" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
147 os.symlink( options.ref_file, seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
148 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
149 # Sequence data from loc file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
150 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
151 if not os.path.exists( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
152 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
153 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
154 # and the equCab2.fa file will contain fasta sequences.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
155 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
156 if seq_path == '':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
157 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
158
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
159 # Build command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
160
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
161 # Base; always use quiet mode to avoid problems with storing log output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
162 cmd = "cuffdiff --no-update-check -q"
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
163
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
164 # Add options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
165 if options.inner_dist_std_dev:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
166 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
167 if options.num_threads:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
168 cmd += ( " -p %i" % int ( options.num_threads ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
169 if options.inner_mean_dist:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
170 cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
171 if options.min_alignment_count:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
172 cmd += ( " -c %i" % int ( options.min_alignment_count ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
173 if options.FDR:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
174 cmd += ( " --FDR %f" % float( options.FDR ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
175 if options.multi_read_correct:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
176 cmd += ( " -u" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
177 if options.num_importance_samples:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
178 cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
179 if options.max_mle_iterations:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
180 cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
181 if options.do_normalization:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
182 cmd += ( " -N" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
183 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
184 cmd += ( " -b %s" % seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
185
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
186 # Add inputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
187 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
188 if options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
189 cmd += " --labels "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
190 for label in options.labels:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
191 cmd += label + ","
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
192 cmd = cmd[:-1]
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
193
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
194 cmd += " " + options.inputA + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
195
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
196 for group in options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
197 for filename in group:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
198 cmd += filename + ","
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
199 cmd = cmd[:-1] + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
200 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
201 cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
202
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
203 # Debugging.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
204 print cmd
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
205
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
206 # Run command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
207 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
208 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
209 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
210 proc = subprocess.Popen( args=cmd, shell=True, cwd=cuffdatadir, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
211 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
212 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
213
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
214 # Get stderr, allowing for case where it's very large.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
215 tmp_stderr = open( tmp_name, 'rb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
216 stderr = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
217 buffsize = 1048576
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
218 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
219 while True:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
220 stderr += tmp_stderr.read( buffsize )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
221 if not stderr or len( stderr ) % buffsize != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
222 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
223 except OverflowError:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
224 pass
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
225 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
226
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
227 # Error checking.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
228 if returncode != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
229 raise Exception, stderr
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
230
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
231 # check that there are results in the output file
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
232 if len( open( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
233 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
234 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
235 stop_err( 'Error running cuffdiff. ' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
236
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
237
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
238 # Copy output files from tmp directory to specified files.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
239 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
240 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
241 if options.isoforms_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
242 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
243 if options.genes_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
244 shutil.copyfile( os.path.join( cuffdatadir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
245 if options.cds_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
246 shutil.copyfile( os.path.join( cuffdatadir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
247 if options.tss_groups_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
248 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
249
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
250 if options.isoforms_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
251 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.read_group_tracking" ), options.isoforms_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
252 if options.genes_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
253 shutil.copyfile( os.path.join( cuffdatadir, "genes.read_group_tracking" ), options.genes_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
254 if options.cds_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
255 shutil.copyfile( os.path.join( cuffdatadir, "cds.read_group_tracking" ), options.cds_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
256 if options.tss_groups_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
257 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.read_group_tracking" ), options.tss_groups_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
258
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
259 if options.isoforms_exp_output and os.path.exists(os.path.join( cuffdatadir, "isoform_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
260 shutil.copyfile( os.path.join( cuffdatadir, "isoform_exp.diff" ), options.isoforms_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
261 if options.genes_exp_output and os.path.exists(os.path.join( cuffdatadir, "gene_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
262 shutil.copyfile( os.path.join( cuffdatadir, "gene_exp.diff" ), options.genes_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
263 if options.cds_exp_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
264 shutil.copyfile( os.path.join( cuffdatadir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
265 if options.tss_groups_exp_output and os.path.exists(os.path.join( cuffdatadir, "tss_group_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
266 shutil.copyfile( os.path.join( cuffdatadir, "tss_group_exp.diff" ), options.tss_groups_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
267
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
268 if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
269 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
270 if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
271 shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
272 if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
273 shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
274 if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
275 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
276
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
277 if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
278 shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
279
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
280 if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
281 shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
282 if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
283 shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
284
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
285 if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
286 shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
287 if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
288 shutil.copyfile( os.path.join( cuffdatadir, "read_groups.info" ), options.read_groups_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
289
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
290 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
291 stop_err( 'Error in cuffdiff:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
292 if options.cummeRbund_db_output:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
293 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
294 dbFile = 'cuffData.db'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
295 rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix='.r' ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
296 rscript_fh = open( rscript, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
297 rscript_fh.write('library(cummeRbund)\n')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
298 if options.inputA and options.ref_file:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
299 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\n' % (cuffdatadir,dbFile,options.inputA,options.ref_file))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
300 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
301 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\n' % (cuffdatadir,dbFile))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
302 rscript_fh.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
303 cmd = ( "Rscript --vanilla %s" % rscript )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
304 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
305 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
306 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
307 #proc = subprocess.Popen( args=cmd, shell=True)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
308 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
309 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
310 if os.path.exists(os.path.join( cuffdatadir, dbFile )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
311 shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
312 shutil.rmtree(os.path.join( cuffdatadir, dbFile ))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
313 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
314 stop_err( 'Error generating cummeRbund cuffData.db:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
315 finally:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
316 # Clean up temp dirs
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
317 if os.path.exists( tmp_output_dir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
318 shutil.rmtree( tmp_output_dir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
319
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
320 if __name__=="__main__": __main__()