annotate cuffdiff_wrapper.py @ 0:da7241f92ecf

Uploaded
author jjohnson
date Mon, 04 Feb 2013 19:50:25 -0500
parents
children ebb9a992508d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
3 # Wrapper supports Cuffdiff versions v1.3.0-v2.0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
4
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
6
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
7 def group_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
8 groups = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
9 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
10 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
11 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
12 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
13 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
14 elif arg[0] is ",":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
15 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
16 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
17 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
18 flist.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
19 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
20
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
21 setattr(parser.values, option.dest, groups)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
22
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
23 def label_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
24 labels = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
25 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
26 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
27 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
28 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
29 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
30 labels.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
31
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
32 setattr(parser.values, option.dest, labels)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
33
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
34 def stop_err( msg ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
35 sys.stderr.write( "%s\n" % msg )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
36 sys.exit()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
37
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
38 # Copied from sam_to_bam.py:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
39 def check_seq_file( dbkey, cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
40 seq_path = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
41 for line in open( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
42 line = line.rstrip( '\r\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
43 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
44 fields = line.split( '\t' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
45 if len( fields ) < 3:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
46 continue
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
47 if fields[1] == dbkey:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
48 seq_path = fields[2].strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
49 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
50 return seq_path
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
51
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
52 def __main__():
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
53 #Parse Command Line
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
54 parser = optparse.OptionParser()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
55
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
56 # Cuffdiff options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
57 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
58 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
59 parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
60 For, example, for paired end runs with fragments selected at 300bp, \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
61 where each end is 50bp, you should set -r to be 200. The default is 45bp.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
65
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
66 # Advanced Options:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
67 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
68 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
69
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
70 # Wrapper / Galaxy options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
71 parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, groupN_rep2" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
72 parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
73 parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
74 parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
75
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
76 # Label options
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
77 parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.")
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
78
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
79 # Normalization options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
80 parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
81
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
82 # Bias correction options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
83 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
84 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
85 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
86 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
87
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
88 # Outputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
89 parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
90 parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
91 parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
92 parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
93 parser.add_option( "--isoforms_read_group_tracking_output", dest="isoforms_read_group_tracking_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
94 parser.add_option( "--genes_read_group_tracking_output", dest="genes_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
95 parser.add_option( "--cds_read_group_tracking_output", dest="cds_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
96 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
97 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
98 parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
99 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
100 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
101 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
102 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
103 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
104 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
105 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
106 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
107 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
108 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
109 parser.add_option( "--run_info_output", dest="run_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
110 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
111 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
112 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
113
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
114 (options, args) = parser.parse_args()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
115
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
116 # output version # of tool
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
117 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
118 tmp = tempfile.NamedTemporaryFile().name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
119 tmp_stdout = open( tmp, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
120 proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
121 tmp_stdout.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
122 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
123 stdout = None
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
124 for line in open( tmp_stdout.name, 'rb' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
125 if line.lower().find( 'cuffdiff v' ) >= 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
126 stdout = line.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
127 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
128 if stdout:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
129 sys.stdout.write( '%s\n' % stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
130 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
131 raise Exception
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
132 except:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
133 sys.stdout.write( 'Could not determine Cuffdiff version\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
134
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
135 # Make temp directory for output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
136 tmp_output_dir = tempfile.mkdtemp()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
137 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
138 if not os.path.exists( cuffdatadir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
139 os.makedirs( cuffdatadir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
140
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
141
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
142 # If doing bias correction, set/link to sequence file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
143 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
144 if options.ref_file != 'None':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
145 # Sequence data from history.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
146 # Create symbolic link to ref_file so that index will be created in working directory.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
147 seq_path = os.path.join( cuffdatadir, "ref.fa" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
148 os.symlink( options.ref_file, seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
149 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
150 # Sequence data from loc file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
151 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
152 if not os.path.exists( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
153 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
154 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
155 # and the equCab2.fa file will contain fasta sequences.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
156 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
157 if seq_path == '':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
158 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
159
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
160 # Build command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
161
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
162 # Base; always use quiet mode to avoid problems with storing log output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
163 cmd = "cuffdiff --no-update-check -q"
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
164
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
165 # Add options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
166 if options.inner_dist_std_dev:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
167 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
168 if options.num_threads:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
169 cmd += ( " -p %i" % int ( options.num_threads ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
170 if options.inner_mean_dist:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
171 cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
172 if options.min_alignment_count:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
173 cmd += ( " -c %i" % int ( options.min_alignment_count ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
174 if options.FDR:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
175 cmd += ( " --FDR %f" % float( options.FDR ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
176 if options.multi_read_correct:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
177 cmd += ( " -u" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
178 if options.num_importance_samples:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
179 cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
180 if options.max_mle_iterations:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
181 cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
182 if options.do_normalization:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
183 cmd += ( " -N" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
184 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
185 cmd += ( " -b %s" % seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
186
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
187 # Add inputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
188 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
189 if options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
190 cmd += " --labels "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
191 for label in options.labels:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
192 cmd += label + ","
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
193 cmd = cmd[:-1]
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
194
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
195 cmd += " " + options.inputA + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
196
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
197 for group in options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
198 for filename in group:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
199 cmd += filename + ","
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
200 cmd = cmd[:-1] + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
201 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
202 cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
203
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
204 # Debugging.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
205 print cmd
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
206
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
207 # Run command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
208 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
209 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
210 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
211 proc = subprocess.Popen( args=cmd, shell=True, cwd=cuffdatadir, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
212 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
213 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
214
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
215 # Get stderr, allowing for case where it's very large.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
216 tmp_stderr = open( tmp_name, 'rb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
217 stderr = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
218 buffsize = 1048576
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
219 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
220 while True:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
221 stderr += tmp_stderr.read( buffsize )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
222 if not stderr or len( stderr ) % buffsize != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
223 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
224 except OverflowError:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
225 pass
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
226 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
227
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
228 # Error checking.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
229 if returncode != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
230 raise Exception, stderr
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
231
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
232 # check that there are results in the output file
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
233 if len( open( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
234 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
235 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
236 stop_err( 'Error running cuffdiff. ' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
237
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
238
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
239 # Copy output files from tmp directory to specified files.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
240 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
241 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
242 if options.isoforms_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
243 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
244 if options.genes_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
245 shutil.copyfile( os.path.join( cuffdatadir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
246 if options.cds_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
247 shutil.copyfile( os.path.join( cuffdatadir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
248 if options.tss_groups_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
249 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
250
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
251 if options.isoforms_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
252 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.read_group_tracking" ), options.isoforms_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
253 if options.genes_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
254 shutil.copyfile( os.path.join( cuffdatadir, "genes.read_group_tracking" ), options.genes_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
255 if options.cds_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
256 shutil.copyfile( os.path.join( cuffdatadir, "cds.read_group_tracking" ), options.cds_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
257 if options.tss_groups_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
258 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.read_group_tracking" ), options.tss_groups_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
259
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
260 if options.isoforms_exp_output and os.path.exists(os.path.join( cuffdatadir, "isoform_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
261 shutil.copyfile( os.path.join( cuffdatadir, "isoform_exp.diff" ), options.isoforms_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
262 if options.genes_exp_output and os.path.exists(os.path.join( cuffdatadir, "gene_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
263 shutil.copyfile( os.path.join( cuffdatadir, "gene_exp.diff" ), options.genes_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
264 if options.cds_exp_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
265 shutil.copyfile( os.path.join( cuffdatadir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
266 if options.tss_groups_exp_output and os.path.exists(os.path.join( cuffdatadir, "tss_group_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
267 shutil.copyfile( os.path.join( cuffdatadir, "tss_group_exp.diff" ), options.tss_groups_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
268
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
269 if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
270 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
271 if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
272 shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
273 if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
274 shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
275 if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
276 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
277
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
278 if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
279 shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
280
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
281 if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
282 shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
283 if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
284 shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
285
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
286 if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
287 shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
288 if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
289 shutil.copyfile( os.path.join( cuffdatadir, "read_groups.info" ), options.read_groups_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
290
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
291 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
292 stop_err( 'Error in cuffdiff:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
293 if options.cummeRbund_db_output:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
294 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
295 dbFile = 'cuffData.db'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
296 rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix='.r' ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
297 rscript_fh = open( rscript, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
298 rscript_fh.write('library(cummeRbund)\n')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
299 if options.inputA and options.ref_file:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
300 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\n' % (cuffdatadir,dbFile,options.inputA,options.ref_file))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
301 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
302 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\n' % (cuffdatadir,dbFile))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
303 rscript_fh.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
304 cmd = ( "Rscript --vanilla %s" % rscript )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
305 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
306 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
307 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
308 #proc = subprocess.Popen( args=cmd, shell=True)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
309 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
310 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
311 if os.path.exists(os.path.join( cuffdatadir, dbFile )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
312 shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
313 shutil.rmtree(os.path.join( cuffdatadir, dbFile ))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
314 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
315 stop_err( 'Error generating cummeRbund cuffData.db:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
316 finally:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
317 # Clean up temp dirs
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
318 if os.path.exists( tmp_output_dir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
319 shutil.rmtree( tmp_output_dir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
320
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
321 if __name__=="__main__": __main__()