annotate cuffmerge_wrapper.py @ 2:5b285b6e4ee3

Update to the new data table specification.
author Dave Bouvier <dave@bx.psu.edu>
date Wed, 04 Dec 2013 13:24:59 -0500
parents dbbd37e013aa
children b6e3849293b1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
2
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
3 # Supports Cuffmerge versions 1.3 and newer.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
4
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
6
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
7 def stop_err( msg ):
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
8 sys.stderr.write( '%s\n' % msg )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
9 sys.exit()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
10
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
11 def __main__():
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
12 #Parse Command Line
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
13 parser = optparse.OptionParser()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
14 parser.add_option( '-g', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
15 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
16 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
17
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
18
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
19 # Wrapper / Galaxy options.
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
20 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
21 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
22
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
23 # Outputs.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
24 parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
25
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
26 (options, args) = parser.parse_args()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
27
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
28 # output version # of tool
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
29 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
30 tmp = tempfile.NamedTemporaryFile().name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
31 tmp_stdout = open( tmp, 'wb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
32 proc = subprocess.Popen( args='cuffmerge -v 2>&1', shell=True, stdout=tmp_stdout )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
33 tmp_stdout.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
34 returncode = proc.wait()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
35 stdout = None
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
36 for line in open( tmp_stdout.name, 'rb' ):
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
37 if line.lower().find( 'merge_cuff_asms v' ) >= 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
38 stdout = line.strip()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
39 break
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
40 if stdout:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
41 sys.stdout.write( '%s\n' % stdout )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
42 else:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
43 raise Exception
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
44 except:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
45 sys.stdout.write( 'Could not determine Cuffmerge version\n' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
46
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
47 # Set/link to sequence file.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
48 if options.use_seq_data:
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
49 if options.ref_file:
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
50 # Sequence data from history.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
51 # Create symbolic link to ref_file so that index will be created in working directory.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
52 seq_path = "ref.fa"
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
53 os.symlink( options.ref_file, seq_path )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
54 else:
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
55 if not os.path.exists( options.index ):
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
56 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
57 seq_path = options.index
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
58
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
59 # Build command.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
60
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
61 # Base.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
62 cmd = "cuffmerge -o cm_output "
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
63
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
64 # Add options.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
65 if options.num_threads:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
66 cmd += ( " -p %i " % int ( options.num_threads ) )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
67 if options.ref_annotation:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
68 cmd += " -g %s " % options.ref_annotation
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
69 if options.use_seq_data:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
70 cmd += " -s %s " % seq_path
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
71
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
72 # Add input files to a file.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
73 inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
74 inputs_file = open( inputs_file_name, 'w' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
75 for arg in args:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
76 inputs_file.write( arg + "\n" )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
77 inputs_file.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
78 cmd += inputs_file_name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
79
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
80 # Debugging.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
81 print cmd
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
82
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
83 # Run command.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
84 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
85 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
86 tmp_stderr = open( tmp_name, 'wb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
87 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
88 returncode = proc.wait()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
89 tmp_stderr.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
90
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
91 # Get stderr, allowing for case where it's very large.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
92 tmp_stderr = open( tmp_name, 'rb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
93 stderr = ''
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
94 buffsize = 1048576
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
95 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
96 while True:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
97 stderr += tmp_stderr.read( buffsize )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
98 if not stderr or len( stderr ) % buffsize != 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
99 break
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
100 except OverflowError:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
101 pass
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
102 tmp_stderr.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
103
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
104 # Error checking.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
105 if returncode != 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
106 raise Exception, stderr
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
107
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
108 if len( open( "cm_output/merged.gtf", 'rb' ).read().strip() ) == 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
109 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
110
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
111 # Copy outputs.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
112 shutil.copyfile( "cm_output/merged.gtf" , options.merged_transcripts )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
113
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
114 except Exception, e:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
115 stop_err( 'Error running cuffmerge. ' + str( e ) )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
116
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
117 if __name__=="__main__": __main__()