annotate cuffcompare_wrapper.py @ 6:8e534225baa9 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:55:55 -0500
parents 8b22e9adae34
children 1322b73ffe44
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
2
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
3 # Supports Cuffcompare versions v1.3.0 and newer.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
4
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
6
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
7 def stop_err( msg ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
8 sys.stderr.write( '%s\n' % msg )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
9 sys.exit()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
10
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
11 def __main__():
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
12 #Parse Command Line
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
13 parser = optparse.OptionParser()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
14 parser.add_option( '-r', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
6
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
15 parser.add_option( '-R', action="store_true", dest='ignore_nonoverlap_reference', help='If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts accuracy file' )
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
16 parser.add_option( '-Q', action="store_true", dest='ignore_nonoverlap_transfrag', help='If -r was specified, this option causes cuffcompare to consider only the input transcripts that overlap any of the reference transcripts (Sp correction); Warning: this will discard all "novel" loci!)' )
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
17
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
18 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
19
6
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
20 parser.add_option( '-M', action="store_true", dest='discard_single_exon_all', help='discard (ignore) single-exon transfrags and reference transcript')
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
21 parser.add_option( '-N', action="store_true", dest='discard_single_exon_ref', help='discard (ignore) single-exon reference transcripts')
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
22 parser.add_option( '-e', dest='max_dist_exon', help='Max. Distance for assessing exon accuracy" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100')
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
23 parser.add_option( '-d', dest='max_dist_group', help='Max.Distance for transcript grouping" help="max. distance (range) for grouping transcript start sites. Default: 100')
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
24 parser.add_option( '-F', action="store_true", dest='discard_redundant_intron_transfrags', help='Discard intron-redundant transfrags if they share the 5-prime end (if they differ only at the 3-prime end)')
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
25
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
26 # Wrapper / Galaxy options.
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
27 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
28 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
29
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
30 # Outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
31 parser.add_option( '', '--combined-transcripts', dest='combined_transcripts' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
32
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
33 (options, args) = parser.parse_args()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
34
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
35 # output version # of tool
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
36 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
37 tmp = tempfile.NamedTemporaryFile().name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
38 tmp_stdout = open( tmp, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
39 proc = subprocess.Popen( args='cuffcompare 2>&1', shell=True, stdout=tmp_stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
40 tmp_stdout.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
41 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
42 stdout = None
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
43 for line in open( tmp_stdout.name, 'rb' ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
44 if line.lower().find( 'cuffcompare v' ) >= 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
45 stdout = line.strip()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
46 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
47 if stdout:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
48 sys.stdout.write( '%s\n' % stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
49 else:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
50 raise Exception
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
51 except:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
52 sys.stdout.write( 'Could not determine Cuffcompare version\n' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
53
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
54 # Set/link to sequence file.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
55 if options.use_seq_data:
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
56 if options.ref_file:
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
57 # Sequence data from history.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
58 # Create symbolic link to ref_file so that index will be created in working directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
59 seq_path = "ref.fa"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
60 os.symlink( options.ref_file, seq_path )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
61 else:
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
62 if not os.path.exists( options.index ):
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
63 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
64 seq_path = options.index
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
65
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
66 # Build command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
67
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
68 # Base.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
69 cmd = "cuffcompare -o cc_output "
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
70
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
71 # Add options.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
72 if options.ref_annotation:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
73 cmd += " -r %s " % options.ref_annotation
6
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
74 if options.ignore_nonoverlap_reference:
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
75 cmd += " -R "
6
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
76 if options.ignore_nonoverlap_transfrag:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
77 cmd += " -Q "
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
78 if options.use_seq_data:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
79 cmd += " -s %s " % seq_path
6
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
80 if options.discard_single_exon_all:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
81 cmd += " -M "
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
82 if options.discard_single_exon_ref:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
83 cmd += " -N "
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
84 if options.max_dist_exon:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
85 cmd += " -e %i " % int( options.max_dist_exon )
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
86 if options.max_dist_group:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
87 cmd += " -d %i " % int( options.max_dist_group )
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
88 if options.discard_redundant_intron_transfrags:
8e534225baa9 Uploaded
devteam
parents: 2
diff changeset
89 cmd += " -F "
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
90 # Add input files.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
91
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
92 # Need to symlink inputs so that output files are written to temp directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
93 for i, arg in enumerate( args ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
94 input_file_name = "./input%i" % ( i+1 )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
95 os.symlink( arg, input_file_name )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
96 cmd += "%s " % input_file_name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
97
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
98 # Debugging.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
99 print cmd
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
100
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
101 # Run command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
102 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
103 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
104 tmp_stderr = open( tmp_name, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
105 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
106 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
107 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
108
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
109 # Get stderr, allowing for case where it's very large.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
110 tmp_stderr = open( tmp_name, 'rb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
111 stderr = ''
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
112 buffsize = 1048576
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
113 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
114 while True:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
115 stderr += tmp_stderr.read( buffsize )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
116 if not stderr or len( stderr ) % buffsize != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
117 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
118 except OverflowError:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
119 pass
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
120 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
121
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
122 # Error checking.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
123 if returncode != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
124 raise Exception, stderr
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
125
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
126 # Copy outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
127 shutil.copyfile( "cc_output.combined.gtf" , options.combined_transcripts )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
128
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
129 # check that there are results in the output file
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
130 cc_output_fname = "cc_output.stats"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
131 if len( open( cc_output_fname, 'rb' ).read().strip() ) == 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
132 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
133 except Exception, e:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
134 stop_err( 'Error running cuffcompare. ' + str( e ) )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
135
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
136 if __name__=="__main__": __main__()