cummerbund_to_tabular: cummerbund_to

author	devteam
date	Tue, 23 Dec 2014 16:01:24 -0500
parents
children	36f917aa4b60

rev	line source
0 648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	1 import os
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	2 import argparse
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	3 import sys
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	4 import string
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	5
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	6 from galaxy.model.orm import *
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	7 import logging
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	8 from galaxy import eggs
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	9 eggs.require('SQLAlchemy')
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	10 import sqlalchemy
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	11
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	12
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	13 class CummerbundParser(object):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	14
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	15 def __init__(self, opts):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	16 self.cummerbund_db = opts.filename
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	17 self.__connect_database()
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	18
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	19 def generate_file( self, table ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	20 if hasattr( self, table ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	21 with open( '%s.tabular' % table, 'w' ) as self.fh:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	22 getattr( self, table )()
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	23 else:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	24 print 'Table %s is not supported or does not exist.' % table
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	25
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	26 def __connect_database( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	27 database_connection = 'sqlite:///%s' % os.path.abspath( self.cummerbund_db )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	28 # Initialize the database connection.
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	29 engine = create_engine( database_connection )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	30 meta = MetaData( bind=engine )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	31 sa_sesssion = Session = scoped_session( sessionmaker( bind=engine, autoflush=False, autocommit=True ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	32 self.session = sa_sesssion
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	33
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	34 def __write_line(self, line):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	35 columns = []
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	36 for col in line:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	37 if isinstance( col, float ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	38 if str( col ) in [ '-inf', 'inf' ]:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	39 columns.append( str( col ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	40 elif col == int(col):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	41 columns.append( str( int( col ) ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	42 else:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	43 columns.append( str( col ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	44 elif col is None:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	45 columns.append( '-' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	46 else:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	47 columns.append( str( col ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	48 print >>self.fh, '\t'.join( columns )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	49
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	50 def __get_diff_from_table( self, table, identifier ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	51 columns = [ '${table}.${identifier}', '${table}.gene_id', 'genes.gene_short_name', 'genes.locus',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	52 '${table}.sample_1', '${table}.sample_2', '${table}.status',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	53 '${table}.value_1', '${table}.value_2', '${table}.JS_dist',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	54 '${table}.test_stat', '${table}.p_value', '${table}.q_value',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	55 '${table}.significant' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	56 query = string.Template( 'SELECT %s FROM ${table} JOIN genes on ${table}.gene_id = genes.gene_id' % ', '.join(columns) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	57 result = self.session.execute( query.safe_substitute( table=table, identifier=identifier ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	58 self.__write_line( [ 'test_id', 'gene_id', 'gene', 'locus', 'sample_1',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	59 'sample_2', 'status', 'value_1', 'value_2', 'sqrt(JS)',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	60 'test_stat', 'p_value', 'q_value', 'significant' ] )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	61 for row in result:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	62 self.__write_line( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	63
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	64 def __get_read_group_data( self, table, identifier ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	65 header = [ 'tracking_id', 'condition', 'replicate', 'raw_frags',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	66 'internal_scaled_frags', 'external_scaled_frags', 'FPKM',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	67 'effective_length', 'status' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	68 columns = [ identifier, 'sample_name', 'replicate', 'raw_frags',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	69 'internal_scaled_frags', 'external_scaled_frags', 'fpkm',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	70 'effective_length', 'status' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	71 self.__write_line( header )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	72 for row in self.session.execute( 'SELECT %s FROM %s' % ( ', '.join( columns ), table ) ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	73 self.__write_line( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	74
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	75
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	76 def __get_exp_diff( self, table, data_table, data_table_as, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	77 header = [ 'test_id', 'gene_id', 'gene', 'locus', 'sample_1', 'sample_2',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	78 'status', 'value_1', 'value_2', 'log2(fold_change)', 'test_stat',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	79 'p_value', 'q_value', 'significant' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	80 columns = [ '${dtas}.${column}', '${table}.gene_id', '${table}.gene_short_name', '${table}.locus',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	81 '${dtas}.sample_1', '${dtas}.sample_2', '${dtas}.status',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	82 '${dtas}.value_1', '${dtas}.value_2', '${dtas}.log2_fold_change',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	83 '${dtas}.test_stat', '${dtas}.p_value', '${dtas}.q_value',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	84 '${dtas}.significant' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	85 query = string.Template( 'SELECT %s FROM ${dtab} as ${dtas} JOIN ${table} on ${dtas}.${column} = ${table}.${column}' % ', '.join( columns ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	86 self.__write_line( header )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	87 for row in self.session.execute( query.safe_substitute( dtas=data_table_as, dtab=data_table, table=table, column=column ) ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	88 self.__write_line( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	89
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	90 def __get_per_sample_fpkm( self, identifiers, table, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	91 columns = []
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	92 for identifier in identifiers:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	93 samples = self.session.execute( "SELECT sample_name FROM %s WHERE %s = '%s' ORDER BY sample_name ASC" % ( table, column, identifier[0] ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	94 for sample in samples:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	95 sample_name = sample[0]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	96 columns.extend( [ '%s_FPKM' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	97 '%s_conf_lo' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	98 '%s_conf_hi' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	99 '%s_status' % sample_name ] )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	100 return columns
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	101
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	102 def __get_fpkms( self, table, data_table, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	103 tss_columns = [ column, 'class_code', 'nearest_ref_id', 'gene_id',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	104 'gene_short_name', column, 'locus', 'length', 'coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	105 output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	106 'tss_id', 'locus', 'length', 'coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	107 tss_groups = self.session.execute( 'SELECT %s FROM %s LIMIT 1' % ( ', '.join( tss_columns ), table ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	108 output_cols.extend( self.__get_per_sample_fpkm( identifiers=tss_groups, column=column, table=data_table ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	109 self.__write_line( output_cols )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	110 tss_groups = self.session.execute( 'SELECT %s FROM %s' % ( ', '.join( tss_columns ), table ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	111 for tss_group in tss_groups:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	112 out_data = list( tss_group )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	113 samples = self.session.execute( "SELECT fpkm, conf_hi, conf_lo, quant_status FROM %s WHERE %s = '%s' ORDER BY sample_name ASC" % ( data_table, column, tss_group[0] ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	114 for sample in samples:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	115 out_data.extend( list( sample ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	116 self.__write_line( out_data )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	117
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	118 def __get_count_data( self, table, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	119 output_cols = [ 'tracking_id' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	120 tss_groups = self.session.execute( 'SELECT %s FROM %s LIMIT 1' % ( column, table ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	121 output_cols.extend( self.__get_per_sample_count_cols( identifiers=tss_groups, table=table, column=column ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	122 self.__write_line( output_cols )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	123 self.__get_per_sample_count_data( table=table, column=column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	124
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	125 def __get_per_sample_count_data( self, table, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	126 result = self.session.execute( 'SELECT DISTINCT(%s) FROM %s' % ( column, table ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	127 for row in result:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	128 isoform_id = row[0]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	129 output_data = [ isoform_id ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	130 per_sample = self.session.execute( "SELECT count, variance, uncertainty, dispersion, status FROM %s WHERE %s = '%s' ORDER BY sample_name ASC" % ( table, column, isoform_id ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	131 for samplerow in per_sample:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	132 output_data.extend( list( samplerow ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	133 self.__write_line( output_data )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	134
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	135 def __get_per_sample_count_cols( self, identifiers, table, column ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	136 columns = []
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	137 for identifier in identifiers:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	138 samples = self.session.execute( "SELECT sample_name FROM %s WHERE %s = '%s' ORDER BY sample_name ASC" % ( table, column, identifier[0] ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	139 for sample in samples:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	140 sample_name = sample[0]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	141 columns.extend( [ '%s_count' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	142 '%s_count_variance' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	143 '%s_count_uncertainty_var' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	144 '%s_count_dispersion_var' % sample_name,
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	145 '%s_status' % sample_name ] )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	146 return columns
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	147
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	148 def splicing_diff( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	149 self.__get_diff_from_table( 'splicingDiffData', 'TSS_group_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	150
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	151 def promoters_diff( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	152 self.__get_diff_from_table( 'promoterDiffData', 'gene_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	153
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	154 def cds_diff( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	155 self.__get_diff_from_table( 'CDSDiffData', 'gene_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	156
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	157 def tss_fpkm( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	158 data_table = 'TSSData'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	159 table = 'TSS'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	160 column = 'TSS_group_id'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	161 self.__get_fpkms( data_table=data_table, table=table, column=column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	162
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	163 def isoform_fpkm( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	164 data_table = 'isoformData'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	165 table = 'isoforms'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	166 column = 'isoform_id'
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	167 self.__get_fpkms( data_table=data_table, table=table, column=column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	168
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	169 def genes_fpkm( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	170 output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	171 'tss_id', 'locus', 'length', 'coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	172 iso_groups = self.session.execute( 'SELECT gene_id FROM genes LIMIT 1' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	173 output_cols.extend( self.__get_per_sample_fpkm( identifiers=iso_groups, column='gene_id', table='geneData' ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	174 self.__write_line( output_cols )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	175 data_columns = [ 'genes.gene_id', 'genes.class_code', 'genes.nearest_ref_id', 'genes.gene_id', 'genes.gene_short_name',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	176 'GROUP_CONCAT(TSS.TSS_group_id)', 'genes.locus', 'genes.length', 'genes.coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	177 query = 'SELECT %s FROM genes JOIN TSS on TSS.gene_id = genes.gene_id GROUP BY genes.gene_id' % ', '.join( data_columns )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	178 result = self.session.execute( query )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	179 for row in result:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	180 gene_id = row[0]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	181 output_data = list( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	182 per_sample = self.session.execute( "SELECT fpkm, conf_lo, conf_hi, quant_status FROM geneData WHERE gene_id = '%s' ORDER BY sample_name ASC" % gene_id )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	183 for samplerow in per_sample:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	184 output_data.extend( list( samplerow ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	185 self.__write_line( output_data )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	186
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	187 def cds_fpkm( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	188 output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	189 'tss_id', 'locus', 'length', 'coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	190 iso_groups = self.session.execute( 'SELECT CDS_id FROM CDS LIMIT 1' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	191 output_cols.extend( self.__get_per_sample_fpkm( identifiers=iso_groups, column='CDS_id', table='CDSData' ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	192 self.__write_line( output_cols )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	193 data_columns = [ 'CDS_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	194 'GROUP_CONCAT(TSS_group_id)', 'locus', 'length', 'coverage' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	195 query = 'SELECT %s FROM CDS GROUP BY CDS_id' % ', '.join( data_columns )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	196 result = self.session.execute( query )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	197 for row in result:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	198 CDS_id = row[0]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	199 output_data = list( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	200 per_sample = self.session.execute( "SELECT fpkm, conf_lo, conf_hi, quant_status FROM CDSData WHERE CDS_id = '%s' ORDER BY sample_name ASC" % CDS_id )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	201 for samplerow in per_sample:
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	202 output_data.extend( list( samplerow ) )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	203 self.__write_line( output_data )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	204
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	205 def tss_count_tracking( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	206 self.__get_count_data( table='TSSCount', column='TSS_group_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	207
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	208 def isoform_count( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	209 self.__get_count_data( table='isoformCount', column='isoform_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	210
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	211 def genes_count( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	212 self.__get_count_data( table='geneCount', column='gene_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	213
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	214 def cds_count( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	215 self.__get_count_data( table='CDSCount', column='CDS_id' )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	216
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	217 def tss_group_exp( self ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	218 columns = [ 'TEDD.TSS_group_id', 'TSS.gene_id', 'TSS.gene_short_name', 'TSS.locus',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	219 'TEDD.sample_1', 'TEDD.sample_2', 'TEDD.status',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	220 'TEDD.value_1', 'TEDD.value_2', 'TEDD.log2_fold_change',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	221 'TEDD.test_stat', 'TEDD.p_value', 'TEDD.q_value', 'TEDD.significant' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	222 query = [ 'SELECT %s FROM TSSExpDiffData AS TEDD' % ', '.join(columns),
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	223 'JOIN TSS on TEDD.TSS_group_id = TSS.TSS_group_id' ]
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	224 self.__write_line( [ 'test_id', 'gene_id', 'gene', 'locus',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	225 'sample_1', 'sample_2', 'status', 'value_1',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	226 'value_2', 'log2(fold_change)', 'test_stat',
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	227 'p_value', 'q_value', 'significant' ] )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	228 for row in self.session.execute( ' '.join( query ) ):
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	229 self.__write_line( row )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	230
def run_info( self ):
    """Write the ``param``/``value`` pairs of the runInfo table to the open output file."""
    emit = self.__write_line
    emit( [ 'param', 'value' ] )
    records = self.session.execute( 'SELECT param, value FROM runInfo' )
    for record in records:
        emit( record )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	235
def read_groups( self ):
    """Write every row of the replicates table under a read-group style header."""
    header = [ 'file', 'condition', 'replicate_num', 'total_mass', 'norm_mass', 'internal_scale', 'external_scale' ]
    # The sample_name and replicate columns are reported under the
    # headings 'condition' and 'replicate_num' respectively.
    fields = [ 'file', 'sample_name', 'replicate', 'total_mass', 'norm_mass', 'internal_scale', 'external_scale' ]
    self.__write_line( header )
    query = 'SELECT %s FROM replicates' % ', '.join( fields )
    for record in self.session.execute( query ):
        self.__write_line( record )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	240
def isoform_exp_diff( self ):
    """Write differential-expression rows for isoforms (isoformExpDiffData joined to isoforms)."""
    settings = dict( table='isoforms',
                     data_table='isoformExpDiffData',
                     data_table_as='iED',
                     column='isoform_id' )
    self.__get_exp_diff( **settings )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	243
def gene_exp_diff( self ):
    """Write differential-expression rows for genes (geneExpDiffData joined to genes)."""
    settings = dict( table='genes',
                     data_table='geneExpDiffData',
                     data_table_as='gEDD',
                     column='gene_id' )
    self.__get_exp_diff( **settings )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	246
def cds_exp_diff( self ):
    """Write differential-expression rows for coding sequences (CDSExpDiffData joined to CDS)."""
    settings = dict( table='CDS',
                     data_table='CDSExpDiffData',
                     data_table_as='CED',
                     column='CDS_id' )
    self.__get_exp_diff( **settings )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	249
def tss_rg( self ):
    """Write per-replicate data from TSSReplicateData, keyed by TSS_group_id."""
    source_table, id_column = 'TSSReplicateData', 'TSS_group_id'
    self.__get_read_group_data( table=source_table, identifier=id_column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	252
def isoform_rg( self ):
    """Write per-replicate data from isoformReplicateData, keyed by isoform_id."""
    source_table, id_column = 'isoformReplicateData', 'isoform_id'
    self.__get_read_group_data( table=source_table, identifier=id_column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	255
def gene_rg( self ):
    """Write per-replicate data from geneReplicateData, keyed by gene_id."""
    source_table, id_column = 'geneReplicateData', 'gene_id'
    self.__get_read_group_data( table=source_table, identifier=id_column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	258
def cds_rg( self ):
    """Write per-replicate data from CDSReplicateData, keyed by CDS_id."""
    source_table, id_column = 'CDSReplicateData', 'CDS_id'
    self.__get_read_group_data( table=source_table, identifier=id_column )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	261
def var_model( self ):
    """Write the varModel table; the header names double as the selected column names."""
    fields = [ 'condition', 'locus', 'compatible_count_mean', 'compatible_count_var',
               'total_count_mean', 'total_count_var', 'fitted_var' ]
    self.__write_line( fields )
    query = 'SELECT %s FROM varModel' % ', '.join( fields )
    records = self.session.execute( query )
    for record in records:
        self.__write_line( record )
648c27c78eed Initial commit with version 1.0.0 of the tool. devteam parents: diff changeset	267
if __name__ == '__main__':
    # Command line entry point: open the database named by --file and write
    # one '<table>.tabular' file for every --tables value given.
    parser = argparse.ArgumentParser()
    # Without a database path the parser cannot connect, so fail early with a
    # usage message instead of a traceback from os.path.abspath( None ).
    parser.add_argument( '--file', dest='filename', required=True )
    parser.add_argument( '--tables', dest='tables', action='append' )
    opts = parser.parse_args()
    cb = CummerbundParser( opts )
    # action='append' leaves opts.tables as None when --tables is never
    # passed; treat that as "no tables requested" rather than iterating None.
    for table in opts.tables or []:
        cb.generate_file( table )

0

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

1 import os

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

2 import argparse

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

3 import sys

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

4 import string

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

5

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

6 from galaxy.model.orm import *

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

7 import logging

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

8 from galaxy import eggs

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

9 eggs.require('SQLAlchemy')

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

10 import sqlalchemy

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

11

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

12

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

13 class CummerbundParser(object):

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

14

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __init__(self, opts):
    """Remember the database path from ``opts.filename`` and connect to it.

    ``opts`` is the parsed argument namespace; only its ``filename``
    attribute is read here.
    """
    db_path = opts.filename
    self.cummerbund_db = db_path
    # The path must be stored first: the connection step reads
    # self.cummerbund_db.
    self.__connect_database()

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

18

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def generate_file( self, table ):
    """Write ``<table>.tabular`` in the current directory for one table.

    ``table`` is the name of a public method of this object; that method is
    called with ``self.fh`` bound to the freshly opened output file.  Names
    that do not resolve to such a method are reported on stdout and no file
    is created.
    """
    import inspect
    handler = None
    # Only public bound methods are table writers.  A plain hasattr() check
    # would also accept data attributes such as 'session' or 'cummerbund_db'
    # (and this method itself), creating a bogus output file before failing.
    if not table.startswith( '_' ) and table != 'generate_file':
        handler = getattr( self, table, None )
    if not inspect.ismethod( handler ):
        print( 'Table %s is not supported or does not exist.' % table )
        return
    # The handle is stored on self because the row writer reads self.fh.
    with open( '%s.tabular' % table, 'w' ) as self.fh:
        handler()

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

25

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __connect_database( self ):
    """Open the SQLite file at ``self.cummerbund_db`` and store a session on ``self.session``."""
    # An absolute path after 'sqlite:///' keeps the URL independent of the
    # working directory.
    database_connection = 'sqlite:///%s' % os.path.abspath( self.cummerbund_db )
    # Initialize the database connection.
    engine = create_engine( database_connection )
    session_factory = sessionmaker( bind=engine, autoflush=False, autocommit=True )
    self.session = scoped_session( session_factory )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

33

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __write_line(self, line):
    """Write one tab-separated row to ``self.fh``.

    ``line`` is any iterable of cell values.  ``None`` is written as ``-``,
    floats with an integral value are written without a fractional part,
    and everything else is written via ``str()``.
    """
    import math
    columns = []
    for col in line:
        if col is None:
            columns.append( '-' )
        elif isinstance( col, float ) and not ( math.isinf( col ) or math.isnan( col ) ) and col == int( col ):
            # int() raises on inf and nan, so those are excluded above and
            # fall through to the plain str() branch.
            columns.append( str( int( col ) ) )
        else:
            columns.append( str( col ) )
    self.fh.write( '\t'.join( columns ) + '\n' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

49

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_diff_from_table( self, table, identifier ):
    """Write the rows of a *DiffData table joined to ``genes`` on gene_id.

    ``table`` is the diff table to read and ``identifier`` the column of
    that table reported as ``test_id``.
    """
    header = [ 'test_id', 'gene_id', 'gene', 'locus', 'sample_1',
               'sample_2', 'status', 'value_1', 'value_2', 'sqrt(JS)',
               'test_stat', 'p_value', 'q_value', 'significant' ]
    own_fields = [ 'sample_1', 'sample_2', 'status', 'value_1', 'value_2', 'JS_dist',
                   'test_stat', 'p_value', 'q_value', 'significant' ]
    selected = [ '%s.%s' % ( table, identifier ), '%s.gene_id' % table,
                 'genes.gene_short_name', 'genes.locus' ]
    for field in own_fields:
        selected.append( '%s.%s' % ( table, field ) )
    sql = 'SELECT %s FROM %s JOIN genes on %s.gene_id = genes.gene_id' % ( ', '.join( selected ), table, table )
    # The query is issued before the header is written.
    records = self.session.execute( sql )
    self.__write_line( header )
    for record in records:
        self.__write_line( record )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

63

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_read_group_data( self, table, identifier ):
    """Write per-replicate rows of ``table``, reporting ``identifier`` as tracking_id."""
    shared = [ 'replicate', 'raw_frags', 'internal_scaled_frags', 'external_scaled_frags' ]
    trailing = [ 'effective_length', 'status' ]
    # Header and selected columns differ only in the first two names and in
    # the capitalisation of FPKM.
    header = [ 'tracking_id', 'condition' ] + shared + [ 'FPKM' ] + trailing
    fields = [ identifier, 'sample_name' ] + shared + [ 'fpkm' ] + trailing
    self.__write_line( header )
    sql = 'SELECT %s FROM %s' % ( ', '.join( fields ), table )
    for record in self.session.execute( sql ):
        self.__write_line( record )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

74

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

75

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_exp_diff( self, table, data_table, data_table_as, column ):
    """Write differential-expression rows of ``data_table`` joined to ``table``.

    ``data_table`` is aliased as ``data_table_as`` in the query and joined
    to ``table`` on ``column``, which is also reported as ``test_id``.
    """
    header = [ 'test_id', 'gene_id', 'gene', 'locus', 'sample_1', 'sample_2',
               'status', 'value_1', 'value_2', 'log2(fold_change)', 'test_stat',
               'p_value', 'q_value', 'significant' ]
    annotation_fields = [ 'gene_id', 'gene_short_name', 'locus' ]
    data_fields = [ 'sample_1', 'sample_2', 'status', 'value_1', 'value_2',
                    'log2_fold_change', 'test_stat', 'p_value', 'q_value', 'significant' ]
    selected = [ '%s.%s' % ( data_table_as, column ) ]
    selected.extend( [ '%s.%s' % ( table, name ) for name in annotation_fields ] )
    selected.extend( [ '%s.%s' % ( data_table_as, name ) for name in data_fields ] )
    sql = 'SELECT %s FROM %s as %s JOIN %s on %s.%s = %s.%s' % (
        ', '.join( selected ), data_table, data_table_as, table,
        data_table_as, column, table, column )
    self.__write_line( header )
    for record in self.session.execute( sql ):
        self.__write_line( record )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

89

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_per_sample_fpkm( self, identifiers, table, column ):
    """Return the per-sample FPKM header names for the given identifiers.

    ``identifiers`` is an iterable of rows whose first element is looked up
    in ``column`` of ``table``.  For every sample_name found (in ascending
    order) four names are produced: ``<sample>_FPKM``, ``<sample>_conf_lo``,
    ``<sample>_conf_hi`` and ``<sample>_status``.
    """
    columns = []
    # The identifier value is passed as a bound parameter so that IDs
    # containing quote characters cannot break the statement.
    query = 'SELECT sample_name FROM %s WHERE %s = :identifier ORDER BY sample_name ASC' % ( table, column )
    for identifier in identifiers:
        samples = self.session.execute( query, { 'identifier': identifier[0] } )
        for sample in samples:
            sample_name = sample[0]
            columns.extend( [ '%s_FPKM' % sample_name,
                              '%s_conf_lo' % sample_name,
                              '%s_conf_hi' % sample_name,
                              '%s_status' % sample_name ] )
    return columns

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

101

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_fpkms( self, table, data_table, column ):
    """Write an FPKM-tracking style file for ``table``.

    One row is written per row of ``table``; the annotation columns are
    followed by ``fpkm, conf_lo, conf_hi, quant_status`` for every sample
    found in ``data_table`` for that row's ``column`` value.
    """
    # NOTE(review): ``column`` is selected twice, so the 'tss_id' output
    # column repeats the tracking id -- confirm this is intended for tables
    # other than TSS.
    tss_columns = [ column, 'class_code', 'nearest_ref_id', 'gene_id',
                    'gene_short_name', column, 'locus', 'length', 'coverage' ]
    output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
                    'tss_id', 'locus', 'length', 'coverage' ]
    select_rows = 'SELECT %s FROM %s' % ( ', '.join( tss_columns ), table )
    # The sample names of the first row define the per-sample header columns.
    first_row = self.session.execute( '%s LIMIT 1' % select_rows )
    output_cols.extend( self.__get_per_sample_fpkm( identifiers=first_row, column=column, table=data_table ) )
    self.__write_line( output_cols )
    # Values are selected in header order (FPKM, conf_lo, conf_hi, status),
    # and the identifier is a bound parameter so quotes in IDs are harmless.
    sample_query = 'SELECT fpkm, conf_lo, conf_hi, quant_status FROM %s WHERE %s = :identifier ORDER BY sample_name ASC' % ( data_table, column )
    for tss_group in self.session.execute( select_rows ):
        out_data = list( tss_group )
        for sample in self.session.execute( sample_query, { 'identifier': tss_group[0] } ):
            out_data.extend( sample )
        self.__write_line( out_data )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

117

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_count_data( self, table, column ):
    """Write a count-tracking style file for ``table``, keyed by ``column``.

    The header is derived from the first row's samples; the data rows are
    produced by the per-sample count writer.
    """
    first_row = self.session.execute( 'SELECT %s FROM %s LIMIT 1' % ( column, table ) )
    header = [ 'tracking_id' ]
    header += self.__get_per_sample_count_cols( identifiers=first_row, table=table, column=column )
    self.__write_line( header )
    self.__get_per_sample_count_data( table=table, column=column )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

124

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_per_sample_count_data( self, table, column ):
    """Write one row per distinct ``column`` value of ``table``.

    Each row holds the identifier followed by ``count, variance,
    uncertainty, dispersion, status`` for every sample of that identifier,
    ordered by sample_name.
    """
    # The identifier is passed as a bound parameter so that IDs containing
    # quote characters cannot break the statement.
    sample_query = 'SELECT count, variance, uncertainty, dispersion, status FROM %s WHERE %s = :identifier ORDER BY sample_name ASC' % ( table, column )
    result = self.session.execute( 'SELECT DISTINCT(%s) FROM %s' % ( column, table ) )
    for row in result:
        tracking_id = row[0]
        output_data = [ tracking_id ]
        for samplerow in self.session.execute( sample_query, { 'identifier': tracking_id } ):
            output_data.extend( samplerow )
        self.__write_line( output_data )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

134

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def __get_per_sample_count_cols( self, identifiers, table, column ):
    """Return the per-sample count header names for the given identifiers.

    ``identifiers`` is an iterable of rows whose first element is looked up
    in ``column`` of ``table``.  For every sample_name found (in ascending
    order) five names are produced: ``<sample>_count``,
    ``<sample>_count_variance``, ``<sample>_count_uncertainty_var``,
    ``<sample>_count_dispersion_var`` and ``<sample>_status``.
    """
    suffixes = [ 'count', 'count_variance', 'count_uncertainty_var', 'count_dispersion_var', 'status' ]
    columns = []
    # The identifier value is passed as a bound parameter so that IDs
    # containing quote characters cannot break the statement.
    query = 'SELECT sample_name FROM %s WHERE %s = :identifier ORDER BY sample_name ASC' % ( table, column )
    for identifier in identifiers:
        samples = self.session.execute( query, { 'identifier': identifier[0] } )
        for sample in samples:
            sample_name = sample[0]
            columns.extend( [ '%s_%s' % ( sample_name, suffix ) for suffix in suffixes ] )
    return columns

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

147

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def splicing_diff( self ):
    """Write the splicingDiffData table, reporting TSS_group_id as the test id."""
    self.__get_diff_from_table( table='splicingDiffData', identifier='TSS_group_id' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

150

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def promoters_diff( self ):
    """Write the promoterDiffData table, reporting gene_id as the test id."""
    self.__get_diff_from_table( table='promoterDiffData', identifier='gene_id' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

153

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def cds_diff( self ):
    """Write the CDSDiffData table, reporting gene_id as the test id."""
    self.__get_diff_from_table( table='CDSDiffData', identifier='gene_id' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

156

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def tss_fpkm( self ):
    """Write FPKM tracking data for TSS groups (TSS annotated with TSSData values)."""
    self.__get_fpkms( table='TSS', data_table='TSSData', column='TSS_group_id' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

162

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def isoform_fpkm( self ):
    """Write FPKM tracking data for isoforms (isoforms annotated with isoformData values)."""
    self.__get_fpkms( table='isoforms', data_table='isoformData', column='isoform_id' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

168

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

def genes_fpkm( self ):
    """Write FPKM tracking data for genes.

    Each row holds the gene annotation (with the gene's TSS group ids
    concatenated into the ``tss_id`` column) followed by ``fpkm, conf_lo,
    conf_hi, quant_status`` from geneData for every sample, ordered by
    sample_name.
    """
    output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',
                    'tss_id', 'locus', 'length', 'coverage' ]
    # The sample names of the first gene define the per-sample header columns.
    iso_groups = self.session.execute( 'SELECT gene_id FROM genes LIMIT 1' )
    output_cols.extend( self.__get_per_sample_fpkm( identifiers=iso_groups, column='gene_id', table='geneData' ) )
    self.__write_line( output_cols )
    data_columns = [ 'genes.gene_id', 'genes.class_code', 'genes.nearest_ref_id', 'genes.gene_id', 'genes.gene_short_name',
                     'GROUP_CONCAT(TSS.TSS_group_id)', 'genes.locus', 'genes.length', 'genes.coverage' ]
    query = 'SELECT %s FROM genes JOIN TSS on TSS.gene_id = genes.gene_id GROUP BY genes.gene_id' % ', '.join( data_columns )
    # The gene id is passed as a bound parameter so that IDs containing
    # quote characters cannot break the statement.
    sample_query = 'SELECT fpkm, conf_lo, conf_hi, quant_status FROM geneData WHERE gene_id = :gene_id ORDER BY sample_name ASC'
    for row in self.session.execute( query ):
        output_data = list( row )
        for samplerow in self.session.execute( sample_query, { 'gene_id': row[0] } ):
            output_data.extend( samplerow )
        self.__write_line( output_data )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

186

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

187 def cds_fpkm( self ):

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

188 output_cols = [ 'tracking_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

189 'tss_id', 'locus', 'length', 'coverage' ]

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

190 iso_groups = self.session.execute( 'SELECT CDS_id FROM CDS LIMIT 1' )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

191 output_cols.extend( self.__get_per_sample_fpkm( identifiers=iso_groups, column='CDS_id', table='CDSData' ) )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

192 self.__write_line( output_cols )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

193 data_columns = [ 'CDS_id', 'class_code', 'nearest_ref_id', 'gene_id', 'gene_short_name',

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

194 'GROUP_CONCAT(TSS_group_id)', 'locus', 'length', 'coverage' ]

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

195 query = 'SELECT %s FROM CDS GROUP BY CDS_id' % ', '.join( data_columns )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

196 result = self.session.execute( query )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

197 for row in result:

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

198 CDS_id = row[0]

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

199 output_data = list( row )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

200 per_sample = self.session.execute( "SELECT fpkm, conf_lo, conf_hi, quant_status FROM CDSData WHERE CDS_id = '%s' ORDER BY sample_name ASC" % CDS_id )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

201 for samplerow in per_sample:

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam

parents:

diff changeset

202 output_data.extend( list( samplerow ) )

648c27c78eed Initial commit with version 1.0.0 of the tool.

devteam