Mercurial > repos > louise > export_to_fastq
diff export_to_fastq/README @ 0:97792524cc9c default tip
Migrated tool version 0.1 from old tool shed archive to new tool shed repository
author | louise |
---|---|
date | Tue, 07 Jun 2011 17:21:49 -0400 |
parents | |
children |
line wrap: on
line diff
# Solexa Export datatype for Galaxy (export_to_fastq/README).
#
# Here is the needed class to handle the Solexa Export file type.
#
# The tool and class were written by Nicolas Delhomme (delhomme@embl.de).
# Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html
#
# The threshold parameter was commented out, but it can very well be used:
# just uncomment the commented code and comment out the current command tag
# in the XML file.
#
# This README was originally distributed as a patch against Galaxy
# (diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py, hunk
# "@@ -504,3 +504,95 @@"), to be applied with:
#     patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README
# The classes below are the lines that patch adds after the existing
# get_track_type() method. They use the Tabular base class unqualified, so
# they are meant to live inside tabular.py where Tabular is defined.


class Export(Tabular):
    """Tabular datatype for Solexa Export files (file extension 'export')."""

    file_ext = 'export'

    # Sniffing parameters: a data line must have exactly this many
    # tab-separated fields, and at most this many data lines are inspected.
    _SNIFF_COLUMN_COUNT = 22
    _SNIFF_MAX_LINES = 5

    def __init__(self, **kwd):
        """Initialize export datatype"""
        Tabular.__init__(self, **kwd)
        # NOTE(review): only 16 names are listed although sniff() requires
        # 22 columns per line; make_html_table() gives the remaining columns
        # number-only headers.
        self.column_names = [
            'MACHINE', 'RUN', 'LANE', 'TILE',
            'X', 'Y', 'MULTIPLEX', 'PAIRID',
            'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
            'POSITION', 'STRAND', 'ALN_QUAL', 'CHASTITY',
        ]

    def make_html_table(self, dataset, skipchars=None):
        """Create HTML table, used for displaying peek

        The header row shows "<index>.<name>" for every entry of
        self.column_names and a bare 1-based index for any further column
        reported by dataset.metadata.columns. The data rows come from
        self.make_html_peek_rows().

        Returns the HTML as a string, or the message
        "Can't create peek <error>" if anything goes wrong while building it.
        """
        if skipchars is None:
            # Avoid a shared mutable default; callers passing nothing still
            # hand an empty list on to make_html_peek_rows().
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header
            out.append('<tr>')
            named = len(self.column_names)
            for i, name in enumerate(self.column_names):
                out.append('<th>%s.%s</th>' % (str(i + 1), name))
            # Columns beyond the named ones only get their number; the range
            # is empty when there are no extra columns.
            for i in range(named, dataset.metadata.columns):
                out.append('<th>%s</th>' % str(i + 1))
            out.append('</tr>')
            out.append(self.make_html_peek_rows(dataset, skipchars=skipchars))
            out.append('</table>')
            out = "".join(out)
        except Exception as exc:
            # Best effort: the peek is display-only, so report the problem
            # in place of the table rather than raising.
            out = "Can't create peek %s" % exc
        return out

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Set tabular metadata from the first 100 data lines of the file.

        Only `overwrite` is forwarded to Tabular.set_meta(); other keyword
        arguments are accepted but not passed on.
        """
        # We arbitrarily use only the first 100 data lines of the export file
        # to calculate tabular attributes (column types). Optional metadata
        # values set in the Tabular class will be 'None'.
        Tabular.set_meta(self, dataset, overwrite=overwrite, max_data_lines=100)

    def sniff(self, filename):
        """
        Determines whether the file is in Export format

        A file in Export format consists of lines of tab-separated data.
        It does not have any header

        Rules for sniffing as True:
            There must be 22 columns of data on each line
            Columns 2 to 8 must be numbers
            Column 22 should be either Y or N
            We will only check that up to the first 5 alignments are
            correctly formatted.

        Lines starting with '@' are skipped, and the first empty line is
        treated as end of file. Returns False when no data line was found or
        when the file cannot be read.
        """
        try:
            fh = open(filename)
            try:
                count = 0
                while count < self._SNIFF_MAX_LINES:
                    line = fh.readline().strip()
                    if not line:
                        break  # EOF (or a blank line)
                    if line[0] == '@':
                        continue
                    pieces = line.split('\t')
                    if len(pieces) != self._SNIFF_COLUMN_COUNT:
                        return False
                    try:
                        # Columns 2 to 8 (indexes 1..7) must parse as integers.
                        for piece in pieces[1:8]:
                            int(piece)
                    except ValueError:
                        return False
                    # Explicit check instead of `assert`, which would be
                    # stripped when Python runs with -O.
                    if pieces[-1] not in ('Y', 'N'):
                        return False
                    count += 1
                # True as soon as at least one data line validated.
                return count > 0
            finally:
                # Close the handle on every exit path, including early returns.
                fh.close()
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not Export.
            return False


class BarcodeSet(Tabular):
    """Tabular datatype with extension 'bs' and columns SAMPLE and BARCODE."""

    file_ext = 'bs'
    column_names = ['SAMPLE', 'BARCODE']

    def sniff(self, filename):
        """Always return False: this datatype is never detected by sniffing."""
        return False