Mercurial > repos > louise > export_to_fastq

diff export_to_fastq/README @ 0:97792524cc9c default tip
Migrated tool version 0.1 from old tool shed archive to new tool shed repository
author: louise
date: Tue, 07 Jun 2011 17:21:49 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_to_fastq/README	Tue Jun 07 17:21:49 2011 -0400
@@ -0,0 +1,112 @@
+This patch adds the class needed to handle the Solexa Export file type.
+
+The tool and class were written by Nicolas Delhomme (delhomme@embl.de).
+Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html
+
+The threshold parameter is currently commented out, but it can still be used: uncomment that code and comment out the current command tag in the XML file.
+
+If you want to apply this file as a patch, just run:
+patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README
+---
+
+diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py
+--- a/lib/galaxy/datatypes/tabular.py	Thu Apr 07 08:39:07 2011 -0400
++++ b/lib/galaxy/datatypes/tabular.py	Tue May 24 14:16:12 2011 +0200
+@@ -504,3 +504,95 @@
+         
+     def get_track_type( self ):
+         return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
++
++class Export( Tabular ):
++    """Tabular datatype for Solexa Export files (no header, tab-separated)."""
++    file_ext = 'export'
++    def __init__(self, **kwd):
++        """Initialize export datatype"""
++        Tabular.__init__( self, **kwd )
++        # NOTE(review): only 16 labels while sniff() requires 22 columns - confirm
++        self.column_names = ['MACHINE', 'RUN', 'LANE', 'TILE',
++                             'X', 'Y', 'MULTIPLEX', 'PAIRID',
++                             'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
++                             'POSITION','STRAND','ALN_QUAL','CHASTITY']
++
++    def make_html_table( self, dataset, skipchars=[] ):
++        """Create HTML table, used for displaying peek.
++        Returns the markup string, or a "Can't create peek" message on error."""
++        out = ['<table cellspacing="0" cellpadding="3">']
++        try:
++            # Generate column header
++            out.append( '<tr>' )
++            for i, name in enumerate( self.column_names ):
++                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
++            # Columns beyond the named ones get a number-only header
++            for i in range( len( self.column_names ), dataset.metadata.columns ):
++                out.append( '<th>%s</th>' % str( i+1 ) )
++            # Always close the header row, even when there are no extra columns
++            out.append( '</tr>' )
++            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
++            out.append( '</table>' )
++            out = "".join( out )
++        except Exception, exc:
++            out = "Can't create peek %s" % exc
++        return out
++
++    def set_meta( self, dataset, overwrite = True, **kwd ):
++        """Set tabular metadata (column types) from the first 100 data lines only"""
++        #optional metadata values set in Tabular class will be 'None'
++        Tabular.set_meta( self, dataset, overwrite = overwrite, max_data_lines = 100 )
++
++    def sniff( self, filename ):
++        """
++        Determines whether the file is in Export format
++
++        A file in Export format consists of lines of tab-separated data.
++        It does not have any header
++
++        Rules for sniffing as True:
++            There must be 22 columns of data on each line
++            Columns 2 to 8 must be numbers
++            Column 22 should be either Y or N
++            We will only check that up to the first 5 alignments are correctly formatted.
++        An unreadable file sniffs as False; the file handle is always closed.
++        """
++        try:
++            fh = open( filename )
++            try:
++                count = 0
++                while True:
++                    line = fh.readline().strip()
++                    if not line:
++                        break #EOF (a blank line also ends the scan)
++                    # Lines starting with '@' are skipped without being checked
++                    if line[0] == '@':
++                        continue
++                    linePieces = line.split('\t')
++                    if len(linePieces) != 22:
++                        return False
++                    try:
++                        for piece in linePieces[1:8]:
++                            int( piece )
++                    except ValueError:
++                        return False
++                    # Explicit test: an assert would be stripped under python -O
++                    if linePieces[21] not in [ 'Y', 'N' ]:
++                        return False
++                    count += 1
++                    if count == 5:
++                        return True
++                return count > 0
++            finally:
++                # Runs on every exit path, so early returns no longer leak the handle
++                fh.close()
++        except Exception:
++            # Sniffers must not raise: a file that cannot be read is not Export
++            return False
++
++class BarcodeSet( Tabular ):
++    file_ext = 'bs'
++    column_names = ['SAMPLE', 'BARCODE']
++    # Two named columns; never auto-detected because sniff() always returns False
++    def sniff( self, filename ):
++        return False
+
+
author	louise
date	Tue, 07 Jun 2011 17:21:49 -0400
parents
children