view export_to_fastq/README @ 0:97792524cc9c default tip

Migrated tool version 0.1 from old tool shed archive to new tool shed repository
author louise
date Tue, 07 Jun 2011 17:21:49 -0400
parents
children
line wrap: on
line source

Here is the class needed to handle the Solexa Export file type.

The tool and class were written by Nicolas Delhomme (delhomme@embl.de).
Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html

The threshold parameter was commented out, but it can still be used: just uncomment the commented-out code and comment out the current command tag in the XML file.

If you want to apply this file as a patch, just run:
patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README
---

diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py
--- a/lib/galaxy/datatypes/tabular.py	Thu Apr 07 08:39:07 2011 -0400
+++ b/lib/galaxy/datatypes/tabular.py	Tue May 24 14:16:12 2011 +0200
@@ -504,3 +504,95 @@
         
     def get_track_type( self ):
         return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
+
+class Export( Tabular ):
+    """Tabular datatype for Solexa Export files (file extension 'export')."""
+    file_ext = 'export'
+    def __init__(self, **kwd):
+        """Initialize export datatype"""
+        Tabular.__init__( self, **kwd )
+        # Only the first 16 columns are named; sniff() requires 22 columns, and
+        # make_html_table() labels the remaining ones by number alone.
+        self.column_names = ['MACHINE', 'RUN', 'LANE', 'TILE',
+                             'X', 'Y', 'MULTIPLEX', 'PAIRID',
+                             'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
+                             'POSITION','STRAND','ALN_QUAL','CHASTITY'
+                             ]
+
+    def make_html_table( self, dataset, skipchars=[] ):
+        """
+        Create the HTML table used for displaying the dataset peek.
+
+        dataset   -- its metadata.columns sets how many numbered header cells follow
+        skipchars -- passed through unchanged to make_html_peek_rows
+        Returns the table markup, or a "Can't create peek" message on error.
+        """
+        out = ['<table cellspacing="0" cellpadding="3">']
+        try:
+            # Header row: named columns, then bare numbers for any extra columns
+            out.append( '<tr>' )
+            for i, name in enumerate( self.column_names ):
+                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
+            for i in range( len( self.column_names ), dataset.metadata.columns ):
+                out.append( '<th>%s</th>' % str( i+1 ) )
+            # Close the header row even when there are no extra columns
+            out.append( '</tr>' )
+            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
+            out.append( '</table>' )
+            out = "".join( out )
+        except Exception, exc:
+            out = "Can't create peek %s" % exc
+        return out
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """Set tabular metadata using only the first 100 data lines (arbitrary cap)."""
+        # Optional metadata values set in the Tabular class will be 'None';
+        # extra keyword arguments in kwd are accepted but not forwarded.
+        Tabular.set_meta( self, dataset, overwrite = overwrite, max_data_lines = 100 )
+
+    def sniff( self, filename ):
+        """
+        Determine whether the file is in Export format.
+
+        A file in Export format consists of lines of tab-separated data.
+        It does not have any header. Lines starting with '@' are skipped and
+        reading stops at the first empty line or at end of file.
+
+        Rules for sniffing as True:
+            There must be 22 columns of data on each line
+            Columns 2 to 8 must be integers
+            Column 22 must be either Y or N
+            Only the first 5 data lines are checked; a shorter file passes
+            when it has at least one data line and all of them are valid.
+        Any error while opening or reading the file sniffs as False.
+        """
+        count = 0
+        try:
+            fh = open( filename )
+            try:
+                while count < 5:
+                    line = fh.readline().strip()
+                    if not line:
+                        break # EOF, or a whitespace-only line
+                    if line[0] == '@':
+                        continue
+                    linePieces = line.split( '\t' )
+                    if len( linePieces ) != 22:
+                        return False
+                    for piece in linePieces[1:8]:
+                        int( piece ) # ValueError here is caught below
+                    if linePieces[21] not in ( 'Y', 'N' ):
+                        return False
+                    count += 1
+            finally:
+                fh.close() # runs on every exit path, including early returns
+        except Exception:
+            return False
+        return count > 0
+
+class BarcodeSet( Tabular ):
+    """Tabular datatype (extension 'bs') whose named columns are SAMPLE and BARCODE."""
+    column_names = ['SAMPLE', 'BARCODE']
+    file_ext = 'bs'
+    def sniff( self, filename ):
+        return False # always reports no match, whatever the file holds