Mercurial > repos > louise > export_to_fastq
comparison export_to_fastq/README @ 0:97792524cc9c default tip
Migrated tool version 0.1 from old tool shed archive to new tool shed repository
author | louise |
---|---|
date | Tue, 07 Jun 2011 17:21:49 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:97792524cc9c |
---|---|
1 Here is the class needed to handle the Solexa Export file type. | |
2 | |
3 The tool and class were written by Nicolas Delhomme (delhomme@embl.de). | |
4 Released under the GNU GPL: http://www.opensource.org/licenses/gpl-3.0.html | |
5 | |
6 The threshold parameter is currently commented out, but it can still be used: uncomment the commented-out code and comment out the current command tag in the XML file. | |
7 | |
8 If you want to apply this file as a patch, just run: | |
9 patch <path_to_galaxy>/lib/galaxy/datatypes/tabular.py README | |
10 --- | |
11 | |
12 diff -r 50e249442c5a lib/galaxy/datatypes/tabular.py | |
13 --- a/lib/galaxy/datatypes/tabular.py Thu Apr 07 08:39:07 2011 -0400 | |
14 +++ b/lib/galaxy/datatypes/tabular.py Tue May 24 14:16:12 2011 +0200 | |
15 @@ -504,3 +504,95 @@ | |
16 | |
17 def get_track_type( self ): | |
18 return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"} | |
19 + | |
class Export( Tabular ):
    """Tabular datatype for Solexa export files.

    Labels the leading columns in the HTML peek and recognises export files
    by inspecting their first few data lines (see sniff()).
    """
    file_ext = 'export'

    def __init__(self, **kwd):
        """Initialize export datatype"""
        Tabular.__init__( self, **kwd )
        # Header labels used by make_html_table() for the leading columns.
        # NOTE(review): sniff() requires 22 columns and reads the Y/N flag
        # from column 22, while only 16 names are listed here -- confirm that
        # these labels line up with the real export column layout.
        self.column_names = ['MACHINE', 'RUN', 'LANE', 'TILE',
                             'X', 'Y', 'MULTIPLEX', 'PAIRID',
                             'READ', 'QUALITY', 'CHROMOSOME', 'CONTIG',
                             'POSITION','STRAND','ALN_QUAL','CHASTITY'
                             ]

    def make_html_table( self, dataset, skipchars=[] ):
        """Create HTML table, used for displaying peek

        The header row shows "<index>.<name>" for every entry of
        self.column_names, followed by a bare 1-based index for each further
        column counted in dataset.metadata.columns.  The body rows come from
        self.make_html_peek_rows(), which receives skipchars unchanged.

        Returns the table as a single string, or a "Can't create peek ..."
        message if anything goes wrong while building it.
        """
        # Local import: only part of the module is visible here, so do not
        # rely on a file-level "import sys".
        import sys
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header
            out.append( '<tr>' )
            for i, name in enumerate( self.column_names ):
                out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) )
            # Columns beyond the named ones only get their index as header
            if dataset.metadata.columns - len( self.column_names ) > 0:
                for i in range( len( self.column_names ), dataset.metadata.columns ):
                    out.append( '<th>%s</th>' % str( i+1 ) )
            out.append( '</tr>' )
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception:
            # sys.exc_info() rather than "except Exception, exc" so that the
            # code parses on Python 3 as well as on old Python 2 releases.
            out = "Can't create peek %s" % sys.exc_info()[1]
        return out

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Set the tabular metadata of dataset.

        Delegates to Tabular.set_meta(), limited to the first 100 data lines.
        Additional keyword arguments are accepted but not forwarded.
        """
        #we'll arbitrarily only use the first 100 data lines in the export file to calculate tabular attributes (column types)
        #optional metadata values set in Tabular class will be 'None'
        Tabular.set_meta( self, dataset, overwrite = overwrite, max_data_lines = 100 )

    def sniff( self, filename ):
        """
        Determines whether the file is in Export format

        A file in Export format consists of lines of tab-separated data.
        It does not have any header

        Rules for sniffing as True:
            There must be exactly 22 columns of data on each checked line
            Columns 2 to 8 must be integers
            Column 22 must be either Y or N
            Lines starting with '@' are skipped
            Reading stops at the first blank line or at the end of the file
            We will only check that up to the first 5 alignments are
            correctly formatted, and at least one must have been checked.

        Any error while opening or reading the file yields False.
        """
        try:
            fh = open( filename )
            # Nested try/finally so the handle is closed on every exit path,
            # including the early returns below.
            try:
                count = 0
                while True:
                    line = fh.readline().strip()
                    if not line:
                        break #EOF (or a blank line)
                    if line[0] == '@':
                        continue
                    linePieces = line.split('\t')
                    if len(linePieces) != 22:
                        return False
                    try:
                        for piece in linePieces[1:8]:
                            int(piece)
                    except ValueError:
                        return False
                    # Explicit test instead of assert: asserts are removed
                    # when Python runs with -O.
                    if linePieces[21] not in ( 'Y', 'N' ):
                        return False
                    count += 1
                    if count == 5:
                        return True
                # Fewer than 5 data lines in the file: accept if there was
                # at least one valid line.
                return count > 0
            finally:
                fh.close()
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not ours.
            return False
104 + | |
class BarcodeSet( Tabular ):
    """Tabular datatype with the two column names SAMPLE and BARCODE."""
    # Column labels, declared at class level.
    column_names = [ 'SAMPLE', 'BARCODE' ]
    file_ext = 'bs'

    def sniff( self, filename ):
        """Always return False, so this datatype is never auto-detected."""
        return False
111 | |
112 |