Mercurial > repos > edward-kirton > phrap
diff phrap/jgi_assembly.py @ 0:f9e4e6fe0e73 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:41:56 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phrap/jgi_assembly.py Tue Jun 07 17:41:56 2011 -0400 @@ -0,0 +1,117 @@ +""" +Assembly classes +""" + +import data +import logging +import re +import string +from cgi import escape +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes import metadata +import galaxy.model +from galaxy import util +from sniff import * + +log = logging.getLogger(__name__) + +class Assembly( data.Text ): + """Class describing an assembly""" + + """Add metadata elements""" + MetadataElement( name="contigs", default=0, desc="Number of contigs", readonly=True, visible=False, optional=True, no_value=0 ) + MetadataElement( name="reads", default=0, desc="Number of reads", readonly=True, visible=False, optional=True, no_value=0 ) + + +class Ace(Assembly): + """Class describing an assembly Ace file""" + + file_ext = "ace" + +# def init_meta( self, dataset, copy_from=None ): +# Assembly.init_meta( self, dataset, copy_from=copy_from ) + + def set_meta( self, dataset, overwrite=True, **kwd ): + """ + Set the number of assembled contigs and read sequences and the number of data lines in dataset. + """ + contigs = 0 + reads = 0 + for line in file( dataset.file_name ): + line = line.strip() + if line and line.startswith( '#' ): + # Don't count comment lines + continue + if line and line.startswith( 'CO' ): + contigs += 1 + if line and line.startswith( 'RD' ): + reads += 1 + dataset.metadata.contigs = contigs + dataset.metadata.reads = reads + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + if dataset.metadata.contigs: + dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) ) + else: + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + """ + Determines whether the file is in ace format + + An ace file contains these sections + AS \d+ \d+ + + CO \S+ \d+ \d+ \d+ \w + [atcgATCGN\*]+ + + BQ + [\d\s]+ + + AF \S+ [CU] \-?\d+ + + BS \d+ \d+ \S+ + + RD \S+ \d+ \d+ \d+ + [ATCGN\*]+ + + QA \d+ \d+ \d+ \d+ + DS .* + + Currently we only check if file begins with AS + + >>> fname = get_test_fname( 'genome.ace' ) + >>> Ace().sniff( fname ) + True + >>> fname = get_test_fname( 'genome.fasta' ) + >>> Ace().sniff( fname ) + False + """ + + try: + fh = open( filename ) + line = fh.readline() + line = line.strip() + if line: + if line.startswith( 'AS ' ): + fh.close() + return True + fh.close() + return False + except: + pass + return False + +class Velveth(Assembly): + composite_type='basic' + file_ext = "txt" + + def __init__(self,**kwd): + Assembly.__init__(self,**kwd) + self.add_composite_file('Roadmap') + self.add_composite_file('Sequences')