Mercurial > repos > qfab > usearch_map_reads_to_otus
view usearch_map_reads_to_otu/scripts/parseFasta.py @ 0:c10d09023766 draft
Uploaded
author | qfab |
---|---|
date | Thu, 29 May 2014 00:51:18 -0400 |
parents | |
children |
line wrap: on
line source
# parse fasta - id & sequence
#
# Minimal FASTA reader with a small dict-like interface:
#   parser = FastaParser(path)
#   parser[key].seq.tostring()   -> sequence with newlines removed
#   parser[key].format('fasta')  -> ">description\nsequence"
import re


class Seq(object):
    """Sequence view of a single record stored in a shared records dict."""

    def __init__(self, key, records_dict):
        """Remember which record (``key``) of ``records_dict`` this refers to.

        ``records_dict`` maps a key to a dict holding at least 'sequence'.
        No lookup happens here, so an unknown key is only detected on use.
        """
        self.key = key
        self.records = records_dict

    def tostring(self):
        """Return the record's sequence as one string with newlines removed.

        Raises:
            KeyError: if ``self.key`` is not present in the records dict.
        """
        s = self.records[self.key]['sequence']
        return s.replace('\n', '')


class SeqRecord(object):
    """One FASTA record: its key, the shared records dict and a Seq view."""

    def __init__(self, key, records_dict):
        """Bind this record to ``key`` inside ``records_dict``.

        ``records_dict`` maps a key to a dict with 'description' and
        'sequence' entries. The key is not validated here.
        """
        self.records = records_dict
        self.key = key
        self.seq = Seq(key, records_dict)

    def format(self, out_format):
        """Render the record in the requested output format.

        Only 'fasta' is supported: the result is '>' followed by the full
        description line, a newline, and the sequence with its original
        line breaks. Any other ``out_format`` yields None.

        Raises:
            KeyError: if ``self.key`` is not present in the records dict.
        """
        if out_format == 'fasta':
            r = self.records[self.key]
            return ">%s\n%s" % (r['description'], r['sequence'])
        # Unsupported formats have always produced None; keep that explicit.
        return None


class FastaParser(object):
    """Read a whole FASTA file into memory and index its records by id.

    Attributes:
        fasta_file: path that was passed to the constructor.
        entries: raw text of each record (header line without the leading
            '>', followed by the sequence lines), in file order.
        records: dict mapping a record key (first whitespace-delimited token
            of the header line) to {'description': ..., 'sequence': ...}.
    """

    def __init__(self, fasta_file):
        """Load ``fasta_file`` and build the records dictionary.

        Raises:
            IOError / OSError: if the file cannot be opened or read.
        """
        self.fasta_file = fasta_file
        # Context manager so the handle is closed even if read() fails.
        with open(fasta_file, 'r') as handle:
            fasta = handle.read()
        # Split only where '>' begins a line, so a '>' inside a description
        # does not start a bogus record. Chunks without a usable header
        # token (e.g. blank lines before the first record, or a bare '>')
        # cannot be keyed and are dropped.
        self.entries = [
            chunk for chunk in re.split(r'(?m)^>', fasta)
            if self._entry_key(chunk) is not None
        ]
        self.build_records_dict()

    @staticmethod
    def _entry_key(entry):
        """Return the first token of ``entry``'s header line, or None."""
        tokens = entry.split('\n', 1)[0].split()
        return tokens[0] if tokens else None

    def keys(self):
        """Return the key of every entry, in file order.

        Duplicate ids appear once per entry here, although ``records`` keeps
        only the last record for a repeated key.
        """
        return [self._entry_key(entry) for entry in self.entries]

    def __len__(self):
        """Return the number of entries (same length as ``keys()``)."""
        return len(self.keys())

    def __iter__(self):
        """Iterate over the record keys in file order."""
        for k in self.keys():
            yield k

    def build_records_dict(self):
        """(Re)build ``self.records`` from ``self.entries``.

        The description is the whole header line; the sequence is the rest
        of the entry with surrounding whitespace stripped and interior
        newlines kept. A later entry overwrites an earlier one with the
        same key.
        """
        records_dict = {}
        for entry in self.entries:
            key = self._entry_key(entry)
            if key is None:
                # Only reachable if entries was modified externally.
                continue
            description, _, body = entry.partition('\n')
            records_dict[key] = {
                'description': description,
                'sequence': body.strip(),
            }
        self.records = records_dict

    def __getitem__(self, key):
        """Return a SeqRecord for ``key``.

        The key is not checked here; an unknown key raises KeyError only
        when the returned record's data is accessed.
        """
        return SeqRecord(key, self.records)