annotate uniqprimer-0.5.0/primertools/fastaparser.py @ 6:8218425fda1d draft default tip

Uploaded
author dereeper
date Tue, 04 Jan 2022 16:05:55 +0000
parents 3249d78ecfc2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
1 '''
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
2 Created on Jan 1, 2011
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
3
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
4 @author: John L. Herndon
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
5 @contact: herndon@cs.colostate.edu
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
6 @organization: Colorado State University
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
8 '''
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
9
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
10
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
11 import utils
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
12 import primersequence
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
13
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
14 from Bio import SeqIO
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
15 from Bio import Seq
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
16 from Bio import Alphabet
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
17
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
def parseFastaFileAsPrimerSequence( fileName ):
    '''
    parse a fasta file and return a dict that maps each record id to a
    primersequence.PrimerSequence built from ( id, length, sequence ).
    if an id occurs more than once, the last record with that id is kept.
    '''
    utils.logMessage("fastaparser::parseFastaFileAsPrimerSequence( )", "parsing fasta file {0}".format( fileName ) )
    returnValue = { }

    # the with-block closes the handle as soon as every record has been read
    # (or if building a PrimerSequence raises), instead of leaving it open
    # until the garbage collector gets to it
    with open( fileName ) as fastaFile:
        for sequence in SeqIO.parse( fastaFile, "fasta" ):
            seqdata = primersequence.PrimerSequence( sequence.id, len( sequence ), sequence.seq )
            returnValue[ sequence.id ] = seqdata

    utils.logMessage("fastaparser::parseFastaFileAsPrimerSequence( )", "read {0} sequences".format( len( returnValue ) ) )

    return returnValue
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
32
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
def parseFastaFile( fileName ):
    '''
    parse a fasta file and return an iterator over the records produced by
    SeqIO.parse (not a list: records are read lazily while iterating, and
    the iterator can only be consumed once).
    the file is opened immediately, so a bad path fails in this call, and
    it is closed once the iterator is exhausted or closed.
    '''
    utils.logMessage("fastaparser::parseFastaFile( )", "parsing fasta file {0}".format( fileName ) )

    # open eagerly so that a missing/unreadable file is reported here rather
    # than on the first iteration
    fastaFile = open( fileName )

    def iterateRecords( ):
        # the with-block ties the handle's lifetime to the iteration
        with fastaFile:
            for record in SeqIO.parse( fastaFile, "fasta" ):
                yield record

    return iterateRecords( )
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
42
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
def writeFastaFile( sequences, fileName ):
    '''
    write a set of sequences to a fasta file.
    each item of sequences is iterated, its pieces are converted with str( )
    and concatenated; the records are named seq_0, seq_1, ... in input order.
    sequences must support len( ), it is used for the log message.
    returns the name of the new file
    '''

    utils.logMessage( "PrimerManager::writeFastaFile( )", "Writing {0} sequences to fasta file".format( len( sequences ) ) )
    seqRecords = [ ]
    for i, sequence in enumerate( sequences ):
        # join gives the same string as the former reduce( ) over str( x ) + str( y ),
        # without the quadratic concatenation, without failing on an empty
        # sequence, and without relying on reduce being a builtin
        seqStr = "".join( str( piece ) for piece in sequence )
        # NOTE(review): Bio.Alphabet was removed in Biopython 1.78 - confirm the
        # Biopython version this tool is pinned to
        seqRecord = SeqIO.SeqRecord( Seq.Seq( seqStr, Alphabet.IUPAC.extended_dna ), id="seq_{0}".format( i ) )
        seqRecords.append( seqRecord )

    # close (and therefore flush) the file before its name is handed back,
    # so that whatever reads it next sees the complete contents
    with open( fileName, "w" ) as fastaFile:
        SeqIO.write( seqRecords, fastaFile, "fasta" )

    utils.logMessage( "PrimerManager::writeFastaFile( )", "writing fasta file complete" )
    return fileName
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
63