sharplabtool: tools/fasta_tools/fasta_to

annotate tools/fasta_tools/fasta_to_tabular.py @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 #!/usr/bin/env python
9071e359b9a3 Uploaded xuebing parents: diff changeset	2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
9071e359b9a3 Uploaded xuebing parents: diff changeset	3 """
9071e359b9a3 Uploaded xuebing parents: diff changeset	4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
9071e359b9a3 Uploaded xuebing parents: diff changeset	5 Output: tabular
9071e359b9a3 Uploaded xuebing parents: diff changeset	6 format convert: fasta to tabular
9071e359b9a3 Uploaded xuebing parents: diff changeset	7 """
9071e359b9a3 Uploaded xuebing parents: diff changeset	8
9071e359b9a3 Uploaded xuebing parents: diff changeset	9 import sys, os
9071e359b9a3 Uploaded xuebing parents: diff changeset	10
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes *msg* to standard error exactly as given (no newline is
    appended) and then raises SystemExit with exit status 1, so the
    calling process can detect the failure from the exit code as well
    as from the stderr output.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit( 1 )
9071e359b9a3 Uploaded xuebing parents: diff changeset	14
def __main__():
    """Convert a FASTA file to tab-separated text, one record per row.

    Command-line arguments (read from sys.argv):
        1. path of the FASTA input file
        2. path of the tabular output file
        3. keep_first: number of identifier characters to keep
           (0 means no truncation)
        4. descr_split: number of columns to produce from each title
           line (must be 1 or more)

    Each output row holds the title column(s) followed by one final
    column containing the record's sequence lines concatenated together.
    With descr_split == 1 the whole title (minus '>') is one column,
    truncated to keep_first characters.  Otherwise the title is split on
    whitespace into at most descr_split columns, only the first of which
    is truncated, and missing columns are padded with empty strings.
    Blank lines and lines starting with '#' are skipped.

    A wrong argument count or a descr_split below 1 is reported through
    stop_err.  Non-integer values for arguments 3 and 4 raise ValueError
    from int().
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int( sys.argv[3] )
    descr_split = int( sys.argv[4] )
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        # None as a slice bound means "no truncation"
        keep_first = None
    elif descr_split == 1:
        #Added one for the ">" character
        #(which is removed if using descr_split > 1)
        keep_first += 1
    # Tracks whether anything has been written yet.  The line index cannot
    # be used for this: leading comments/blank lines would yield an empty
    # first row, and an empty input would leave the index undefined.
    wrote_any = False
    # The input is opened first so that an unreadable input does not
    # truncate an existing output file; both handles are closed on error.
    with open( infile ) as fasta, open( outfile, 'w' ) as out:
        for line in fasta:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            if line.startswith( '>' ):
                #Don't want any existing tabs to trigger extra columns:
                line = line.replace('\t', ' ')
                if wrote_any:
                    # terminate the previous row before starting a new one
                    out.write('\n')
                if descr_split == 1:
                    out.write(line[1:keep_first])
                else:
                    words = line[1:].split(None, descr_split-1)
                    #apply any truncation to first word (the id);
                    #a bare ">" title yields no words at all
                    if words:
                        words[0] = words[0][0:keep_first]
                    #pad with empty columns if required
                    words += [""]*(descr_split-len(words))
                    out.write("\t".join(words))
                out.write('\t')
            else:
                # sequence lines of one record are concatenated in place
                out.write(line)
            wrote_any = True
        if wrote_any:
            # terminate the final row
            out.write('\n')
9071e359b9a3 Uploaded xuebing parents: diff changeset	56
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()

Mercurial > repos > xuebing > sharplabtool

annotate tools/fasta_tools/fasta_to_tabular.py @ 0:9071e359b9a3