0
|
1 #!/usr/bin/env python
|
|
2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
|
|
3 """
|
|
4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
|
|
5 Output: tabular
|
|
6 format convert: fasta to tabular
|
|
7 """
|
|
8
|
|
9 import sys, os
|
|
10
|
|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then SystemExit is raised with status 1 so that the calling
    process can tell the run failed.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)
|
|
14
|
|
def __main__():
    """Convert a FASTA file into a tab-separated file, one record per row.

    Arguments are taken from sys.argv:
        1. path of the FASTA file to read
        2. path of the tabular file to write
        3. truncation length (int); 0 means "do not truncate"
        4. number of columns to build from each title line (int, >= 1)

    With a column count of 1 the whole title line (minus the leading '>')
    becomes a single column and the truncation applies to that text.  With
    a larger count the title is split on whitespace into that many columns,
    the truncation applies to the first word only, and missing columns are
    padded with empty strings.  The sequence lines of a record are
    concatenated into one final column.  Blank lines and lines starting
    with '#' are skipped.

    Exits through stop_err() on a wrong argument count or a column count
    below 1.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if keep_first == 0:
        # None as a slice bound keeps everything.
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")

    # Track whether anything has been written instead of relying on the
    # input line index: the index is undefined for an empty file and is
    # already > 0 for the first record when the file starts with comments.
    wrote_any = False
    with open(infile) as fasta, open(outfile, 'w') as out:
        for line in fasta:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            if line.startswith('>'):
                # Don't want any existing tabs to trigger extra columns:
                line = line.replace('\t', ' ')
                if wrote_any:
                    # Terminate the previous row before starting a new one.
                    out.write('\n')
                if descr_split == 1:
                    out.write(line[1:keep_first])
                else:
                    words = line[1:].split(None, descr_split - 1)
                    # A bare ">" title yields no words at all.
                    if words:
                        # apply any truncation to first word (the id)
                        words[0] = words[0][0:keep_first]
                    # pad with empty columns if required
                    words += [""] * (descr_split - len(words))
                    out.write("\t".join(words))
                out.write('\t')
            else:
                out.write(line)
            wrote_any = True
        if wrote_any:
            # Terminate the final row.
            out.write('\n')
|
|
56
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
|