comparison fasta_to_tabular.py @ 2:091edad7622f draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:25:01 -0500
parents 9d189d08f2ad
children
comparison
equal deleted inserted replaced
1:7e801ab2b70e 2:091edad7622f
4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description) 4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
5 Output: tabular 5 Output: tabular
6 format convert: fasta to tabular 6 format convert: fasta to tabular
7 """ 7 """
8 8
9 import sys, os 9 import sys
10 10
11 def stop_err( msg ): 11
12 sys.stderr.write( msg ) 12 def stop_err(msg):
13 sys.exit() 13 sys.exit(msg)
14
14 15
15 def __main__(): 16 def __main__():
16 if len(sys.argv) != 5: 17 if len(sys.argv) != 5:
17 stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)") 18 stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
18 infile = sys.argv[1] 19 infile = sys.argv[1]
19 outfile = sys.argv[2] 20 outfile = sys.argv[2]
20 keep_first = int( sys.argv[3] ) 21 keep_first = int(sys.argv[3])
21 descr_split = int( sys.argv[4] ) 22 descr_split = int(sys.argv[4])
22 fasta_title = fasta_seq = ''
23 if keep_first == 0: 23 if keep_first == 0:
24 keep_first = None 24 keep_first = None
25 elif descr_split == 1: 25 elif descr_split == 1:
26 #Added one for the ">" character 26 # Added one for the ">" character
27 #(which is removed if using descr_split > 1) 27 # (which is removed if using descr_split > 1)
28 keep_first += 1 28 keep_first += 1
29 if descr_split < 1: 29 if descr_split < 1:
30 stop_err("Bad description split value (should be 1 or more)") 30 stop_err("Bad description split value (should be 1 or more)")
31 out = open( outfile, 'w' ) 31 with open(outfile, 'w') as out, open(infile) as in_fh:
32 for i, line in enumerate( open( infile ) ): 32 for i, line in enumerate(in_fh):
33 line = line.rstrip( '\r\n' ) 33 line = line.rstrip('\r\n')
34 if not line or line.startswith( '#' ): 34 if not line or line.startswith('#'):
35 continue 35 continue
36 if line.startswith( '>' ): 36 if line.startswith('>'):
37 #Don't want any existing tabs to trigger extra columns: 37 # Don't want any existing tabs to trigger extra columns:
38 line = line.replace('\t', ' ') 38 line = line.replace('\t', ' ')
39 if i > 0: 39 if i > 0:
40 out.write('\n') 40 out.write('\n')
41 if descr_split == 1: 41 if descr_split == 1:
42 out.write(line[1:keep_first]) 42 out.write(line[1:keep_first])
43 else:
44 words = line[1:].split(None, descr_split - 1)
45 # apply any truncation to first word (the id)
46 words[0] = words[0][0:keep_first]
47 # pad with empty columns if required
48 words += [""] * (descr_split - len(words))
49 out.write("\t".join(words))
50 out.write('\t')
43 else: 51 else:
44 words = line[1:].split(None, descr_split-1) 52 out.write(line)
45 #apply any truncation to first word (the id) 53 if i > 0:
46 words[0] = words[0][0:keep_first] 54 out.write('\n')
47 #pad with empty columns if required
48 words += [""]*(descr_split-len(words))
49 out.write("\t".join(words))
50 out.write('\t')
51 else:
52 out.write(line)
53 if i > 0:
54 out.write('\n')
55 out.close()
56 55
57 if __name__ == "__main__" : __main__() 56
57 if __name__ == "__main__":
58 __main__()