annotate fasta_to_tabular.py @ 2:091edad7622f draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:25:01 -0500
parents 9d189d08f2ad
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
3 """
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
5 Output: tabular
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
6 format convert: fasta to tabular
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
7 """
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
8
2
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
9 import sys
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
10
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
11
2
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
def stop_err(msg):
    """Abort the script, reporting *msg* as the reason.

    The message is handed to ``sys.exit``, which raises ``SystemExit``;
    when uncaught, the interpreter prints a string argument to stderr and
    exits with a non-zero status.
    """
    sys.exit(msg)
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
14
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
15
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
def __main__():
    """Convert a FASTA file into tab-separated text, one record per row.

    Reads four positional arguments from ``sys.argv``:

    1. path of the FASTA input file
    2. path of the tabular output file
    3. number of leading title characters to keep (0 keeps everything)
    4. number of columns the title line is split into (must be >= 1)

    Exits through ``stop_err`` when the argument count is wrong or the
    column count is below one.  Non-integer values for arguments 3 or 4
    raise ``ValueError`` from ``int()``.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if keep_first == 0:
        # A slice end of None means "no truncation".
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    with open(outfile, 'w') as out, open(infile) as in_fh:
        _convert_records(in_fh, out, keep_first, descr_split)


def _convert_records(in_fh, out, keep_first, descr_split):
    """Stream FASTA lines from *in_fh* to *out* as tab-separated rows.

    Each ``>`` title line starts a new row holding ``descr_split`` title
    columns; the sequence lines that follow are concatenated into one
    final column.  Blank lines and lines starting with ``#`` are skipped.
    *keep_first* is the slice end applied to the title (``None`` for no
    truncation).
    """
    # Tracks whether any output has been written yet.  Using this instead
    # of the input line index keeps skipped leading blank/comment lines from
    # producing an empty first row, and makes empty input a no-op rather
    # than an unbound-variable error.
    row_open = False
    for line in in_fh:
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        if line.startswith('>'):
            # Don't want any existing tabs to trigger extra columns:
            line = line.replace('\t', ' ')
            if row_open:
                # Terminate the previous record's row.
                out.write('\n')
            if descr_split == 1:
                out.write(line[1:keep_first])
            else:
                words = line[1:].split(None, descr_split - 1)
                # Pad with empty columns first, so that a bare ">" title
                # (which splits to an empty list) still has an id column.
                words += [""] * (descr_split - len(words))
                # apply any truncation to first word (the id)
                words[0] = words[0][0:keep_first]
                out.write("\t".join(words))
            out.write('\t')
        else:
            out.write(line)
        row_open = True
    if row_open:
        # Terminate the final row, including for single-line input.
        out.write('\n')
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
55
2
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
56
091edad7622f "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()