comparison make_unique_id.py @ 3:a2258ce2d58c draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 08b7b36e1eba516e4d7eb97086baf7ec8d177c5d"
author brinkmanlab
date Mon, 15 Jun 2020 19:32:32 -0400
parents c8bda09480ae
children f2656e644641
comparison
equal deleted inserted replaced
2:c8bda09480ae 3:a2258ce2d58c
27 27
28 format = sys.argv[1] 28 format = sys.argv[1]
29 ids = defaultdict(int) 29 ids = defaultdict(int)
30 30
31 def makeUnique(seq): 31 def makeUnique(seq):
32 newid = seq.id[:16] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long 32 seqlenlen = len(str(len(seq)))
33 newid = seq.id[:26 - seqlenlen] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long
33 count = ids[newid] 34 count = ids[newid]
34 ids[newid] += 1 35 ids[newid] += 1
35 if count: 36 if count:
36 suffix = "_" + str(count) 37 suffix = "_" + str(count)
37 seqlenlen = len(str(len(seq))) 38 if len(newid) + len(suffix) + 1 + seqlenlen > 26:
38 if len(newid) + len(suffix) + 1 + seqlenlen > 16: 39 newid = newid[:25 - seqlenlen - len(suffix)]
39 newid = newid[:16 - seqlenlen - len(suffix)]
40 40
41 newid += suffix 41 newid += suffix
42 seq.name += suffix 42 seq.name += suffix
43 43
44 if seq.id != newid: 44 if seq.id != newid:
45 print(f"{seq.id}\t{newid}") 45 print(f"{seq.id}\t{newid}")
46 46
47 seq.id = newid 47 seq.id = newid
48 return seq 48 return seq
49 49
50
50 paths = iter(sys.argv[2:]) 51 paths = iter(sys.argv[2:])
51 52
52 for input, output in zip(paths, paths): 53 for input, output in zip(paths, paths):
53 SeqIO.write( 54 SeqIO.write(
54 map(makeUnique, SeqIO.parse(input, format)), 55 map(makeUnique, SeqIO.parse(input, format)),
55 output, 56 output,
56 format 57 format
57 ) 58 )
58 59
60