comparison make_unique_id.py @ 2:c8bda09480ae draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 2e161ea2e4ddf5692f32ee389de42dc7c4fd5fa3"
author brinkmanlab
date Mon, 15 Jun 2020 19:04:53 -0400
parents 061c3402a977
children a2258ce2d58c
comparison
equal deleted inserted replaced
1:061c3402a977 2:c8bda09480ae
27 27
28 format = sys.argv[1] 28 format = sys.argv[1]
29 ids = defaultdict(int) 29 ids = defaultdict(int)
30 30
31 def makeUnique(seq): 31 def makeUnique(seq):
32 newid = seq.id[:28] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long 32 newid = seq.id[:16] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long
33 count = ids[newid] 33 count = ids[newid]
34 ids[newid] += 1 34 ids[newid] += 1
35 if count: 35 if count:
36 suffix = "_" + str(count) 36 suffix = "_" + str(count)
37 seqlenlen = len(str(len(seq))) 37 seqlenlen = len(str(len(seq)))
38 if len(newid) + len(suffix) + 1 + seqlenlen > 28: 38 if len(newid) + len(suffix) + 1 + seqlenlen > 16:
39 newid = newid[:27 - seqlenlen - len(suffix)] 39 newid = newid[:16 - seqlenlen - len(suffix)]
40 40
41 newid += suffix 41 newid += suffix
42 seq.name += suffix 42 seq.name += suffix
43 43
44 if seq.id != newid: 44 if seq.id != newid:
45 print(f"{seq.id}\t{newid}") 45 print(f"{seq.id}\t{newid}")
46 46
47 seq.id = newid 47 seq.id = newid
48 return seq 48 return seq
49 49
50
51 paths = iter(sys.argv[2:]) 50 paths = iter(sys.argv[2:])
52 51
53 for input, output in zip(paths, paths): 52 for input, output in zip(paths, paths):
54 SeqIO.write( 53 SeqIO.write(
55 map(makeUnique, SeqIO.parse(input, format)), 54 map(makeUnique, SeqIO.parse(input, format)),
56 output, 55 output,
57 format 56 format
58 ) 57 )
59 58
60