Mercurial > repos > brinkmanlab > make_unique_id
annotate make_unique_id.py @ 1:061c3402a977 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit b661895b5e2dff7235eb32476602c3b93411a42e"
| author | brinkmanlab | 
|---|---|
| date | Mon, 15 Jun 2020 18:48:27 -0400 | 
| parents | a3a09dd8d09a | 
| children | c8bda09480ae | 
| rev | line source | 
|---|---|
| 0 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 2 import sys | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 3 from Bio import SeqIO | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 4 from collections import defaultdict | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 5 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
# Help text for the command line interface. The script writes it to stderr,
# after a "Missing arguments" message, when too few arguments are supplied.
usage = """
make_unique_id
Makes all record ids unique across all input data.
All input data must be the same format.

Use: make_unique_id.py [-v] <format> <input1> <output1> [<input2> <output2> ... <inputn> <outputn>]
\t-v Print version and exit

Valid formats: clustal, embl, fasta, fasta-2line, fastq-sanger, fastq, fastq-solexa, fastq-illumina, genbank, gb, imgt,
nexus, phd, phylip, pir, seqxml, sff, stockholm, tab, qual
"""
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 17 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
if __name__ == '__main__':
    # A version request wins over everything else, wherever '-v' appears
    # on the command line.
    if '-v' in sys.argv:
        print('1.0')
        # sys.exit rather than the bare exit(): the latter is injected by
        # the site module for interactive use and may be absent.
        sys.exit(0)

    # argv is [script, format, input1, output1, ...]; a valid call therefore
    # has at least four entries and an even total. An odd total means the
    # last input has no output path, which would otherwise be dropped
    # silently when the paths are paired up.
    if len(sys.argv) < 4 or len(sys.argv) % 2 != 0:
        print("Missing arguments", file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)

    # Name kept as-is (even though it shadows the builtin) because the
    # rest of the script reads it when parsing and writing records.
    format = sys.argv[1]
    # Running count of how often each id has been seen across all inputs.
    ids = defaultdict(int)
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 30 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
def makeUnique(seq, seen=None):
    """Give ``seq`` an id that has not been handed out before and return it.

    The id is first cut to 28 characters, because GenBank has a maximum
    length for the id plus the sequence-length number. The first record to
    use an id keeps it. A repeat gets a ``_<n>`` suffix, and the stem is
    shortened further when needed so that stem + suffix + one separator
    character + the digits of ``len(seq)`` fit in 28 characters. The same
    suffix is appended to ``seq.name``.

    Every id that is handed out, including generated ones, is recorded, and
    a candidate that is already taken is skipped by moving on to the next
    number. This keeps a generated ``a_1`` from colliding with a record
    whose own id is ``a_1``.

    When the id changes, ``<old id>\\t<new id>`` is printed to stdout.

    Args:
        seq: Record exposing ``id``, ``name`` and ``len()``; in this script
            the records come from ``SeqIO.parse``.
        seen: Optional mapping of id -> times seen, used as the registry.
            Defaults to the script-level ``ids`` counter.

    Returns:
        The same ``seq`` object, modified in place.
    """
    taken = ids if seen is None else seen
    old_id = seq.id
    base = old_id[:28]

    count = taken.get(base, 0)
    taken[base] = count + 1
    newid = base

    if count:
        seqlenlen = len(str(len(seq)))
        while True:
            suffix = "_" + str(count)
            stem = base
            if len(stem) + len(suffix) + 1 + seqlenlen > 28:
                # max() keeps the slice bound from going negative, which
                # would cut from the wrong end of the string.
                stem = stem[:max(27 - seqlenlen - len(suffix), 0)]
            newid = stem + suffix
            if not taken.get(newid):
                break
            # Candidate already belongs to another record; try the next number.
            count += 1

        # Resume numbering after the suffix actually used, and reserve the
        # generated id so a later record carrying it verbatim gets renamed.
        taken[base] = count + 1
        taken[newid] = taken.get(newid, 0) + 1
        # NOTE(review): name is extended without any length check, as before.
        seq.name += suffix

    if old_id != newid:
        print(f"{old_id}\t{newid}")

    seq.id = newid
    return seq
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 49 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 50 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
if __name__ == '__main__':
    # Everything after the format argument is read as alternating
    # input-path / output-path values. Taking every second entry from
    # offsets 0 and 1 pairs them up; zip() stops at the shorter slice, so
    # a trailing input without an output is not processed.
    argv_paths = sys.argv[2:]
    for src_path, dst_path in zip(argv_paths[0::2], argv_paths[1::2]):
        # Each parsed record passes through makeUnique on its way to the
        # writer; output uses the same format as the input.
        renamed = map(makeUnique, SeqIO.parse(src_path, format))
        SeqIO.write(renamed, dst_path, format)
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 59 | 
| 
a3a09dd8d09a
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
 brinkmanlab parents: diff
changeset | 60 | 
