Mercurial > repos > yating-l > rename_tracks
annotate rename_tracks.py @ 0:8b19a698d90e draft
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
| author | yating-l | 
|---|---|
| date | Mon, 25 Jun 2018 15:01:41 -0400 | 
| parents | |
| children | 72ccb2f848a0 | 
| rev | line source | 
|---|---|
| 
0
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
1 # -*- coding: utf8 -*- | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
2 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
3 """ | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
5 """ | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
6 import sys | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
7 import csv | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
8 import subprocess | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
9 import tempfile | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
10 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy a line-oriented interval file, renaming its sequence column.

    The first whitespace-delimited field of every non-comment line is looked
    up in ``nameDict``; when present it is replaced by the mapped name. All
    other text on the line is left untouched.

    Args:
        inputFile: path of the BED/GFF3/GTF/bedGraph-style file to read.
        nameDict: mapping of original sequence name -> new sequence name.
        renamedFile: path of the file to write (created or truncated).

    Lines starting with ``#`` and blank lines are written through unchanged.
    """
    with open(inputFile, 'r') as reader, open(renamedFile, 'w') as writer:
        # Stream line by line instead of loading the whole track in memory.
        for line in reader:
            if not line.startswith("#"):
                fields = line.split(None, 1)
                # ``fields`` is empty for blank lines; nothing to rename then.
                if fields and fields[0] in nameDict:
                    # count=1: the first occurrence of the token is the
                    # sequence column itself (only whitespace can precede
                    # it), so later columns that merely contain the same
                    # text (IDs, attributes) are not rewritten.
                    line = line.replace(fields[0], nameDict[fields[0]], 1)
            writer.write(line)
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
22 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
def _rename_sq_line(line, nameDict):
    """Return one SAM header line with its @SQ ``SN:`` name remapped."""
    if not line.startswith('@SQ\t'):
        return line
    body = line.rstrip('\r\n')
    ending = line[len(body):]
    fields = body.split('\t')
    for index, field in enumerate(fields):
        # Exact match on the whole name, so "chr1" never rewrites "chr10".
        if field.startswith('SN:') and field[3:] in nameDict:
            fields[index] = 'SN:' + str(nameDict[field[3:]])
    return '\t'.join(fields) + ending


def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose reference names are renamed.

    The header is read with ``samtools view -H``, the ``SN:`` field of each
    ``@SQ`` line is remapped through ``nameDict``, and the result is fed to
    ``samtools reheader`` whose output is written to ``renamedFile``.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: mapping of original sequence name -> new sequence name.
        renamedFile: path of the BAM file to write (created or truncated).

    Raises:
        subprocess.CalledProcessError: if either samtools invocation exits
            with a non-zero status.
    """
    header = subprocess.check_output(
        ['samtools', 'view', '-H', inputFile], universal_newlines=True)
    # Rewrite the header in Python rather than through ``sed``: names are
    # matched literally and in full, so regex metacharacters or '/' in a
    # name cannot break or mis-target the substitution.
    new_header = ''.join(
        _rename_sq_line(line, nameDict) for line in header.splitlines(True))

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(
            reheader_cmd, stdin=subprocess.PIPE, stdout=out,
            universal_newlines=True)
        # communicate() feeds the header and waits for samtools to finish,
        # so the output is complete before this function returns.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
33 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile):
    """Write a copy of a bigWig file whose sequence names are renamed.

    Pipeline: ``bigWigToBedGraph`` -> rename the sequence column with
    ``rename_interval`` -> ``sort -k1,1 -k2,2n`` -> ``bedGraphToBigWig``,
    using chromosome sizes computed by ``faSize`` from the renamed reference.

    Args:
        inputFile: path of the bigWig file to read.
        renamedReference: FASTA file of the renamed reference, passed to
            ``faSize -detailed -tab`` to obtain chromosome sizes.
        nameDict: mapping of original sequence name -> new sequence name.
        renamedFile: path of the bigWig file to write.

    Raises:
        subprocess.CalledProcessError: if any external tool exits with a
            non-zero status.
    """
    import os  # local import: only needed to build the environment for sort

    # Force byte-wise collation so the sort order does not depend on the
    # caller's locale (bedGraphToBigWig's usage text asks for this ordering).
    sort_env = dict(os.environ, LC_ALL='C')

    # Only the subprocesses (and rename_interval, through its own handle)
    # write to these files, so no special buffering is needed here. The
    # ``with`` block removes all of them even when a step fails.
    with tempfile.NamedTemporaryFile() as bedGraphFile, \
            tempfile.NamedTemporaryFile() as chrom_sizes, \
            tempfile.NamedTemporaryFile() as renamed_bedGraphFile, \
            tempfile.NamedTemporaryFile() as sorted_renamed_bedGraphFile:
        subprocess.check_call(
            ['bigWigToBedGraph', inputFile, bedGraphFile.name])
        subprocess.check_call(
            ['faSize', '-detailed', '-tab', renamedReference],
            stdout=chrom_sizes)
        # Rename BEFORE sorting: the new names may order differently from
        # the old ones, and the sorted order must hold for the final names.
        rename_interval(bedGraphFile.name, nameDict, renamed_bedGraphFile.name)
        subprocess.check_call(
            ['sort', '-k1,1', '-k2,2n', renamed_bedGraphFile.name],
            stdout=sorted_renamed_bedGraphFile, env=sort_env)
        subprocess.check_call(
            ['bedGraphToBigWig', sorted_renamed_bedGraphFile.name,
             chrom_sizes.name, renamedFile])
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
45 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
def getNameDict(nameMapping):
    """Load the sequence-name mapping from a CSV file.

    Args:
        nameMapping: path of a comma-separated file whose first column is
            the original sequence name and whose second column is the new
            name. Any further columns are ignored.

    Returns:
        dict mapping original name -> new name. If a name appears more than
        once, the last row wins.

    Raises:
        IndexError: if a non-empty row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); skip them instead of failing on row[0].
            if not row:
                continue
            nameDict[row[0]] = row[1]
    return nameDict
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
53 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
def main():
    """Command-line entry point.

    Expected arguments:
        1. input track file
        2. CSV name-mapping file (original name, new name)
        3. input format: ``bed``, ``gff3``, ``gtf``, ``bam`` or ``bigwig``
        4. output file
        5. renamed reference FASTA (required for ``bigwig`` only)

    Exits with an error message when arguments are missing or the format is
    not supported, instead of silently producing no output.
    """
    interval_formats = ("bed", "gff3", "gtf")
    supported_formats = interval_formats + ("bam", "bigwig")

    if len(sys.argv) < 5:
        sys.exit(
            "usage: %s <input> <name_mapping.csv> <%s> <output> "
            "[renamed_reference]" % (sys.argv[0], "|".join(supported_formats)))

    inputFile, nameMapping, inputFormat, outputfile = sys.argv[1:5]

    if inputFormat not in supported_formats:
        sys.exit("Unsupported input format: %s (expected one of: %s)"
                 % (inputFormat, ", ".join(supported_formats)))
    if inputFormat == "bigwig" and len(sys.argv) < 6:
        sys.exit("The bigwig format requires the renamed reference FASTA "
                 "as the fifth argument")

    nameDict = getNameDict(nameMapping)
    if inputFormat in interval_formats:
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    else:
        renamedReference = sys.argv[5]
        rename_bigwig(inputFile, renamedReference, nameDict, outputfile)
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
67 | 
| 
 
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
 
yating-l 
parents:  
diff
changeset
 | 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
