comparison rename_tracks.py @ 0:8b19a698d90e draft

planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
author yating-l
date Mon, 25 Jun 2018 15:01:41 -0400
parents
children 72ccb2f848a0
comparison
equal deleted inserted replaced
-1:000000000000 0:8b19a698d90e
1 # -*- coding: utf8 -*-
2
3 """
4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference
5 """
6 import sys
7 import csv
8 import subprocess
9 import tempfile
10
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy an interval file, renaming the sequence name in its first column.

    For every line that does not start with ``#``, the first
    whitespace-delimited field is looked up in ``nameDict``. When it is
    present, only that leading field is replaced by the mapped name. Comment
    lines, blank lines and lines whose first field is not in the mapping are
    copied unchanged.

    Args:
        inputFile: path of the interval file to read.
        nameDict: mapping of old sequence name -> new sequence name.
        renamedFile: path of the file to write (created or truncated).
    """
    with open(inputFile, 'r') as source, open(renamedFile, 'w') as target:
        # Stream line by line so large tracks are never held in memory.
        for line in source:
            if not line.startswith("#"):
                fields = line.split(None, 1)
                # `fields` is empty for blank/whitespace-only lines.
                if fields and fields[0] in nameDict:
                    # count=1 limits the rewrite to the leading sequence
                    # column; later fields that merely contain the same text
                    # (e.g. an ID attribute) must stay intact.
                    line = line.replace(fields[0], nameDict[fields[0]], 1)
            target.write(line)
22
def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` names on its
    ``@SQ`` lines are mapped through ``nameDict``, and the result is fed to
    ``samtools reheader - inputFile`` whose output is stored in
    ``renamedFile``. The function returns only after samtools has finished,
    so the output file is complete when the caller continues.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: mapping of old sequence name -> new sequence name.
        renamedFile: path of the BAM file to write (created or truncated).

    Raises:
        subprocess.CalledProcessError: if a samtools invocation exits with a
            non-zero status.
    """
    header = subprocess.check_output(
        ['samtools', 'view', '-H', inputFile], universal_newlines=True)
    new_header = _rename_sam_header(header, nameDict)

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    # BAM is binary; samtools writes straight to the file descriptor.
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(
            reheader_cmd, stdin=subprocess.PIPE, stdout=out,
            universal_newlines=True)
        # communicate() sends the header, closes stdin and waits for exit.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)


def _rename_sam_header(header, nameDict):
    """Return SAM header text with the @SQ ``SN:`` names mapped via nameDict."""
    renamed_lines = []
    for line in header.splitlines(True):
        if line.startswith('@SQ'):
            body = line.rstrip('\r\n')
            ending = line[len(body):]
            # Compare whole names: a regex/substring substitution would also
            # rewrite the prefix of longer names (scaffold_1 in scaffold_10).
            fields = [
                'SN:' + nameDict[field[3:]]
                if field.startswith('SN:') and field[3:] in nameDict
                else field
                for field in body.split('\t')
            ]
            line = '\t'.join(fields) + ending
        renamed_lines.append(line)
    return ''.join(renamed_lines)
33
def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile):
    """Write a copy of a bigWig file that uses the renamed sequence names.

    Steps: dump the bigWig to bedGraph (``bigWigToBedGraph``), compute the
    chromosome sizes of the renamed reference (``faSize -detailed -tab``),
    rename the first column with ``rename_interval``, sort the renamed
    bedGraph, and rebuild the bigWig (``bedGraphToBigWig``).

    Args:
        inputFile: path of the bigWig file to read.
        renamedReference: path of the FASTA file passed to ``faSize`` to
            obtain the chromosome sizes.
        nameDict: mapping of old sequence name -> new sequence name.
        renamedFile: path of the bigWig file to write.

    Raises:
        subprocess.CalledProcessError: if any external tool exits with a
            non-zero status.
    """
    import os  # function-scope import: this module does not import os at file level

    # Byte-wise collation keeps the sort order independent of the user's
    # locale. NOTE(review): this is the ordering bedGraphToBigWig is
    # understood to expect - confirm against the installed UCSC tools.
    sort_env = dict(os.environ, LC_ALL='C')

    # The temp files are only written through a subprocess file descriptor or
    # reopened by name, so no special buffering is required on these handles.
    bedGraphFile = tempfile.NamedTemporaryFile()
    chrom_sizes = tempfile.NamedTemporaryFile()
    renamed_bedGraphFile = tempfile.NamedTemporaryFile()
    renamed_sorted_bedGraphFile = tempfile.NamedTemporaryFile()
    try:
        subprocess.check_call(
            ['bigWigToBedGraph', inputFile, bedGraphFile.name])
        subprocess.check_call(
            ['faSize', '-detailed', '-tab', renamedReference],
            stdout=chrom_sizes)
        # Rename first, then sort: sorting by the old names would leave the
        # file out of order once the names change.
        rename_interval(bedGraphFile.name, nameDict, renamed_bedGraphFile.name)
        subprocess.check_call(
            ['sort', '-k1,1', '-k2,2n', renamed_bedGraphFile.name],
            stdout=renamed_sorted_bedGraphFile, env=sort_env)
        subprocess.check_call(
            ['bedGraphToBigWig', renamed_sorted_bedGraphFile.name,
             chrom_sizes.name, renamedFile])
    finally:
        # Closing a NamedTemporaryFile removes it from disk.
        for temp in (bedGraphFile, chrom_sizes, renamed_bedGraphFile,
                     renamed_sorted_bedGraphFile):
            temp.close()
45
def getNameDict(nameMapping):
    """Load the old-name -> new-name mapping from a CSV file.

    Each row supplies the old sequence name in its first column and the new
    name in its second column; any further columns are ignored. Blank rows
    are skipped. If an old name occurs more than once, the last row wins.

    Args:
        nameMapping: path of the CSV file to read.

    Returns:
        dict mapping old sequence name (str) to new sequence name (str).

    Raises:
        ValueError: if a non-blank row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for line_number, row in enumerate(csv.reader(f), 1):
            # csv yields [] for an empty line; treat whitespace-only rows the
            # same way instead of failing on a trailing blank line.
            if not any(field.strip() for field in row):
                continue
            if len(row) < 2:
                raise ValueError(
                    "%s: row %d has fewer than two columns: %r"
                    % (nameMapping, line_number, row))
            nameDict[row[0]] = row[1]
    return nameDict
53
def main():
    """Command-line entry point: rename the sequences of one evidence track.

    Positional arguments (read from ``sys.argv``):
        1. input track file
        2. CSV name-mapping file (old name, new name)
        3. input format: ``bed``, ``gff3``, ``gtf``, ``bam`` or ``bigwig``
        4. output file
        5. renamed reference FASTA (required for ``bigwig`` only)

    Exits with a message on standard error and a non-zero status when the
    arguments are incomplete or the format is not supported.
    """
    interval_formats = ("bed", "gff3", "gtf")
    if len(sys.argv) < 5:
        sys.exit("usage: rename_tracks.py <input> <name_mapping.csv> "
                 "<bed|gff3|gtf|bam|bigwig> <output> [renamed_reference]")
    inputFile, nameMapping, inputFormat, outputfile = sys.argv[1:5]

    # Validate before touching any file so a bad call fails fast instead of
    # silently producing no output.
    if inputFormat not in interval_formats + ("bam", "bigwig"):
        sys.exit("unsupported input format: %s" % inputFormat)
    if inputFormat == "bigwig" and len(sys.argv) < 6:
        sys.exit("bigwig input requires the renamed reference as 5th argument")

    nameDict = getNameDict(nameMapping)
    if inputFormat in interval_formats:
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    else:
        renamedReference = sys.argv[5]
        rename_bigwig(inputFile, renamedReference, nameDict, outputfile)
67
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()