annotate validator.py @ 0:01ed8e112f2a draft

planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
author yating-l
date Wed, 12 Apr 2017 17:44:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
1 import sys
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
2
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
3 """
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
4 Call checkAndFixBed to check the integrity of a BED file. If the strand of a line is not "+" or "-", remove that line and report it to the user.
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
5 Append a new column holding each line's original score, and set the score column itself to 1000.
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
6 """
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
def checkAndFixBed(bedfile, revised_file):
    """Copy a BED file, dropping records whose strand is not "+" or "-".

    Every kept record is rewritten tab-separated, with its score column
    (the fifth column) set to '1000' and the original score appended as a
    new last column.

    Records with fewer than six whitespace-separated columns have no strand
    column, so they are treated as invalid and reported like any other
    bad-strand record. Blank lines carry no data and are skipped without
    being reported; they still count towards line numbering.

    Args:
        bedfile: Path of the BED file to read.
        revised_file: Path of the cleaned file to write (overwritten).

    Returns:
        A list of "line<N>: <original line>" strings, one per removed
        record, where N is the 1-based line number in ``bedfile``. The
        original line is kept verbatim, including its line ending.
    """
    # Store the lines that have been removed
    removedLines = []
    # Open the input first so that an unreadable input does not leave a
    # freshly truncated output file behind.
    with open(bedfile, 'r') as f, open(revised_file, 'w') as tmp:
        # Stream the file instead of loading every line into memory.
        for i, line in enumerate(f, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 6 or fields[5] not in ('+', '-'):
                removedLines.append("line" + str(i) + ": " + line)
                continue
            # Keep the original score in a new trailing column and force
            # the score column itself to 1000.
            score = fields[4]
            fields[4] = '1000'
            fields.append(score)
            tmp.write('\t'.join(fields) + "\n")
    return removedLines
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
29
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
def main():
    """Command-line entry point: <script> <input.bed> <output.bed>.

    Reads the input and output paths from ``sys.argv``, runs
    ``checkAndFixBed`` on them, and prints every removed line to standard
    output. Exits with a usage message when either path is missing.
    """
    if len(sys.argv) < 3:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("Usage: " + sys.argv[0] + " <input.bed> <output.bed>")
    inputfile = sys.argv[1]
    outputfile = sys.argv[2]
    removed = checkAndFixBed(inputfile, outputfile)
    if removed:
        # A single parenthesised argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("\nRemoved invalid lines: \n")
        print("\n".join(removed))
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
37
01ed8e112f2a planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()