comparison MSMS_Extractor.py @ 5:c2f8e3164537 draft

planemo upload
author pravs
date Thu, 03 Aug 2017 18:17:59 -0400
parents aa944e3a353c
children
comparison
equal deleted inserted replaced
4:59654a11787e 5:c2f8e3164537
44 # Read all scan numbers using indexedmzML/indexList/index/offset tags 44 # Read all scan numbers using indexedmzML/indexList/index/offset tags
45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'): 45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'):
46 if re.search("scan=(\d+)", k['idRef']): 46 if re.search("scan=(\d+)", k['idRef']):
47 a = re.search("scan=(\d+)", k['idRef']) 47 a = re.search("scan=(\d+)", k['idRef'])
48 allScanList.append(int(a.group(1))) 48 allScanList.append(int(a.group(1)))
49 allScanList = list(set(allScanList))
49 # End of Reading mzML file 50 # End of Reading mzML file
50 51
51 fraction_name = sys.argv[4] 52 fraction_name = sys.argv[4]
52 if scanDict.has_key(fraction_name): 53 if scanDict.has_key(fraction_name):
53 scansInList = scanDict[fraction_name] 54 scansInList = scanDict[fraction_name]
54 else: 55 else:
55 scansInList = [] 56 scansInList = []
56 scansNotInList = list(set(allScanList) - set(scansInList)) 57 scansNotInList = list(set(allScanList) - set(scansInList))
57 58 flag = 0
58 if removeORretain == "remove": 59 if removeORretain == "remove":
59 scan2retain = scansNotInList 60 scan2retain = scansNotInList
61 scan2retain = list(set(scan2retain))
60 scan2retain.sort() 62 scan2retain.sort()
61 scansRemoved = scansInList 63 scansRemoved = scansInList
62 # scan2retain contains scans that is to be retained 64 # scan2retain contains scans that is to be retained
63 65 elif removeORretain == "retain" and randomScans < len(scansNotInList):
64 elif removeORretain == "retain":
65 # Randomly select spectra 66 # Randomly select spectra
66 random_scans = list(map(lambda _: random.choice(scansNotInList), range(randomScans))) 67 random_scans = random.sample(scansNotInList, randomScans)
67 68
68 scan2retain = random_scans + scansInList 69 scan2retain = random_scans + scansInList
70 scan2retain = list(set(scan2retain))
69 scan2retain.sort() 71 scan2retain.sort()
70 scansRemoved = list(set(allScanList) - set(scan2retain)) 72 scansRemoved = list(set(allScanList) - set(scan2retain))
71 # scan2retain contains scans that is to be retained 73 # scan2retain contains scans that is to be retained
74 else:
75 flag = 1
72 76
73 # Print Stats 77 if flag == 1:
74 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) 78 scan2retain = scansInList
75 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) 79 scan2retain = list(set(scan2retain))
76 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) 80 scan2retain.sort()
81 scansRemoved = list(set(allScanList) - set(scan2retain))
82
83 # scan2retain contains scans that is to be retained
84 print >> sys.stdout,"ERROR: Number of Random Scans queried is more than available. The result has provided zero random scans."
85 print >> sys.stdout,"Number of available scans for random selection: %d" % len(scansNotInList)
86 print >> sys.stdout,"Try a number less than the available number. Thanks!!"
87 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain)
88 else:
89 # Print Stats
90 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList)))
91 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain)
92 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved)
77 93
78 94
79 # Identifying groups of continuous numbers in the scan2retain and creating scanString 95 # Identifying groups of continuous numbers in the scan2retain and creating scanString
80 scanString = "" 96 scanString = ""
81 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x): 97 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x):
82 x = map(itemgetter(1), b) 98 x = map(itemgetter(1), b)
83 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " 99 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] "
84 # end identifying 100 # end identifying
85 101
86 # start create filter file 102 # start create filter file
87 filter_file = open("filter.txt", "w") 103 filter_file = open("filter.txt", "w")
88 filter_file.write("filter=scanNumber %s\n" % scanString) 104 filter_file.write("filter=scanNumber %s\n" % scanString)
89 filter_file.close() 105 filter_file.close()
90 # end create filter file 106 # end create filter file
91 107
92 # Prepare command for msconvert 108 # Prepare command for msconvert
93 inputFile = fraction_name+".mzML" 109 inputFile = fraction_name+".mzML"
94 os.symlink(inputPath,inputFile) 110 os.symlink(inputPath,inputFile)
95 outFile = "filtered_"+fraction_name+".mzML" 111 outFile = "filtered_"+fraction_name+".mzML"
96 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib" 112 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib"
103 except subprocess.CalledProcessError as e: 119 except subprocess.CalledProcessError as e:
104 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output )) 120 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output ))
105 sys.exit(e.returncode) 121 sys.exit(e.returncode)
106 # Copy output to 122 # Copy output to
107 shutil.copyfile(outFile, outPath) 123 shutil.copyfile(outFile, outPath)
108
109 else: 124 else:
110 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n" 125 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n"
111 126
112 if __name__ == "__main__": 127 if __name__ == "__main__":
113 main() 128 main()