Mercurial > repos > pravs > msms_extractor
comparison MSMS_Extractor.py @ 5:c2f8e3164537 draft
planemo upload
author | pravs |
---|---|
date | Thu, 03 Aug 2017 18:17:59 -0400 |
parents | aa944e3a353c |
children |
comparison
equal
deleted
inserted
replaced
4:59654a11787e | 5:c2f8e3164537 |
---|---|
44 # Read all scan numbers using indexedmzML/indexList/index/offset tags | 44 # Read all scan numbers using indexedmzML/indexList/index/offset tags |
45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'): | 45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'): |
46 if re.search("scan=(\d+)", k['idRef']): | 46 if re.search("scan=(\d+)", k['idRef']): |
47 a = re.search("scan=(\d+)", k['idRef']) | 47 a = re.search("scan=(\d+)", k['idRef']) |
48 allScanList.append(int(a.group(1))) | 48 allScanList.append(int(a.group(1))) |
49 allScanList = list(set(allScanList)) | |
49 # End of Reading mzML file | 50 # End of Reading mzML file |
50 | 51 |
51 fraction_name = sys.argv[4] | 52 fraction_name = sys.argv[4] |
52 if scanDict.has_key(fraction_name): | 53 if scanDict.has_key(fraction_name): |
53 scansInList = scanDict[fraction_name] | 54 scansInList = scanDict[fraction_name] |
54 else: | 55 else: |
55 scansInList = [] | 56 scansInList = [] |
56 scansNotInList = list(set(allScanList) - set(scansInList)) | 57 scansNotInList = list(set(allScanList) - set(scansInList)) |
57 | 58 flag = 0 |
58 if removeORretain == "remove": | 59 if removeORretain == "remove": |
59 scan2retain = scansNotInList | 60 scan2retain = scansNotInList |
61 scan2retain = list(set(scan2retain)) | |
60 scan2retain.sort() | 62 scan2retain.sort() |
61 scansRemoved = scansInList | 63 scansRemoved = scansInList |
62 # scan2retain contains scans that is to be retained | 64 # scan2retain contains scans that is to be retained |
63 | 65 elif removeORretain == "retain" and randomScans < len(scansNotInList): |
64 elif removeORretain == "retain": | |
65 # Randomly select spectra | 66 # Randomly select spectra |
66 random_scans = list(map(lambda _: random.choice(scansNotInList), range(randomScans))) | 67 random_scans = random.sample(scansNotInList, randomScans) |
67 | 68 |
68 scan2retain = random_scans + scansInList | 69 scan2retain = random_scans + scansInList |
70 scan2retain = list(set(scan2retain)) | |
69 scan2retain.sort() | 71 scan2retain.sort() |
70 scansRemoved = list(set(allScanList) - set(scan2retain)) | 72 scansRemoved = list(set(allScanList) - set(scan2retain)) |
71 # scan2retain contains scans that is to be retained | 73 # scan2retain contains scans that is to be retained |
74 else: | |
75 flag = 1 | |
72 | 76 |
73 # Print Stats | 77 if flag == 1: |
74 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) | 78 scan2retain = scansInList |
75 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | 79 scan2retain = list(set(scan2retain)) |
76 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) | 80 scan2retain.sort() |
81 scansRemoved = list(set(allScanList) - set(scan2retain)) | |
82 | |
83 # scan2retain contains scans that is to be retained | |
84 print >> sys.stdout,"ERROR: Number of Random Scans queried is more than available. The result has provided zero random scans." | |
85 print >> sys.stdout,"Number of available scans for random selection: %d" % len(scansNotInList) | |
86 print >> sys.stdout,"Try a number less than the available number. Thanks!!" | |
87 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | |
88 else: | |
89 # Print Stats | |
90 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) | |
91 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | |
92 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) | |
77 | 93 |
78 | 94 |
79 # Identifying groups of continuous numbers in the scan2retain and creating scanString | 95 # Identifying groups of continuous numbers in the scan2retain and creating scanString |
80 scanString = "" | 96 scanString = "" |
81 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x): | 97 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x): |
82 x = map(itemgetter(1), b) | 98 x = map(itemgetter(1), b) |
83 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " | 99 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " |
84 # end identifying | 100 # end identifying |
85 | 101 |
86 # start create filter file | 102 # start create filter file |
87 filter_file = open("filter.txt", "w") | 103 filter_file = open("filter.txt", "w") |
88 filter_file.write("filter=scanNumber %s\n" % scanString) | 104 filter_file.write("filter=scanNumber %s\n" % scanString) |
89 filter_file.close() | 105 filter_file.close() |
90 # end create filter file | 106 # end create filter file |
91 | 107 |
92 # Prepare command for msconvert | 108 # Prepare command for msconvert |
93 inputFile = fraction_name+".mzML" | 109 inputFile = fraction_name+".mzML" |
94 os.symlink(inputPath,inputFile) | 110 os.symlink(inputPath,inputFile) |
95 outFile = "filtered_"+fraction_name+".mzML" | 111 outFile = "filtered_"+fraction_name+".mzML" |
96 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib" | 112 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib" |
103 except subprocess.CalledProcessError as e: | 119 except subprocess.CalledProcessError as e: |
104 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output )) | 120 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output )) |
105 sys.exit(e.returncode) | 121 sys.exit(e.returncode) |
106 # Copy output to | 122 # Copy output to |
107 shutil.copyfile(outFile, outPath) | 123 shutil.copyfile(outFile, outPath) |
108 | |
109 else: | 124 else: |
110 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n" | 125 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n" |
111 | 126 |
112 if __name__ == "__main__": | 127 if __name__ == "__main__": |
113 main() | 128 main() |