Mercurial > repos > pravs > msms_extractor
comparison MSMS_Extractor.py @ 5:c2f8e3164537 draft
planemo upload
| author | pravs |
|---|---|
| date | Thu, 03 Aug 2017 18:17:59 -0400 |
| parents | aa944e3a353c |
| children |
comparison
equal
deleted
inserted
replaced
| 4:59654a11787e | 5:c2f8e3164537 |
|---|---|
| 44 # Read all scan numbers using indexedmzML/indexList/index/offset tags | 44 # Read all scan numbers using indexedmzML/indexList/index/offset tags |
| 45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'): | 45 for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'): |
| 46 if re.search("scan=(\d+)", k['idRef']): | 46 if re.search("scan=(\d+)", k['idRef']): |
| 47 a = re.search("scan=(\d+)", k['idRef']) | 47 a = re.search("scan=(\d+)", k['idRef']) |
| 48 allScanList.append(int(a.group(1))) | 48 allScanList.append(int(a.group(1))) |
| 49 allScanList = list(set(allScanList)) | |
| 49 # End of Reading mzML file | 50 # End of Reading mzML file |
| 50 | 51 |
| 51 fraction_name = sys.argv[4] | 52 fraction_name = sys.argv[4] |
| 52 if scanDict.has_key(fraction_name): | 53 if scanDict.has_key(fraction_name): |
| 53 scansInList = scanDict[fraction_name] | 54 scansInList = scanDict[fraction_name] |
| 54 else: | 55 else: |
| 55 scansInList = [] | 56 scansInList = [] |
| 56 scansNotInList = list(set(allScanList) - set(scansInList)) | 57 scansNotInList = list(set(allScanList) - set(scansInList)) |
| 57 | 58 flag = 0 |
| 58 if removeORretain == "remove": | 59 if removeORretain == "remove": |
| 59 scan2retain = scansNotInList | 60 scan2retain = scansNotInList |
| 61 scan2retain = list(set(scan2retain)) | |
| 60 scan2retain.sort() | 62 scan2retain.sort() |
| 61 scansRemoved = scansInList | 63 scansRemoved = scansInList |
| 62 # scan2retain contains scans that is to be retained | 64 # scan2retain contains scans that is to be retained |
| 63 | 65 elif removeORretain == "retain" and randomScans < len(scansNotInList): |
| 64 elif removeORretain == "retain": | |
| 65 # Randomly select spectra | 66 # Randomly select spectra |
| 66 random_scans = list(map(lambda _: random.choice(scansNotInList), range(randomScans))) | 67 random_scans = random.sample(scansNotInList, randomScans) |
| 67 | 68 |
| 68 scan2retain = random_scans + scansInList | 69 scan2retain = random_scans + scansInList |
| 70 scan2retain = list(set(scan2retain)) | |
| 69 scan2retain.sort() | 71 scan2retain.sort() |
| 70 scansRemoved = list(set(allScanList) - set(scan2retain)) | 72 scansRemoved = list(set(allScanList) - set(scan2retain)) |
| 71 # scan2retain contains scans that is to be retained | 73 # scan2retain contains scans that is to be retained |
| 74 else: | |
| 75 flag = 1 | |
| 72 | 76 |
| 73 # Print Stats | 77 if flag == 1: |
| 74 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) | 78 scan2retain = scansInList |
| 75 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | 79 scan2retain = list(set(scan2retain)) |
| 76 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) | 80 scan2retain.sort() |
| 81 scansRemoved = list(set(allScanList) - set(scan2retain)) | |
| 82 | |
| 83 # scan2retain contains scans that is to be retained | |
| 84 print >> sys.stdout,"ERROR: Number of Random Scans queried is more than available. The result has provided zero random scans." | |
| 85 print >> sys.stdout,"Number of available scans for random selection: %d" % len(scansNotInList) | |
| 86 print >> sys.stdout,"Try a number less than the available number. Thanks!!" | |
| 87 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | |
| 88 else: | |
| 89 # Print Stats | |
| 90 print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) | |
| 91 print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) | |
| 92 print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) | |
| 77 | 93 |
| 78 | 94 |
| 79 # Identifying groups of continuous numbers in the scan2retain and creating scanString | 95 # Identifying groups of continuous numbers in the scan2retain and creating scanString |
| 80 scanString = "" | 96 scanString = "" |
| 81 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x): | 97 for a, b in groupby(enumerate(scan2retain), lambda(i,x):i-x): |
| 82 x = map(itemgetter(1), b) | 98 x = map(itemgetter(1), b) |
| 83 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " | 99 scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " |
| 84 # end identifying | 100 # end identifying |
| 85 | 101 |
| 86 # start create filter file | 102 # start create filter file |
| 87 filter_file = open("filter.txt", "w") | 103 filter_file = open("filter.txt", "w") |
| 88 filter_file.write("filter=scanNumber %s\n" % scanString) | 104 filter_file.write("filter=scanNumber %s\n" % scanString) |
| 89 filter_file.close() | 105 filter_file.close() |
| 90 # end create filter file | 106 # end create filter file |
| 91 | 107 |
| 92 # Prepare command for msconvert | 108 # Prepare command for msconvert |
| 93 inputFile = fraction_name+".mzML" | 109 inputFile = fraction_name+".mzML" |
| 94 os.symlink(inputPath,inputFile) | 110 os.symlink(inputPath,inputFile) |
| 95 outFile = "filtered_"+fraction_name+".mzML" | 111 outFile = "filtered_"+fraction_name+".mzML" |
| 96 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib" | 112 # msconvert_command = "msconvert " + inputFile + " --filter " + "\"scanNumber " + scanString + " \" " + " --outfile " + outFile + " --mzML --zlib" |
| 103 except subprocess.CalledProcessError as e: | 119 except subprocess.CalledProcessError as e: |
| 104 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output )) | 120 sys.stderr.write( "msconvert resulted in error: %s: %s" % ( e.returncode, e.output )) |
| 105 sys.exit(e.returncode) | 121 sys.exit(e.returncode) |
| 106 # Copy output to | 122 # Copy output to |
| 107 shutil.copyfile(outFile, outPath) | 123 shutil.copyfile(outFile, outPath) |
| 108 | |
| 109 else: | 124 else: |
| 110 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n" | 125 print "Please contact the admin. Number of inputs are not sufficient to run the program.\n" |
| 111 | 126 |
| 112 if __name__ == "__main__": | 127 if __name__ == "__main__": |
| 113 main() | 128 main() |
