# HG changeset patch # User pravs # Date 1501798679 14400 # Node ID c2f8e3164537271d58629fc0ff9d2b91b4144327 # Parent 59654a11787e777a8f37e9f893a2f48e7e4eb7d9 planemo upload diff -r 59654a11787e -r c2f8e3164537 MSMS_Extractor.py --- a/MSMS_Extractor.py Thu Aug 03 14:09:17 2017 -0400 +++ b/MSMS_Extractor.py Thu Aug 03 18:17:59 2017 -0400 @@ -46,6 +46,7 @@ if re.search("scan=(\d+)", k['idRef']): a = re.search("scan=(\d+)", k['idRef']) allScanList.append(int(a.group(1))) + allScanList = list(set(allScanList)) # End of Reading mzML file fraction_name = sys.argv[4] @@ -54,26 +55,41 @@ else: scansInList = [] scansNotInList = list(set(allScanList) - set(scansInList)) - + flag = 0 if removeORretain == "remove": scan2retain = scansNotInList + scan2retain = list(set(scan2retain)) scan2retain.sort() scansRemoved = scansInList # scan2retain contains scans that is to be retained - - elif removeORretain == "retain": + elif removeORretain == "retain" and randomScans < len(scansNotInList): # Randomly select spectra - random_scans = list(map(lambda _: random.choice(scansNotInList), range(randomScans))) + random_scans = random.sample(scansNotInList, randomScans) scan2retain = random_scans + scansInList + scan2retain = list(set(scan2retain)) scan2retain.sort() scansRemoved = list(set(allScanList) - set(scan2retain)) # scan2retain contains scans that is to be retained + else: + flag = 1 - # Print Stats - print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) - print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) - print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) + if flag == 1: + scan2retain = scansInList + scan2retain = list(set(scan2retain)) + scan2retain.sort() + scansRemoved = list(set(allScanList) - set(scan2retain)) + + # scan2retain contains scans that is to be retained + print >> sys.stdout,"ERROR: Number of Random Scans queried is more than available. The result has provided zero random scans." + print >> sys.stdout,"Number of available scans for random selection: %d" % len(scansNotInList) + print >> sys.stdout,"Try a number less than the available number. Thanks!!" + print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) + else: + # Print Stats + print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList))) + print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain) + print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved) # Identifying groups of continuous numbers in the scan2retain and creating scanString @@ -82,13 +98,13 @@ x = map(itemgetter(1), b) scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] " # end identifying - + # start create filter file filter_file = open("filter.txt", "w") filter_file.write("filter=scanNumber %s\n" % scanString) filter_file.close() # end create filter file - + # Prepare command for msconvert inputFile = fraction_name+".mzML" os.symlink(inputPath,inputFile) @@ -105,7 +121,6 @@ sys.exit(e.returncode) # Copy output to shutil.copyfile(outFile, outPath) - else: print "Please contact the admin. Number of inputs are not sufficient to run the program.\n"