diff MSMS_Extractor.py @ 5:c2f8e3164537 draft

planemo upload
author pravs
date Thu, 03 Aug 2017 18:17:59 -0400
parents aa944e3a353c
children
line wrap: on
line diff
--- a/MSMS_Extractor.py	Thu Aug 03 14:09:17 2017 -0400
+++ b/MSMS_Extractor.py	Thu Aug 03 18:17:59 2017 -0400
@@ -46,6 +46,7 @@
             if re.search("scan=(\d+)", k['idRef']):
                 a = re.search("scan=(\d+)", k['idRef'])
                 allScanList.append(int(a.group(1)))
+        allScanList = list(set(allScanList))
         # End of Reading mzML file
         
         fraction_name = sys.argv[4]
@@ -54,26 +55,41 @@
         else:
             scansInList = []
         scansNotInList = list(set(allScanList) - set(scansInList))
-        
+        flag = 0
         if removeORretain == "remove":
             scan2retain = scansNotInList
+            scan2retain = list(set(scan2retain))
             scan2retain.sort()
             scansRemoved = scansInList
             # scan2retain contains scans that is to be retained
-            
-        elif removeORretain == "retain":
+        elif removeORretain == "retain" and randomScans < len(scansNotInList):
             # Randomly select spectra
-            random_scans = list(map(lambda _: random.choice(scansNotInList), range(randomScans)))
+            random_scans = random.sample(scansNotInList, randomScans)
             
             scan2retain = random_scans + scansInList
+            scan2retain = list(set(scan2retain))
             scan2retain.sort()
             scansRemoved = list(set(allScanList) - set(scan2retain))
             # scan2retain contains scans that is to be retained
+        else:
+            flag = 1
             
-        # Print Stats
-        print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList)))
-        print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain)
-        print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved)
+        if flag == 1:
+            scan2retain = scansInList
+            scan2retain = list(set(scan2retain))
+            scan2retain.sort()
+            scansRemoved = list(set(allScanList) - set(scan2retain))
+            
+            # scan2retain contains the scans that are to be retained
+            print >> sys.stdout,"ERROR: Number of Random Scans queried is more than available. The result has provided zero random scans."
+            print >> sys.stdout,"Number of available scans for random selection: %d" % len(scansNotInList)
+            print >> sys.stdout,"Try a number less than the available number. Thanks!!"
+            print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain)
+        else:
+            # Print Stats
+            print >> sys.stdout,"Total number of Scan Numbers: %d" % len(list(set(allScanList)))
+            print >> sys.stdout,"Number of Scans retained: %d" % len(scan2retain)
+            print >> sys.stdout,"Number of Scans removed: %d" % len(scansRemoved)
         
         
         # Identifying groups of continuous numbers in the scan2retain and creating scanString
@@ -82,13 +98,13 @@
             x = map(itemgetter(1), b)
             scanString = scanString + "["+str(x[0])+","+str(x[-1])+"] "
         # end identifying
-        
+    
         # start create filter file
         filter_file = open("filter.txt", "w")
         filter_file.write("filter=scanNumber %s\n" % scanString)
         filter_file.close()
         # end create filter file 
-        
+    
         # Prepare command for msconvert
         inputFile = fraction_name+".mzML"
         os.symlink(inputPath,inputFile)
@@ -105,7 +121,6 @@
             sys.exit(e.returncode)
         # Copy output to 
         shutil.copyfile(outFile, outPath)
-        
     else:
         print "Please contact the admin. Number of inputs are not sufficient to run the program.\n"