changeset 0:4d2a2268a17a draft

"planemo upload"
author mzhuang
date Thu, 23 Sep 2021 21:00:48 +0000
parents
children 0545899061d3
files chess_preprocessor.xml pp_dexela.py preprocess_dexela_h5_filesystem.py preprocess_dexela_h5_galaxy.py
diffstat 4 files changed, 579 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chess_preprocessor.xml	Thu Sep 23 21:00:48 2021 +0000
@@ -0,0 +1,135 @@
+<tool id="chess_preprocessor" name="CHESS Pre-processing Tool" version="0.1.0">
+  <description>for pre-processing raw data</description>
+  <requirements>
+      <requirement type="package" version="0.8.4">hexrd</requirement>
+  </requirements>
+  <command><![CDATA[
+    #import os.path
+
+    ##source ~/.bashrc;conda activate pulsar;
+    ##source ~/.bashrc; conda activate __hexrd@0.8.0;
+    ## Run from ./output: the discover_datasets rules in the outputs section look for *.npz there.
+    mkdir -p output;
+    cd output;
+    #set $history_id=$__app__.security.encode_id($output.creating_job.history.id)
+    echo '$history_id' >> '$output';
+
+    #if $input_source_conditional.input_source == "input_source_history"
+        ## Raw data comes from the history: selected dataset(s) or one list collection.
+        #set $input_type = $input_source_conditional.input_type_conditional.input_type
+        #if $input_type == "single"
+            #set $input_data = $input_source_conditional.input_type_conditional.fastq_input1
+        #elif $input_type == "list_collection"
+            #set $input_data = $input_source_conditional.input_type_conditional.fastq_input
+        #end if#
+
+        ## One path per line (printf expands the literal backslash-n); xargs then runs one
+        ## preprocess_dexela_h5_galaxy.py per path, at most GALAXY_SLOTS at a time.
+        #set files = '\\n'.join([str($file) for $file in $input_data])
+        #if $output_as_list == "Yes"
+            printf "$files" | xargs -n 1 -P \${GALAXY_SLOTS} -I{} python  '$__tool_directory__/preprocess_dexela_h5_galaxy.py' {}  -o '$ome_start' -s '$start_frame' -n '$num_frames' -e '$ome_end' -t '$threshold'   >> '$output_list_log' 2>&1;
+        #elif $output_as_list == "No"
+            printf "$files" | xargs -n 1 -P \${GALAXY_SLOTS} -I{} python  '$__tool_directory__/preprocess_dexela_h5_galaxy.py' {}  -o '$ome_start' -s '$start_frame' -n '$num_frames' -e '$ome_end' -t '$threshold'   >> '$output' 2>&1;
+        #end if#
+    #elif $input_source_conditional.input_source == "input_source_linux"
+        ## Expand the scan list: every comma-separated item is either one scan number or an
+        ## inclusive first:last range, so 5,7:9 becomes 5 7 8 9.
+        #set $scans = [i for a in [(int(j[0]),int(j[-1])+1) for j in [x.split(':') for x in $input_source_conditional.scan_numbers.split(',')]] for i in range(a[0],a[1])]
+
+        ## Scans are processed one after another; both branches run the same script and
+        ## differ only in the log dataset they append to.
+        #for $n in $scans#
+            #if $output_as_list == "Yes"
+                echo '$input_source_conditional.base_dir' >> '$output_list_log';
+                python  '$__tool_directory__/preprocess_dexela_h5_filesystem.py' '$input_source_conditional.base_dir' '$input_source_conditional.expt_name' '$input_source_conditional.samp_name' '$n' -o '$ome_start' -s '$start_frame' -n '$num_frames' -e '$ome_end' -t '$threshold'   >> '$output_list_log' 2>&1;
+            #elif $output_as_list == "No"
+                echo '$input_source_conditional.base_dir' >> '$output';
+                python  '$__tool_directory__/preprocess_dexela_h5_filesystem.py' '$input_source_conditional.base_dir' '$input_source_conditional.expt_name' '$input_source_conditional.samp_name' '$n' -o '$ome_start' -s '$start_frame' -n '$num_frames' -e '$ome_end' -t '$threshold'   >> '$output' 2>&1;
+            #end if#
+        #end for#
+    #end if#
+    ## NOTE(review): both "conda activate" lines above are commented out, so nothing in this
+    ## template activates an environment before this deactivate; confirm it is still wanted.
+    conda deactivate
+  ]]></command>
+  <inputs>
+    <!--
+    <param name="infile_" type="data" multiple="true" label="Any file"/>
+    -->
+    <!-- Where the raw data comes from; the command template branches on input_source. -->
+    <conditional name="input_source_conditional">
+        <param name="input_source" type="select" label="Location of raw data">
+            <option value="input_source_history" selected="true">Choose from History panel</option>
+            <option value="input_source_linux">Specify a directory in CHESS file system </option>
+        </param>
+        <!-- History input: either individually selected dataset(s) or one list collection. -->
+        <when value="input_source_history">
+            <conditional name="input_type_conditional">
+            <param name="input_type" type="select" label="Input raw data as">
+                <option value="list_collection" selected="true">List of Datasets</option>
+                <option value="single">Dataset(s)</option>
+            </param>
+            <when value="single">
+                <param name="fastq_input1" type="data" multiple="true" label="Select (a) raw data file(s)" help="Specify (a) raw data file(s) for pre-processing"/>
+            </when>
+            <when value="list_collection">
+                <param name="fastq_input" type="data_collection" collection_type="list" label="Select a list of raw data files" help="Specify a list of raw data files for pre-processing one by one"/>
+            </when>
+            </conditional>
+        </when>
+        <!-- File-system input: the command passes base_dir, expt_name, samp_name and each
+             expanded scan number to preprocess_dexela_h5_filesystem.py. -->
+        <when value="input_source_linux">
+             <param name="base_dir" type="text" label="Please specify the directory that contains raw data">
+                 <!-- Characters accepted in base_dir on top of ASCII letters and digits;
+                      the underscore is listed in both add elements. -->
+                 <sanitizer>
+                     <valid initial="string.ascii_letters,string.digits">
+                         <add value="#,/,*,@,.,_,-" />
+                         <add value="_" />
+                     </valid>
+                 </sanitizer>
+             </param>
+             <param name="expt_name" type="text" label="Please specify the experiment name"/>
+             <param name="samp_name" type="text" label="Please specify the sample name"/>
+             <!-- Comma-separated items; an item written first:last is expanded by the command
+                  template to every scan number from first to last inclusive. -->
+             <param name="scan_numbers" type="text" label="Please specify a scan number or scan numbers separated by comma (for example, 5,7,8,9,20,21,22,23,30 or 5,7:9,20:23,30)"/>
+        </when>
+    </conditional>
+      <!--
+    <param name="which_cluster" type="select" label="Send your job to below cluster">
+      <option value="SDSC" selected="true">SDSC</option>
+      <option value="Cornell" selected="true">Cornell CHESS</option>
+    </param>
+      -->
+    <!--
+    <param name="output_dir" type="text" hidden="true"/>
+    -->
+    <!-- The five values below are passed to the preprocess script as its n, s, t, o and e
+         options, in that order. -->
+    <param name="num_frames" type="integer" value="999"  label="Number of frames to read"/>
+    <param name="start_frame" type="integer" value="0"  label="Index of first data frame"/>
+    <param name="threshold" type="integer" value="50"  label="Threshold for frame caches"/>
+    <param name="ome_start" type="float" value="-50.0"  label="Start omega"/>
+    <param name="ome_end" type="float" value="49.9"  label="End omega"/>
+    <!-- Selects which outputs exist (see the filter elements in the outputs section) and
+         which log dataset the command appends to. -->
+    <param name="output_as_list" type="select" label="Would you like the tool to output result(s) as a list?">
+      <option value="Yes" selected="true">Yes. Let the results be contained in a list.</option>
+      <option value="No" >No. Show them individually.</option>
+    </param>
+  </inputs>
+  <outputs>
+    <!-- Log dataset used when results are returned as a list: the command appends the
+         stdout and stderr of every preprocess run to it. -->
+    <data format="txt" name="output_list_log" label="${tool.name} on ${on_string}">
+        <!--filter>input_type_conditional['input_type'] == 'single'</filter-->
+        <filter>output_as_list == 'Yes'</filter>
+    </data>
+    <!--
+    <collection name="output_list" type="list" label="${tool.name} on ${on_string}: processed_data" structured_like="fastq_input" format="npz">
+    <collection name="output_list" type="list" label="${tool.name} on ${on_string}: processed_data" format="npz">
+    -->
+    <!-- List collection filled from the *.npz files found in the job's "output" directory;
+         the part of each file name before ".npz" is captured as the designation. -->
+    <collection name="output_list" type="list" label="${tool.name} on ${on_string}: processed_data">
+        <!-- discover_datasets pattern="__name_and_ext__" directory="output" ext="npz" visible="false"/-->
+        <discover_datasets pattern="(?P&lt;designation&gt;.+)\.npz" ext="hexrd.npz" directory="output"/>
+        <!--discover_datasets pattern="(?P&lt;designation&gt;.+)\.npz" ext="npz" directory="output"/-->
+        <!--discover_datasets pattern="(?P&lt;designation&gt;.npz)" ext="hexrd.npz" directory="output"/-->
+        <!--filter>input_type_conditional['input_type'] == 'list_collection'</filter-->
+        <filter>output_as_list == 'Yes'</filter>
+    </collection>
+    <!-- Log dataset used when results are shown individually; the same *.npz pattern is
+         applied to the "output" directory, with visible="true" on the discovered files. -->
+    <data  format="txt"  name="output" label="${tool.name} on ${on_string}">
+        <discover_datasets pattern="(?P&lt;designation&gt;.+)\.npz"  directory="output" ext="hexrd.npz" visible="true" />
+        <filter>output_as_list == 'No'</filter>
+    </data>
+    <!--data format="txt" name="output_list" label="${tool.name} on ${on_string}: listfiles">
+        <filter>input_type_conditional['input_type'] == 'list_collection'</filter>
+    </data -->
+  </outputs>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pp_dexela.py	Thu Sep 23 21:00:48 2021 +0000
@@ -0,0 +1,153 @@
+from __future__ import print_function
+
+import time
+import os
+import numpy as np
+
+from hexrd import imageseries
+from hexrd.imageseries.process import ProcessedImageSeries
+
+
+
+
+class ProcessedDexelaIMS(ProcessedImageSeries):
+    """Processed image series with two extra frame-padding operations.
+
+    On top of what ``ProcessedImageSeries`` sets up, the constructor registers
+    ``'add-row'`` and ``'add-column'`` through ``addop``.  Each operation
+    inserts one row/column at index ``k`` and fills it from its neighbours.
+    """
+
+    ADDROW = 'add-row'
+    ADDCOL = 'add-column'
+
+    def __init__(self, imser, oplist, **kwargs):
+        """Forward every argument to the base class, then register the extra ops."""
+        super(ProcessedDexelaIMS, self).__init__(imser, oplist, **kwargs)
+        self.addop(self.ADDROW, self._addrow)
+        self.addop(self.ADDCOL, self._addcol)
+
+    def _addrow(self, img, k):
+        """Return a copy of ``img`` with one extra row at index ``k``.
+
+        The new row duplicates its only neighbour when it is inserted at the
+        top (``k == 0``) or appended at the bottom (``k == img.shape[0]``);
+        anywhere else it is the average of the rows directly above and below.
+        """
+        nrows = img.shape[0]
+        pimg = np.insert(img, k, 0, axis=0)
+        if k==0:
+            pimg[0] = pimg[1]
+        elif k==nrows:
+            pimg[k] = pimg[k-1]
+        else: # in middle
+            pimg[k] = (pimg[k-1] + pimg[k+1])/2
+
+        return pimg
+
+    def _addcol(self, img, k):
+        """Return a copy of ``img`` with one extra column at index ``k``.
+
+        Column counterpart of ``_addrow``: the new column duplicates its only
+        neighbour at either edge and is the average of its left and right
+        neighbours otherwise.
+        """
+        # The edge test must use the column count (axis 1).  Comparing with
+        # the row count sent an appended column of a non-square image into
+        # the averaging branch, which indexes one past the last column.
+        ncols = img.shape[1]
+        pimg = np.insert(img, k, 0, axis=1)
+        if k==0:
+            pimg[:,0] = pimg[:,1]
+        elif k==ncols:
+            pimg[:,k] = pimg[:,k-1]
+        else: # in middle
+            pimg[:,k] = (pimg[:,k-1] + pimg[:,k+1])/2
+
+        return pimg
+
+
+# Short alias; PP_Dexela.processed() constructs its series through this name.
+DexelaIMS=ProcessedDexelaIMS
+
+class PP_Dexela(object):
+    """Pre-process one raw Dexela image file into a frame-cache file.
+
+    The raw series is opened with ``imageseries.open``; ``processed()`` wraps
+    it in ``DexelaIMS`` with a dark-image step followed by the panel's own
+    option list, and ``save_processed()`` writes the result with
+    ``imageseries.write``.
+    """
+    PROCFMT = 'frame-cache'   # format name handed to imageseries.write
+    RAWFMT = 'hdf5'           # format name used when raw_format is 'hdf5'
+    RAWPATH = '/imageseries'  # ``path`` argument used for HDF5 raw files
+    DARKPCTILE = 50           # not referenced: the dark image is a median now
+
+    def __init__(self,
+                 fname, omw, panel_opts, panel_id,
+                 frame_start=0, raw_format='hdf5',dark=None):
+        """Constructor for PP_Dexela
+
+        Parameters
+        ----------
+        fname : str
+            Path of the raw image file.
+        omw : object
+            Must provide ``nframes`` and ``omegas`` (e.g. an ``OmegaWedges``
+            instance).
+        panel_opts : list
+            ``(operation, argument)`` pairs applied after the dark step.
+        panel_id : str
+            Stored in the output metadata under ``'panel_id'``.
+        frame_start : int, optional
+            Index of the first frame to use.  A frame list is passed to the
+            processed series only when this is greater than zero.
+        raw_format : str, optional
+            ``'hdf5'`` (any case) opens the file with ``RAWFMT``/``RAWPATH``;
+            any other value is lower-cased and passed to ``imageseries.open``.
+        dark : optional
+            Precomputed dark image; when ``None`` it is built on first use.
+        """
+        self._panel_id = panel_id
+        self.fname = fname
+        self.omwedges = omw
+        self.panel_opts = panel_opts
+        self.frame_start = frame_start
+        self.use_frame_list = (self.frame_start > 0)
+        raw_format = raw_format.lower()
+        if raw_format == 'hdf5':
+            self.raw = imageseries.open(
+                self.fname, self.RAWFMT, path=self.RAWPATH
+                )
+        else:
+            self.raw = imageseries.open(self.fname, raw_format)
+        self._dark = dark
+
+    @property
+    def panel_id(self):
+        """Panel identifier given to the constructor."""
+        return self._panel_id
+
+    @property
+    def oplist(self):
+        """Operation list: the dark step first, then the panel options.
+
+        Reading this property reads ``dark``, which builds the dark image if
+        none is cached yet.
+        """
+        return [('dark', self.dark)]+self.panel_opts
+
+    @property
+    def framelist(self):
+        """Frame indices ``frame_start .. frame_start + nframes - 1``."""
+        return range(self.frame_start, self.nframes + self.frame_start)
+
+    #
+    # ============================== API
+    #
+    @property
+    def nframes(self):
+        """Number of frames, taken from the omega wedges."""
+        return self.omwedges.nframes
+
+    @property
+    def omegas(self):
+        """Omega values, taken from the omega wedges."""
+        return self.omwedges.omegas
+
+    def processed(self):
+        """Return the raw series wrapped in ``DexelaIMS`` with ``oplist`` applied.
+
+        ``frame_list`` is passed only when ``frame_start`` is greater than
+        zero.
+        """
+        kw = {}
+        if self.use_frame_list:
+            kw = dict(frame_list=self.framelist)
+        return DexelaIMS(self.raw, self.oplist, **kw)
+
+    @property
+    def dark(self, nframes=100):
+        """build and return dark image
+
+        The image is the median computed by ``imageseries.stats.median`` over
+        ``min(nframes, self.nframes)`` frames and is cached after the first
+        build.  Because this is a property, attribute access always uses the
+        default ``nframes`` of 100.
+        """
+        if self._dark is None:
+            usenframes = min(nframes, self.nframes)
+            print(
+                "building dark images using %s frames (may take a while)..."
+                % usenframes
+            )
+            start = time.time()
+            self._dark = imageseries.stats.median(
+                    self.raw, nframes=usenframes
+            )#changed to median by DCP 11/18/17
+            elapsed = (time.time() - start)
+            print(
+                "done building background (dark) image: " +
+                "elapsed time is %f seconds" % elapsed
+            )
+
+        return self._dark
+
+    def save_processed(self, name, threshold, output_dir=None):
+        """Write the processed series to ``<name>-cachefile.npz``.
+
+        Parameters
+        ----------
+        name : str
+            Prefix of the cache file name.
+        threshold :
+            Passed through to ``imageseries.write`` as ``threshold``.
+        output_dir : str, optional
+            Directory for the cache file; created if it does not exist.  When
+            omitted, the bare file name is passed on exactly as before.
+        """
+        cache = '%s-cachefile.npz' % name
+        if output_dir is not None:
+            # Previously the directory was created with os.mkdir (failing if
+            # it already existed) and then never used for the output path.
+            if not os.path.isdir(output_dir):
+                os.makedirs(output_dir)
+            # NOTE(review): assumes imageseries.write accepts a cache_file
+            # that carries a directory component - confirm against hexrd.
+            cache = os.path.join(output_dir, cache)
+
+        # add omegas
+        pims = self.processed()
+        metad = pims.metadata
+        metad['omega'] = self.omegas
+        metad['panel_id'] = self.panel_id
+        imageseries.write(pims, "dummy", self.PROCFMT,
+                          style="npz",
+                          threshold=threshold,
+                          cache_file=cache)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocess_dexela_h5_filesystem.py	Thu Sep 23 21:00:48 2021 +0000
@@ -0,0 +1,140 @@
+import argparse
+import glob
+import os
+
+import numpy as np
+import time
+from datetime import datetime
+
+from hexrd.imageseries import omega
+import pp_dexela
+#from IPython import embed
+
+# =============================================================================
+# USER INPUT
+# =============================================================================
+
+# panel keys to MATCH INSTRUMENT FILE
+# process_dexelas() also matches each key, lower-cased, against the raw file path.
+panel_keys = ['FF1', 'FF2']
+# Every key starts out mapped to None; the option lists are assigned just below.
+panel_opts = dict.fromkeys(panel_keys)
+
+# !!!: hard coded options for each dexela for April 2017
+# Each entry is an (operation, argument) pair handed to pp_dexela.PP_Dexela.
+panel_opts['FF1'] = [('add-row', 1944), ('add-column', 1296),('flip', 'v'), ]
+panel_opts['FF2'] = [('add-row', 1944), ('add-column', 1296),('flip', 'h'), ]
+
+# ==================== End Inputs (should not need to alter below this line)
+
+
+
+def process_dexelas(file_names, samp_name, scan_number,
+                    ostart, ostep, num_frames,
+                    panel_opts, threshold, frame_start=None):
+    """
+    wrapper for F2 dexela setup
+
+    For every raw file whose path contains a lower-cased panel key, build a
+    ``pp_dexela.PP_Dexela`` and save the processed series in the current
+    directory as ``<samp_name>_<scan_number>_<raw file stem>-cachefile.npz``.
+    The saved files are then copied, best effort, into the raw file's
+    directory.
+
+    Parameters
+    ----------
+    file_names : iterable of str
+        Raw image file paths.
+    samp_name, scan_number :
+        Used to build the output file name.
+    ostart, ostep : float
+        First omega and omega step; the wedge ends at
+        ``ostart + num_frames*ostep``.
+    num_frames : int
+        Number of frames in the single omega wedge.
+    panel_opts : dict
+        Maps each panel key to its list of processing options.
+    threshold :
+        Passed through to ``PP_Dexela.save_processed``.
+    frame_start : int, optional
+        Index of the first frame.  When omitted, the module global ``fstart``
+        set by the ``__main__`` block is used if present, otherwise 0.
+    """
+    import shutil  # needed only for the best-effort copy below
+
+    if frame_start is None:
+        # The script entry point publishes the start frame as a module
+        # global; fall back to 0 so the function also works when imported.
+        frame_start = globals().get('fstart', 0)
+
+    ostop = ostart + num_frames*ostep
+    omw = omega.OmegaWedges(num_frames)
+    omw.addwedge(ostart, ostop, num_frames)
+
+    for file_name in file_names:
+        raw_dir, raw_base = os.path.split(file_name)
+        matched = False
+        for key in panel_keys:
+            if key.lower() not in file_name:
+                continue
+            matched = True
+            ppd = pp_dexela.PP_Dexela(
+                file_name,
+                omw,
+                panel_opts[key],
+                panel_id=key,
+                frame_start=frame_start)
+
+            output_name = samp_name + '_' + \
+                str(scan_number) + '_' + \
+                raw_base.split('.')[0]
+            ppd.save_processed(output_name, threshold)
+
+            # Copy the results next to the raw data.  This stays best effort
+            # (the raw directory may be read-only), but without building a
+            # shell command from file names and with failures reported.
+            if raw_dir:
+                for produced in glob.glob(output_name + '*'):
+                    try:
+                        shutil.copy2(produced, raw_dir)
+                    except (IOError, OSError, shutil.Error) as err:
+                        print("warning: could not copy %s to %s: %s"
+                              % (produced, raw_dir, err))
+        if not matched:
+            print("warning: %s matches none of the panel keys %s; skipped"
+                  % (file_name, panel_keys))
+
+if __name__ == '__main__':
+    #
+    #  Run preprocessor
+    #
+    # Command-line entry point: locate the raw *.h5 files of one scan under
+    # <base_dir>/<expt_name>/<samp_name>/<scan_number>/ff and pre-process them.
+    print("Running preprocessor....")
+    parser = argparse.ArgumentParser(
+        description="pre-process double Dexela images from F2")
+
+    parser.add_argument('base_dir',
+                        help="raw data path on chess daq", type=str)
+    parser.add_argument('expt_name',
+                        help="experiment name", type=str)
+    parser.add_argument('samp_name',
+                        help="sample name", type=str)
+    parser.add_argument('scan_number',
+                        help="ff scan number", type=int)
+
+    parser.add_argument('-n', '--num-frames',
+                        help="number of frames to read",
+                        type=int, default=1441)
+    parser.add_argument('-s', '--start-frame',
+                        help="index of first data frame",
+                        type=int, default=4)
+    parser.add_argument('-t', '--threshold',
+                        help="threshold for frame caches",
+                        type=int, default=50)
+    parser.add_argument('-o', '--ome-start',
+                        help="start omega",
+                        type=float, default=0.)
+    parser.add_argument('-e', '--ome-end',
+                        help="end omega",
+                        type=float, default=360.)
+
+    args = parser.parse_args()
+
+    # The omega step divides by the frame count, so reject 0 (or less) with a
+    # usage error instead of a ZeroDivisionError.
+    if args.num_frames <= 0:
+        parser.error("--num-frames must be a positive integer")
+
+    # strip args
+    data_dir = args.base_dir
+    expt_name = args.expt_name
+    samp_name = args.samp_name
+    scan_number = args.scan_number
+    num_frames = args.num_frames
+    # process_dexelas() reads the start frame from this module-level name.
+    fstart = args.start_frame
+    threshold = args.threshold
+    ostart = args.ome_start
+    oend = args.ome_end
+    ostep = (oend-ostart)/float(num_frames)
+
+    print("== Running preprocessor .... processing ")
+    now = datetime.now()
+    print(now.strftime("%m/%d/%Y %H:%M:%S"))
+
+    path = os.path.join(data_dir, expt_name, samp_name, str(scan_number), 'ff', '*.h5')
+    file_names = glob.glob(path)
+    print("path = " + path)
+    for f in file_names:
+        print(f)
+
+    # glob only returns existing paths, so the case that needs catching is an
+    # empty match list; previously that passed silently and nothing ran.
+    if not file_names:
+        raise RuntimeError("files don't exist! nothing matches " + path)
+    if not all(os.path.exists(f) for f in file_names):
+        raise RuntimeError("files don't exist!")
+
+    process_dexelas(file_names, samp_name, scan_number,
+                    ostart, ostep, num_frames,
+                    panel_opts, threshold)
+    now = datetime.now()
+    print(now.strftime("%m/%d/%Y %H:%M:%S"))
+    print("== End of processing " + " ==")
+    print("                                   ")
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocess_dexela_h5_galaxy.py	Thu Sep 23 21:00:48 2021 +0000
@@ -0,0 +1,151 @@
+import argparse
+import glob
+import os
+
+import numpy as np
+
+import time
+from datetime import datetime
+
+from hexrd.imageseries import omega
+import pp_dexela
+#from IPython import embed
+
+# =============================================================================
+# USER INPUT
+# =============================================================================
+
+# panel keys to MATCH INSTRUMENT FILE
+# process_dexelas() also matches each key, lower-cased, against the raw file path.
+panel_keys = ['FF1', 'FF2']
+# Every key starts out mapped to None; the option lists are assigned just below.
+panel_opts = dict.fromkeys(panel_keys)
+
+# !!!: hard coded options for each dexela for April 2017
+# Each entry is an (operation, argument) pair handed to pp_dexela.PP_Dexela.
+panel_opts['FF1'] = [('add-row', 1944), ('add-column', 1296),('flip', 'v'), ]
+panel_opts['FF2'] = [('add-row', 1944), ('add-column', 1296),('flip', 'h'), ]
+
+# ==================== End Inputs (should not need to alter below this line)
+
+
+
+def process_dexelas(file_names, samp_name, scan_number,
+                    ostart, ostep, num_frames,
+                    panel_opts, threshold, frame_start=None):
+    """
+    wrapper for F2 dexela setup
+
+    For every raw file whose path contains a lower-cased panel key, build a
+    ``pp_dexela.PP_Dexela`` and save the processed series in the current
+    directory.  The output name is built from the path itself:
+    ``<4th-from-last component>_<3rd-from-last component>_<file stem>``.
+    The saved files are then copied, best effort, into the raw file's
+    directory.
+
+    Parameters
+    ----------
+    file_names : iterable of str
+        Raw image file paths; each needs at least four '/'-separated
+        components.
+    samp_name, scan_number :
+        Accepted for signature compatibility; the output name does not use
+        them.
+    ostart, ostep : float
+        First omega and omega step; the wedge ends at
+        ``ostart + num_frames*ostep``.
+    num_frames : int
+        Number of frames in the single omega wedge.
+    panel_opts : dict
+        Maps each panel key to its list of processing options.
+    threshold :
+        Passed through to ``PP_Dexela.save_processed``.
+    frame_start : int, optional
+        Index of the first frame.  When omitted, the module global ``fstart``
+        set by the ``__main__`` block is used if present, otherwise 0.
+    """
+    import shutil  # needed only for the best-effort copy below
+
+    if frame_start is None:
+        # The script entry point publishes the start frame as a module
+        # global; fall back to 0 so the function also works when imported.
+        frame_start = globals().get('fstart', 0)
+
+    ostop = ostart + num_frames*ostep
+    omw = omega.OmegaWedges(num_frames)
+    omw.addwedge(ostart, ostop, num_frames)
+
+    for file_name in file_names:
+        raw_dir = os.path.split(file_name)[0]
+        parts = file_name.split('/')
+        matched = False
+        for key in panel_keys:
+            if key.lower() not in file_name:
+                continue
+            matched = True
+            ppd = pp_dexela.PP_Dexela(
+                file_name,
+                omw,
+                panel_opts[key],
+                panel_id=key,
+                frame_start=frame_start)
+
+            output_name = parts[-4] + '_' + \
+                parts[-3] + '_' + \
+                parts[-1].split('.')[0]
+            ppd.save_processed(output_name, threshold)
+
+            # Copy the results next to the raw data.  This stays best effort
+            # (the raw directory may be read-only), but without building a
+            # shell command from file names and with failures reported.
+            if raw_dir:
+                for produced in glob.glob(output_name + '*'):
+                    try:
+                        shutil.copy2(produced, raw_dir)
+                    except (IOError, OSError, shutil.Error) as err:
+                        print("warning: could not copy %s to %s: %s"
+                              % (produced, raw_dir, err))
+        if not matched:
+            print("warning: %s matches none of the panel keys %s; skipped"
+                  % (file_name, panel_keys))
+if __name__ == '__main__':
+    #
+    #  Run preprocessor
+    #
+    # Command-line entry point for a single raw file.  Sample name and scan
+    # number are not arguments here: they are taken from the file's path.
+
+    parser = argparse.ArgumentParser(
+        description="pre-process double Dexela images from F2")
+
+    parser.add_argument('file_name',
+                        help="raw data path on chess daq", type=str)
+
+    parser.add_argument('-n', '--num-frames',
+                        help="number of frames to read",
+                        type=int, default=1441)
+    parser.add_argument('-s', '--start-frame',
+                        help="index of first data frame",
+                        type=int, default=4)
+    parser.add_argument('-t', '--threshold',
+                        help="threshold for frame caches",
+                        type=int, default=50)
+    parser.add_argument('-o', '--ome-start',
+                        help="start omega",
+                        type=float, default=0.)
+    parser.add_argument('-e', '--ome-end',
+                        help="end omega",
+                        type=float, default=360.)
+
+    args = parser.parse_args()
+
+    # The omega step divides by the frame count, so reject 0 (or less) with a
+    # usage error instead of a ZeroDivisionError.
+    if args.num_frames <= 0:
+        parser.error("--num-frames must be a positive integer")
+
+    print(args.file_name)
+    # strip args
+    file_name = args.file_name
+    # Sample and scan are the 4th- and 3rd-from-last path components; fail
+    # with a usage message rather than an IndexError when the path is shorter.
+    path_parts = file_name.split('/')
+    if len(path_parts) < 4:
+        parser.error("file_name needs at least four '/'-separated components "
+                     "(sample and scan are read from the path): " + file_name)
+    samp_name = path_parts[-4]
+    scan_number = path_parts[-3]
+    num_frames = args.num_frames
+    # process_dexelas() reads the start frame from this module-level name.
+    fstart = args.start_frame
+    threshold = args.threshold
+    ostart = args.ome_start
+    oend = args.ome_end
+    ostep = (oend-ostart)/float(num_frames)
+
+    print("== Running preprocessor .... processing " + file_name + " ==")
+    now = datetime.now()
+    print(now.strftime("%m/%d/%Y %H:%M:%S"))
+
+    file_names = [file_name]
+    for f in file_names:
+        print(f)
+
+    if not all(os.path.exists(f) for f in file_names):
+        raise RuntimeError("files don't exist!")
+
+    process_dexelas(file_names, samp_name, scan_number,
+                    ostart, ostep, num_frames,
+                    panel_opts, threshold)
+    now = datetime.now()
+    print(now.strftime("%m/%d/%Y %H:%M:%S"))
+    print("== End of processing " + file_name + " ==")
+    print("                                   ")
+    print("                                   ")
+