From 2965f7d8812373a88ac4a675dbfe05de924c30b2 Mon Sep 17 00:00:00 2001 From: Aaron Brewster Date: Fri, 3 Mar 2023 12:28:50 -0800 Subject: [PATCH 1/4] Add nxmx_writer capability to nanoBragg - Uses dxtbx's general purpose nxmx_writer - Need to set values for material and thickness for NXmx - Hook up some missing pipes for ImageSetData and FormatBraggInMemory so they can work with imageset.get_spectrum - Add example test to tst_gauss_argchk.py --- simtbx/nanoBragg/__init__.py | 56 +++++++++++++++++++++++++--- simtbx/nanoBragg/tst_gauss_argchk.py | 1 + 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/simtbx/nanoBragg/__init__.py b/simtbx/nanoBragg/__init__.py index 310242bf69..433882df6a 100644 --- a/simtbx/nanoBragg/__init__.py +++ b/simtbx/nanoBragg/__init__.py @@ -2,6 +2,7 @@ import boost_adaptbx.boost.python as bp import cctbx.uctbx # possibly implicit ext = bp.import_ext("simtbx_nanoBragg_ext") +from libtbx.phil import parse from scitbx.array_family import flex from simtbx_nanoBragg_ext import * from scitbx.matrix import col, sqr @@ -12,7 +13,8 @@ from dxtbx.model import CrystalFactory from dxtbx.model import BeamFactory from dxtbx.model import DetectorFactory -from dxtbx.format import cbf_writer +from dxtbx.format.Format import Format +from dxtbx.format import cbf_writer, nxmx_writer @bp.inject_into(ext.nanoBragg) @@ -152,15 +154,15 @@ def detector(self): 'identifier': '', 'image_size': im_shape, 'mask': [], - 'material': '', # TODO + 'material': 'Si', 'mu': 0.0, # TODO 'name': 'Panel', 'origin': origin, 'pedestal': 0.0, 'pixel_size': (pixsize, pixsize), 'px_mm_strategy': {'type': 'SimplePxMmStrategy'}, - 'raw_image_offset': (0, 0), # TODO - 'thickness': 0.0, # TODO + 'raw_image_offset': (0, 0), + 'thickness': 0.00001, # TODO 'trusted_range': (-1e3, 1e10), # TODO 'type': ''}]} detector = DetectorFactory.from_dict(det_descr) @@ -171,7 +173,7 @@ def imageset(self): format_class = FormatBraggInMemory(self.raw_pixels) reader = MemReaderNamedPath("virtual_Bragg_path", [format_class]) reader.format_class = FormatBraggInMemory - imageset_data = ImageSetData(reader, None) + imageset_data = ImageSetData(reader, None, vendor="", params={'raw_pixels': self.raw_pixels}, format=FormatBraggInMemory) imageset = ImageSet(imageset_data) imageset.set_beam(self.beam) imageset.set_detector(self.detector) @@ -242,6 +244,44 @@ def to_cbf(self, cbf_filename, toggle_conventions=False, intfile_scale=1.0): self.raw_pixels = cache_pixels # print("switch back to cached") + def to_nexus_nxmx(self, nxmx_filename, toggle_conventions=False, intfile_scale=1.0): + """write a NeXus NXmx-format image file to disk from the raw pixel array + intfile_scale: multiplicative factor applied to raw pixels before output + intfile_scale > 0 : value of the multiplicative factor + intfile_scale = 1 (default): do not apply a factor + intfile_scale = 0 : compute a reasonable scale factor to set max pixel to 55000; given by get_intfile_scale()""" + + if intfile_scale != 1.0: + cache_pixels = self.raw_pixels + if intfile_scale > 0: self.raw_pixels = self.raw_pixels * intfile_scale + else: self.raw_pixels = self.raw_pixels * self.get_intfile_scale() + # print("switch to scaled") + + if toggle_conventions: + # switch to DIALS convention before writing CBF + CURRENT_CONV = self.beamcenter_convention + self.beamcenter_convention=DIALS + + params = nxmx_writer.phil_scope.fetch(parse(""" + output_file=%s + nexus_details { + instrument_name=nanoBragg + source_name=nanoBragg + start_time=NA + end_time_estimated=NA + sample_name=nanoBragg + } + """%nxmx_filename)).extract() + writer = nxmx_writer.NXmxWriter(params) + writer(imageset=self.imageset) + + if toggle_conventions: + self.beamcenter_convention=CURRENT_CONV + + if intfile_scale != 1.0: + self.raw_pixels = cache_pixels + # print("switch back to cached") + def make_imageset(data, beam, detector): format_class = FormatBraggInMemoryMultiPanel(data) reader = MemReaderNamedPath("virtual_Bragg_path", [format_class]) @@ -279,7 +319,7 @@ def get_mask(self, goniometer=None): """dummie place holder for mask, consider using internal nanoBragg mask""" return self.mask -class FormatBraggInMemory: +class FormatBraggInMemory(Format): def __init__(self, raw_pixels): self.raw_pixels = raw_pixels @@ -305,6 +345,10 @@ def get_mask(self, goniometer=None): """dummie place holder for mask, consider using internal nanoBragg mask""" return self.mask, + @classmethod + def get_instance(Class, filename, **kwargs): + return Class(raw_pixels = kwargs.pop('raw_pixels'), **kwargs) + #def paths(self): # return ["InMemoryBraggPath"] # TODO: CBFLib complains if no datablock path provided which comes from path diff --git a/simtbx/nanoBragg/tst_gauss_argchk.py b/simtbx/nanoBragg/tst_gauss_argchk.py index 87368ac24f..91ba3043ff 100644 --- a/simtbx/nanoBragg/tst_gauss_argchk.py +++ b/simtbx/nanoBragg/tst_gauss_argchk.py @@ -231,6 +231,7 @@ def simple_monochromatic_case(bragg_engine, BEAM, DETECTOR, CRYSTAL, SF_model, a SIM.to_smv_format(fileout="test_full_001.img", intfile_scale=output_scale) assert approx_equal(SIM.raw_pixels, SIM2.raw_pixels) SIM.to_cbf("test_full_001.cbf", intfile_scale=output_scale) + SIM.to_nexus_nxmx("test_full_001.h5", intfile_scale=output_scale) if runmode=="GPU": bragg_engine = nanoBragg.add_nanoBragg_spots_cuda From fa5a5695120f0e1b90036883e0a9a4a861525815 Mon Sep 17 00:00:00 2001 From: Nicholas K Sauter Date: Wed, 22 Mar 2023 17:57:54 -0700 Subject: [PATCH 2/4] Work in progress writing simulations to NeXus --- simtbx/nanoBragg/__init__.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/simtbx/nanoBragg/__init__.py b/simtbx/nanoBragg/__init__.py index 433882df6a..ebf0b00c34 100644 --- a/simtbx/nanoBragg/__init__.py +++ b/simtbx/nanoBragg/__init__.py @@ -282,6 +282,22 @@ def to_nexus_nxmx(self, nxmx_filename, toggle_conventions=False, intfile_scale=1 self.raw_pixels = cache_pixels # print("switch back to cached") +def nexus_factory(nxmx_filename): + params = nxmx_writer.phil_scope.fetch(parse(""" + output_file=%s + nexus_details { + instrument_name=nanoBragg + source_name=nanoBragg + start_time=NA + end_time_estimated=NA + sample_name=nanoBragg + } + dtype=int32 + """%nxmx_filename)).extract() + writer = nxmx_writer.NXmxWriter(params) + return writer + + def make_imageset(data, beam, detector): format_class = FormatBraggInMemoryMultiPanel(data) reader = MemReaderNamedPath("virtual_Bragg_path", [format_class]) From fb2a766e39dcf8bb5ddc21f1aa7e5a2bb3070826 Mon Sep 17 00:00:00 2001 From: vganapati Date: Tue, 25 Apr 2023 14:38:20 -0700 Subject: [PATCH 3/4] Updated to use DeviceWrapper --- simtbx/command_line/integrate.py | 279 ++++++++++++++++--------------- 1 file changed, 140 insertions(+), 139 deletions(-) diff --git a/simtbx/command_line/integrate.py b/simtbx/command_line/integrate.py index c23e6011e9..1bcb4a6b03 100644 --- a/simtbx/command_line/integrate.py +++ b/simtbx/command_line/integrate.py @@ -61,6 +61,7 @@ def print0(*args, **kwargs): from copy import deepcopy from collections import Counter +from simtbx.diffBragg.device import DeviceWrapper def filter_refls(R): vec3_dbl_keys = 'xyzcal.px', 'xyzcal.mm', 'xyzobs.px.value', 'xyzobs.px.value', 'rlp', 's1' @@ -338,148 +339,148 @@ def refls_from_sims(panel_imgs, detector, beam, thresh=0, filter=None, panel_ids os.makedirs(args.outdir) COMM.barrier() - params = utils.get_extracted_params_from_phil_sources(args.predPhil, args.cmdlinePhil) - if os.path.isfile(args.inputGlob): - df_all = pandas.read_pickle(args.inputGlob) - df_all.reset_index(inplace=True, drop=True) - def df_iter(): - for i_f in range(len(df_all)): - if i_f % COMM.size != COMM.rank: - continue - df_i = df_all.iloc[i_f:i_f+1].copy().reset_index(drop=True) - yield i_f, df_i - Nf = len(df_all) - else: - if os.path.isdir(args.inputGlob): - glob_s = os.path.join(args.inputGlob, "pandas/rank*/*.pkl") - fnames = glob.glob(glob_s) + with DeviceWrapper(script.dev) as _: + params = utils.get_extracted_params_from_phil_sources(args.predPhil, args.cmdlinePhil) + if os.path.isfile(args.inputGlob): + df_all = pandas.read_pickle(args.inputGlob) + df_all.reset_index(inplace=True, drop=True) + def df_iter(): + for i_f in range(len(df_all)): + if i_f % COMM.size != COMM.rank: + continue + df_i = df_all.iloc[i_f:i_f+1].copy().reset_index(drop=True) + yield i_f, df_i + Nf = len(df_all) else: - fnames = glob.glob(args.inputGlob) - def df_iter(): - for i_f,f in enumerate(fnames): - if i_f % COMM.size != COMM.rank: - continue - df = pandas.read_pickle(f) - yield i_f, df - Nf = len(fnames) - - if params.predictions.verbose: - params.predictions.verbose = COMM.rank==0 - - dev = COMM.rank % args.numdev - - print0("Found %d input files" % Nf) - - all_dfs = [] - all_pred_names = [] - exp_ref_spec_lines = [] - for i_f, df in df_iter(): - printR("Shot %d / %d" % (i_f+1, Nf), flush=True) - - expt_name = df.opt_exp_name.values[0] - tag = os.path.splitext(os.path.basename(expt_name))[0] - new_expt_name = "%s/%s_%d_predicted.expt" % (args.outdir,tag, i_f) - new_expt_name = os.path.abspath(new_expt_name) - df["opt_exp_name"] = new_expt_name - - shutil.copyfile(expt_name, new_expt_name) - - data_exptList = ExperimentList.from_file(expt_name) - data_expt = data_exptList[0] - - try: - spectrum_override = None - if params.spectrum_from_imageset: - spectrum_override = downsamp_spec_from_params(params, data_expt) - pred = predictions.get_predicted_from_pandas( - df, params, strong=None, device_Id=dev, spectrum_override=spectrum_override) - if args.filterDupes: - pred = filter_refls(pred) - except ValueError: - os.remove(new_expt_name) - continue - - data = utils.image_data_from_expt(data_expt) - Rstrong = refls_from_sims(data, data_expt.detector, data_expt.beam, phil_file=args.procPhil ) - Rstrong['id'] = flex.int(len(Rstrong), 0) - num_panels = len(data_expt.detector) - if num_panels > 1: - assert params.predictions.label_weak_col == "rlp" - - Rstrong.centroid_px_to_mm(data_exptList) - Rstrong.map_centroids_to_reciprocal_space(data_exptList) - predictions.label_weak_predictions(pred, Rstrong, q_cutoff=params.predictions.qcut, col=params.predictions.label_weak_col ) - - pred['is_strong'] = flex.bool(np.logical_not(pred['is_weak'])) - strong_sel = np.logical_not(pred['is_weak']) - - pred["refl_idx"] = flex.int(np.arange(len(pred))) - weaks = pred.select(pred['is_weak']) - weaks_sorted = np.argsort(weaks["scatter"])[::-1] - nweak = len(weaks) - num_keep = int(nweak*params.predictions.weak_fraction) - weak_refl_inds_keep = set(np.array(weaks["refl_idx"])[weaks_sorted[:num_keep]]) - - weak_sel = flex.bool([i in weak_refl_inds_keep for i in pred['refl_idx']]) - keeps = np.logical_or( pred['is_strong'], weak_sel) - #printR("Sum keeps=%d; num_strong=%d, num_kept_weak=%d" % (sum(keeps), sum(strong_sel), sum(weak_sel))) - pred = pred.select(flex.bool(keeps)) - nstrong = np.sum(strong_sel) - printR("Will save %d refls (%d strong, %d weak)" % (len(pred), np.sum(strong_sel), np.sum(weak_sel))) - pred_file = os.path.abspath("%s/%s_%d_predicted.refl" % ( args.outdir, tag, i_f)) - pred.as_file(pred_file) - - Rindexed = Rstrong.select(Rstrong['indexed']) - if len(Rindexed)==0: - print("No strong indexed refls for shot %s" % new_expt_name) - continue - - utils.refls_to_hkl(Rindexed, data_expt.detector, data_expt.beam, data_expt.crystal, update_table=True) - try: - int_expt, int_refl = integrate(args.procPhil, data_exptList, Rindexed, pred) + if os.path.isdir(args.inputGlob): + glob_s = os.path.join(args.inputGlob, "pandas/rank*/*.pkl") + fnames = glob.glob(glob_s) + else: + fnames = glob.glob(args.inputGlob) + def df_iter(): + for i_f,f in enumerate(fnames): + if i_f % COMM.size != COMM.rank: + continue + df = pandas.read_pickle(f) + yield i_f, df + Nf = len(fnames) + + if params.predictions.verbose: + params.predictions.verbose = COMM.rank==0 + + dev = COMM.rank % args.numdev + + print0("Found %d input files" % Nf) + + all_dfs = [] + all_pred_names = [] + exp_ref_spec_lines = [] + for i_f, df in df_iter(): + printR("Shot %d / %d" % (i_f+1, Nf), flush=True) + + expt_name = df.opt_exp_name.values[0] + tag = os.path.splitext(os.path.basename(expt_name))[0] + new_expt_name = "%s/%s_%d_predicted.expt" % (args.outdir,tag, i_f) + new_expt_name = os.path.abspath(new_expt_name) + df["opt_exp_name"] = new_expt_name + + shutil.copyfile(expt_name, new_expt_name) + + data_exptList = ExperimentList.from_file(expt_name) + data_expt = data_exptList[0] + + try: + spectrum_override = None + if params.spectrum_from_imageset: + spectrum_override = downsamp_spec_from_params(params, data_expt) + pred = predictions.get_predicted_from_pandas( + df, params, strong=None, device_Id=dev, spectrum_override=spectrum_override) + if args.filterDupes: + pred = filter_refls(pred) + except ValueError: + os.remove(new_expt_name) + continue + + data = utils.image_data_from_expt(data_expt) + Rstrong = refls_from_sims(data, data_expt.detector, data_expt.beam, phil_file=args.procPhil ) + Rstrong['id'] = flex.int(len(Rstrong), 0) + num_panels = len(data_expt.detector) + if num_panels > 1: + assert params.predictions.label_weak_col == "rlp" + + Rstrong.centroid_px_to_mm(data_exptList) + Rstrong.map_centroids_to_reciprocal_space(data_exptList) + predictions.label_weak_predictions(pred, Rstrong, q_cutoff=params.predictions.qcut, col=params.predictions.label_weak_col ) + + pred['is_strong'] = flex.bool(np.logical_not(pred['is_weak'])) + strong_sel = np.logical_not(pred['is_weak']) + + pred["refl_idx"] = flex.int(np.arange(len(pred))) + weaks = pred.select(pred['is_weak']) + weaks_sorted = np.argsort(weaks["scatter"])[::-1] + nweak = len(weaks) + num_keep = int(nweak*params.predictions.weak_fraction) + weak_refl_inds_keep = set(np.array(weaks["refl_idx"])[weaks_sorted[:num_keep]]) + + weak_sel = flex.bool([i in weak_refl_inds_keep for i in pred['refl_idx']]) + keeps = np.logical_or( pred['is_strong'], weak_sel) + #printR("Sum keeps=%d; num_strong=%d, num_kept_weak=%d" % (sum(keeps), sum(strong_sel), sum(weak_sel))) + pred = pred.select(flex.bool(keeps)) + nstrong = np.sum(strong_sel) + printR("Will save %d refls (%d strong, %d weak)" % (len(pred), np.sum(strong_sel), np.sum(weak_sel))) + pred_file = os.path.abspath("%s/%s_%d_predicted.refl" % ( args.outdir, tag, i_f)) + pred.as_file(pred_file) + + Rindexed = Rstrong.select(Rstrong['indexed']) + if len(Rindexed)==0: + print("No strong indexed refls for shot %s" % new_expt_name) + continue + + utils.refls_to_hkl(Rindexed, data_expt.detector, data_expt.beam, data_expt.crystal, update_table=True) + try: + int_expt, int_refl = integrate(args.procPhil, data_exptList, Rindexed, pred) + except RuntimeError: + print("Integration failed for %s" % pred_file) + continue int_expt_name = "%s/%s_%d_integrated.expt" % ( args.outdir,tag, i_f) int_expt.as_file(int_expt_name) int_refl['bbox'] = int_refl['shoebox'].bounding_boxes() int_refl_name = int_expt_name.replace(".expt", ".refl") int_refl.as_file(int_refl_name) - except RuntimeError as err: - print("Integration failed for %s because: '%s'" % (pred_file, str(err))) - - all_dfs.append(df) - all_pred_names.append(pred_file) - spec_name = df.spectrum_filename.values[0] - if spec_name is None: - spec_name = "" - exp_ref_spec_lines.append("%s %s %s\n" % (new_expt_name, pred_file, spec_name)) - - if all_dfs: - all_dfs = pandas.concat(all_dfs) - all_dfs["predicted_refls"] = all_pred_names - all_dfs["predictions"] = all_pred_names - else: - all_dfs = None - - all_dfs = COMM.gather(all_dfs) - exp_ref_spec_lines = COMM.reduce(exp_ref_spec_lines) - print0("\nReflections written to folder %s.\n" % args.outdir) - if COMM.rank==0: - hopper_input_name = os.path.abspath(os.path.join(args.outdir , "%s.txt" % args.hopInputName)) - o = open(hopper_input_name, "w") - for l in exp_ref_spec_lines: - o.write(l) - o.close() - all_dfs = [df for df in all_dfs if df is not None] - if not all_dfs: - raise ValueError("No dataframes to concat: prediction/integration failed for all shots..") - - all_dfs = pandas.concat([df for df in all_dfs if df is not None]) - all_dfs.reset_index(inplace=True, drop=True) - best_pkl_name = os.path.abspath(os.path.join(args.outdir , "%s.pkl" % args.hopInputName)) - all_dfs.to_pickle(best_pkl_name) - print("Wrote %s (best_pickle option for simtbx.diffBragg.hopper) and %s (exp_ref_spec option for simtbx.diffBragg.hopper). Use them to run the predictions through hopper. Use the centroid=cal option to specify the predictions" % (best_pkl_name, hopper_input_name)) - - with open(args.outdir +".exectution.txt", "w") as o: - o.write("integrate was run from folder: %s\n" % os.getcwd()) - o.write("The command line input was:\n") - o.write(" ".join(sys.argv)) - #TODO: write the diff phils here: + all_dfs.append(df) + all_pred_names.append(pred_file) + spec_name = df.spectrum_filename.values[0] + if spec_name is None: + spec_name = "" + exp_ref_spec_lines.append("%s %s %s\n" % (new_expt_name, int_refl_name, spec_name)) + + if all_dfs: + all_dfs = pandas.concat(all_dfs) + all_dfs["predicted_refls"] = all_pred_names + else: + all_dfs = None + + all_dfs = COMM.gather(all_dfs) + exp_ref_spec_lines = COMM.reduce(exp_ref_spec_lines) + print0("\nReflections written to folder %s.\n" % args.outdir) + if COMM.rank==0: + hopper_input_name = os.path.abspath(os.path.join(args.outdir , "%s.txt" % args.hopInputName)) + o = open(hopper_input_name, "w") + for l in exp_ref_spec_lines: + o.write(l) + o.close() + all_dfs = [df for df in all_dfs if df is not None] + if not all_dfs: + raise ValueError("No dataframes to concat: prediction/integration failed for all shots..") + + all_dfs = pandas.concat([df for df in all_dfs if df is not None]) + all_dfs.reset_index(inplace=True, drop=True) + best_pkl_name = os.path.abspath(os.path.join(args.outdir , "%s.pkl" % args.hopInputName)) + all_dfs.to_pickle(best_pkl_name) + print("Wrote %s (best_pickle option for simtbx.diffBragg.hopper) and %s (exp_ref_spec option for simtbx.diffBragg.hopper). Use them to run the predictions through hopper. Use the centroid=cal option to specify the predictions" % (best_pkl_name, hopper_input_name)) + + with open(args.outdir +".exectution.txt", "w") as o: + o.write("integrate was run from folder: %s\n" % os.getcwd()) + o.write("The command line input was:\n") + o.write(" ".join(sys.argv)) + #TODO: write the diff phils here: From ff51ff27de35453c1691cfda62f9baba89974175 Mon Sep 17 00:00:00 2001 From: vganapati Date: Tue, 25 Apr 2023 16:41:59 -0700 Subject: [PATCH 4/4] Bug fix for DeviceWrapper --- simtbx/command_line/integrate.py | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/simtbx/command_line/integrate.py b/simtbx/command_line/integrate.py index 1bcb4a6b03..0279a47206 100644 --- a/simtbx/command_line/integrate.py +++ b/simtbx/command_line/integrate.py @@ -339,39 +339,39 @@ def refls_from_sims(panel_imgs, detector, beam, thresh=0, filter=None, panel_ids os.makedirs(args.outdir) COMM.barrier() - with DeviceWrapper(script.dev) as _: - params = utils.get_extracted_params_from_phil_sources(args.predPhil, args.cmdlinePhil) - if os.path.isfile(args.inputGlob): - df_all = pandas.read_pickle(args.inputGlob) - df_all.reset_index(inplace=True, drop=True) - def df_iter(): - for i_f in range(len(df_all)): - if i_f % COMM.size != COMM.rank: - continue - df_i = df_all.iloc[i_f:i_f+1].copy().reset_index(drop=True) - yield i_f, df_i - Nf = len(df_all) + + params = utils.get_extracted_params_from_phil_sources(args.predPhil, args.cmdlinePhil) + if os.path.isfile(args.inputGlob): + df_all = pandas.read_pickle(args.inputGlob) + df_all.reset_index(inplace=True, drop=True) + def df_iter(): + for i_f in range(len(df_all)): + if i_f % COMM.size != COMM.rank: + continue + df_i = df_all.iloc[i_f:i_f+1].copy().reset_index(drop=True) + yield i_f, df_i + Nf = len(df_all) + else: + if os.path.isdir(args.inputGlob): + glob_s = os.path.join(args.inputGlob, "pandas/rank*/*.pkl") + fnames = glob.glob(glob_s) else: - if os.path.isdir(args.inputGlob): - glob_s = os.path.join(args.inputGlob, "pandas/rank*/*.pkl") - fnames = glob.glob(glob_s) - else: - fnames = glob.glob(args.inputGlob) - def df_iter(): - for i_f,f in enumerate(fnames): - if i_f % COMM.size != COMM.rank: - continue - df = pandas.read_pickle(f) - yield i_f, df - Nf = len(fnames) - - if params.predictions.verbose: - params.predictions.verbose = COMM.rank==0 - - dev = COMM.rank % args.numdev - - print0("Found %d input files" % Nf) - + fnames = glob.glob(args.inputGlob) + def df_iter(): + for i_f,f in enumerate(fnames): + if i_f % COMM.size != COMM.rank: + continue + df = pandas.read_pickle(f) + yield i_f, df + Nf = len(fnames) + + if params.predictions.verbose: + params.predictions.verbose = COMM.rank==0 + + dev = COMM.rank % args.numdev + + print0("Found %d input files" % Nf) + with DeviceWrapper(dev) as _: all_dfs = [] all_pred_names = [] exp_ref_spec_lines = []