Configuration#
The config module contains a mechanism to collect configuration parameters from various sources and configuration files, and to combine them in a hierarchical manner into a single, consistent configuration dictionary. It will load an (optional) provided config file, or alternatively use a passed python dictionary as initial config dictionary, and subsequently look for the following additional config files to load:
folder_config
: A config file of name :file:sed_config.yaml
in the current working directory. This is mostly intended to pass calibration parameters of the workflow between different notebook instances.
user_config
: A config file provided by the user, stored as :file:.sed/config.yaml
in the current user’s home directory. This is intended to give a user the option for individual configuration modifications of system settings.
system_config
: A config file provided by the system administrator, stored as :file:/etc/sed/config.yaml
on Linux-based systems, and :file:%ALLUSERSPROFILE%/sed/config.yaml
on Windows. This should provide all necessary default parameters for using the sed processor with a given setup. For an example for an mpes setup, see :ref:example_config
default_config
: The default configuration shipped with the package. Typically, all parameters here should be overwritten by any of the other configuration files.
The config mechanism returns the combined dictionary, and reports the loaded configuration files. In order to disable or overwrite any of the configuration files, they can also be given as optional parameters (path to a file, or python dictionary).
Default configuration settings#
core:
# The loader to use. The generic loader allows for loading various file types directly using dask.
loader: generic
dataframe:
# dataframe column containing x coordinates
x_column: "X"
# dataframe column containing y coordinates
y_column: "Y"
# dataframe column containing time-of-flight data
tof_column: "t"
# dataframe column containing time-of-flight data in nanoseconds
tof_ns_column: "t_ns"
# dataframe column containing analog-to-digital data
adc_column: "ADC"
# dataframe column containing bias voltage data
bias_column: "sampleBias"
# dataframe column containing corrected x coordinates
corrected_x_column: "Xm"
# dataframe column containing corrected y coordinates
corrected_y_column: "Ym"
# dataframe column containing corrected time-of-flight data
corrected_tof_column: "tm"
# dataframe column containing kx coordinates
kx_column: "kx"
# dataframe column containing ky coordinates
ky_column: "ky"
# dataframe column containing energy data
energy_column: "energy"
# dataframe column containing delay data
delay_column: "delay"
# time length of a base time-of-flight bin in s
tof_binwidth: 4.125e-12
# Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1))
tof_binning: 1
# binning factor used for the adc coordinate (2^(adc_binning-1))
adc_binning: 1
# list of columns to apply jitter to.
jitter_cols: ["@x_column", "@y_column", "@tof_column"]
# Jitter amplitude or list of jitter amplitudes. Should equal half the digital step size of each jitter_column
jitter_amps: 0.5
# Time stepping in seconds of the successive events in the timed dataframe
timed_dataframe_unit_time: 0.001
energy:
# Number of bins to use for energy calibration traces
bins: 1000
# Bin ranges to use for energy calibration curves (for tof_binning=0)
ranges: [100000, 150000]
# Option to normalize energy calibration traces
normalize: True
# Pixel range for smoothing
normalize_span: 7
# Spline order for smoothing
normalize_order: 1
# Radius parameter for fastdtw algorithm to find path correspondence
fastdtw_radius: 2
# Window around a peak to make sure that no other peaks are present
peak_window: 7
# Method to use for energy calibration
calibration_method: "lmfit"
# Energy scale to use for energy calibration
energy_scale: "kinetic"
# Approximate position of the high-energy-cutoff in tof_column bins,
# used for displaying a graph to choose the energy correction function parameters.
tof_fermi: 132250
# TOF range to visualize for the correction tool around tof_fermi
tof_width: [-600, 1000]
# x-integration range for the correction tool around the center pixel
x_width: [-20, 20]
# y-integration range for the correction tool around the center pixel
y_width: [-20, 20]
# High intensity cutoff for the visualization tool
color_clip: 300
momentum:
# binning axes to use for momentum correction/calibration.
# Axes names starting with "@" refer to keys in the "dataframe" section
axes: ["@x_column", "@y_column", "@tof_column"]
# Bin numbers used for the respective axes
bins: [512, 512, 300]
# bin ranges to use (in unbinned detector coordinates)
ranges: [[-256, 1792], [-256, 1792], [132000, 138000]]
# The x/y pixel ranges of the detector
detector_ranges: [[0, 2048], [0, 2048]]
# The center pixel of the detector in the binned x/y coordinates
center_pixel: [256, 256]
# Sigma parameter for feature selection (intensity above background)
sigma: 5
# FWHM parameter for feature selection (width of features to extract)
fwhm: 8
# Sigma_radius parameter for feature selection (variation of radius size)
sigma_radius: 1
delay:
# value ranges of the analog-to-digital converter axes used for encoding the delay stage position
# (in unbinned coordinates)
adc_range: [1900, 25600]
# pump probe time overlap in ps
time0: 0
# whether to flip the time axis
flip_time_axis: False
binning:
# Histogram computation mode to use.
hist_mode: "numba"
# Mode for histogram recombination to use
mode: fast
# Whether to display a progress bar
pbar: True
# Number of multithreading threads per worker thread
threads_per_worker: 4
# API for numpy multithreading
threadpool_API: "blas"
histogram:
# number of bins used for histogram visualization
bins: [80, 80, 80]
# default axes to use for histogram visualization.
# Axes names starting with "@" refer to keys in the "dataframe" section
axes: ["@x_column", "@y_column", "@tof_column"]
# default ranges to use for histogram visualization (in unbinned detector coordinates)
ranges: [[0, 1800], [0, 1800], [0, 150000]]
Example configuration file for mpes (METIS momentum microscope at FHI-Berlin)#
core:
# The loader to use. The mpes loader allows for loading hdf5 files from the METIS momentum microscope.
loader: mpes
# Option to use the copy tool to mirror data to a local storage location before processing.
use_copy_tool: False
# path to the root of the source data directory
copy_tool_source: "/path/to/data/"
# path to the root of the local data storage
copy_tool_dest: "/path/to/localDataStore/"
# optional keywords for the copy tool:
copy_tool_kwds:
# number of parallel copy jobs
ntasks: 20
# group id to set for copied files and folders
gid: 1001
dataframe:
# hdf5 group names to read from the h5 files (for mpes reader)
hdf5_groupnames: ["Stream_0", "Stream_1", "Stream_2", "Stream_4"]
# aliases to assign to the dataframe columns for the corresponding hdf5 streams
hdf5_aliases:
Stream_0: "X"
Stream_1: "Y"
Stream_2: "t"
Stream_4: "ADC"
# dataframe column name for the time stamp column
time_stamp_alias: "timeStamps"
# hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps)
ms_markers_group: "msMarkers"
# hdf5 attribute containing the timestamp of the first event in a file
first_event_time_stamp_key: "FirstEventTimeStamp"
# Time stepping in seconds of the successive events in the timed dataframe
timed_dataframe_unit_time: 0.001
# list of columns to apply jitter to
jitter_cols: ["X", "Y", "t", "ADC"]
# dataframe column containing x coordinates
x_column: "X"
# dataframe column containing y coordinates
y_column: "Y"
# dataframe column containing time-of-flight data
tof_column: "t"
# dataframe column containing analog-to-digital data
adc_column: "ADC"
# dataframe column containing corrected x coordinates
corrected_x_column: "Xm"
# dataframe column containing corrected y coordinates
corrected_y_column: "Ym"
# dataframe column containing corrected time-of-flight data
corrected_tof_column: "tm"
# dataframe column containing kx coordinates
kx_column: "kx"
# dataframe column containing ky coordinates
ky_column: "ky"
# dataframe column containing energy data
energy_column: "energy"
# dataframe column containing delay data
delay_column: "delay"
# time length of a base time-of-flight bin in s
tof_binwidth: 4.125e-12
# Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1))
tof_binning: 2
# binning factor used for the adc coordinate (2^(adc_binning-1))
adc_binning: 3
# Default units for dataframe entries
units:
X: 'step'
Y: 'step'
t: 'step'
tof_voltage: 'V'
extractor_voltage: 'V'
extractor_current: 'A'
cryo_temperature: 'K'
sample_temperature: 'K'
dld_time: 'ns'
delay: 'ps'
timeStamp: 's'
energy: 'eV'
E: 'eV'
kx: '1/A'
ky: '1/A'
energy:
# Number of bins to use for energy calibration traces
bins: 1000
# Bin ranges to use for energy calibration curves (for tof_binning=0)
ranges: [128000, 138000]
# hdf5 path to attribute storing bias information for a given file
bias_key: "@KTOF:Lens:Sample:V"
# Option to normalize energy calibration traces
normalize: True
# Pixel range for smoothing
normalize_span: 7
# Spline order for smoothing
normalize_order: 1
# Radius parameter for fastdtw algorithm to find path correspondence
fastdtw_radius: 2
# Window around a peak to make sure that no other peaks are present
peak_window: 7
# Method to use for energy calibration
calibration_method: "lmfit"
# Energy scale to use for energy calibration
energy_scale: "kinetic"
# Approximate position of the high-energy-cutoff in tof_column bins,
# used for displaying a graph to choose the energy correction function parameters.
tof_fermi: 132250
# TOF range to visualize for the correction tool around tof_fermi
tof_width: [-600, 1000]
# x-integration range for the correction tool around the center pixel
x_width: [-20, 20]
# y-integration range for the correction tool around the center pixel
y_width: [-20, 20]
# High intensity cutoff for the visualization tool
color_clip: 300
correction:
# Correction type
correction_type: "Lorentzian"
# Correction amplitude
amplitude: 2.5
# center coordinates for the correction (in detector coordinates)
center: [730.0, 730.0]
# gamma value for the Lorentzian correction (same for x and y)
gamma: 920.0
# sigma value for the gaussian correction (same for x and y)
sigma: 700.0
# diameter value for the radial correction (same for x and y)
diameter: 3000.0
# Default energy calibration
calibration:
# time-of-flight distance (in m)
d: 1.058206295066418
# time offset (in s)
t0: 7.684410678887588e-07
# energy offset (in eV)
E0: -30.440035779171833
# energy scale of calibration
energy_scale: "kinetic"
momentum:
# binning axes to use for momentum correction/calibration.
# Axes names starting with "@" refer to keys in the "dataframe" section
axes: ["@x_column", "@y_column", "@tof_column"]
# Bin numbers used for the respective axes
bins: [512, 512, 300]
# bin ranges to use (in unbinned detector coordinates)
ranges: [[-256, 1792], [-256, 1792], [132000, 136000]]
# The x/y pixel ranges of the detector
detector_ranges: [[0, 2048], [0, 2048]]
# The center pixel of the detector in the binned x/y coordinates
center_pixel: [256, 256]
# Sigma parameter for feature selection (intensity above background)
sigma: 5
# FWHM parameter for feature selection (width of features to extract)
fwhm: 8
# Sigma_radius parameter for feature selection (variation of radius size)
sigma_radius: 1
# default momentum calibration
calibration:
# x momentum scaling factor
kx_scale: 0.010729535670610963
# y momentum scaling factor
ky_scale: 0.010729535670610963
# x BZ center pixel
x_center: 256.0
# y BZ center pixel
y_center: 256.0
# x start value of the calibration dataset
rstart: -256.
# y start value of the calibration dataset
cstart: -256.
# x direction pixel stepping of the calibration dataset
rstep: 4.0
# y direction pixel stepping of the calibration dataset
cstep: 4.0
correction:
# default feature points used for calculating the distortion correction.
feature_points: [[203.2, 341.96], [299.16, 345.32], [350.25, 243.70], [304.38, 149.88], [199.52, 152.48], [154.28, 242.27], [248.29, 248.62]]
# rotational symmetry of the structure used for correction. Should be an even number
rotation_symmetry: 6
# Option whether the center of the structure is included in the feature points.
include_center: True
# Option whether the center should be included in the correction algorithm
use_center: True
delay:
# value ranges of the analog-to-digital converter axes used for encoding the delay stage position
# (in unbinned coordinates)
adc_range: [1900, 25600]
# hdf5 attribute containing the starting point of the delay stage
p1_key: "@trARPES:DelayStage:p1"
# hdf5 attribute containing the end point of the delay stage
p2_key: "@trARPES:DelayStage:p2"
# hdf5 attribute containing the t0 value of the delay stage
t0_key: "@trARPES:DelayStage:t0"
binning:
# Histogram computation mode to use.
hist_mode: "numba"
# Mode for histogram recombination to use
mode: "fast"
# Whether to display a progress bar
pbar: True
# Number of parallel binning threads to use
num_cores: 20
# Number of multithreading threads per worker thread
threads_per_worker: 4
# API for numpy multithreading
threadpool_API: "blas"
histogram:
# number of bins used for histogram visualization
bins: [80, 80, 80, 80]
# default axes to use for histogram visualization.
# Axes names starting with "@" refer to keys in the "dataframe" section
axes: ["@x_column", "@y_column", "@tof_column", "@adc_column"]
# default ranges to use for histogram visualization (in unbinned detector coordinates)
ranges: [[0, 1800], [0, 1800], [128000, 138000], [0, 32000]]
metadata:
# URL of the epics archiver request engine
archiver_url: "http://aa0.fhi-berlin.mpg.de:17668/retrieval/data/getData.json?pv="
# EPICS channels to collect from EPICS archiver
epics_pvs: ["KTOF:Lens:Extr:I", "trARPES:Carving:TEMP_RBV", "trARPES:XGS600:PressureAC:P_RD", "KTOF:Lens:UDLD:V", "KTOF:Lens:Sample:V", "KTOF:Apertures:m1.RBV", "KTOF:Apertures:m2.RBV", "KTOF:Apertures:m3.RBV", "trARPES:Carving:TRX.RBV", "trARPES:Carving:TRY.RBV", "trARPES:Carving:TRZ.RBV", "trARPES:Carving:THT.RBV", "trARPES:Carving:PHI.RBV", "trARPES:Carving:OMG.RBV"]
# hdf5 attribute containing the field aperture "in" motor position
fa_in_channel: 'KTOF:Apertures:m1.RBV'
# hdf5 attribute containing the field aperture "hor" motor position
fa_hor_channel: 'KTOF:Apertures:m2.RBV'
# hdf5 attribute containing the contrast aperture "in" motor position
ca_in_channel: 'KTOF:Apertures:m3.RBV'
# dictionary containing contrast and field aperture motor positions and sizes
aperture_config:
"2018-01-23T19:35:15":
fa_size:
'750': [[-3.0, -1.4], [-5.4, -4.6]]
grid: [[-3.0, -1.4], [0.15, 1.75]]
'1500': [[-3.0, -1.4], [6.25, 7.75]]
'200': [[3.3, 4.4], [-5.4, -4.6]]
'500': [[3.3, 4.4], [0.15, 1.75]]
'1000': [[3.3, 4.4], [6.25, 7.75]]
'20': [[9.6, 10.1], [-5.4, -4.6]]
'50': [[9.6, 10.1], [0.15, 1.75]]
'100': [[9.6, 10.1], [6.25, 7.75]]
open: [[-15, -9.0], [-15, -8.9]]
ca_size:
'50': [8.0, 8.4]
'200': [-0.5, -0.9]
'100': [3.4, 3.8]
grid: [-5.3, -5.9]
open: [-12.0, -8]
"2020-01-23T19:35:15":
fa_size:
'750': [[-6.2, -4.8], [5.0, 6.0]]
grid: [[-6.2, -4.8], [-0.7, -0.3]]
'500': [[-6.2, -4.8], [-7.0, -6.0]]
'200': [[0.5, 0.9], [-0.7, -0.3]]
'100': [[0.5, 0.9], [-7.0, -6.0]]
'300': [[0.5, 0.9], [5.0, 6.0]]
'10': [[6.5, 6.9], [-7.0, -6.0]]
'20': [[6.5, 6.9], [-0.7, -0.3]]
'50': [[6.5, 6.9], [5.0, 6.0]]
open: [[-15, -8.5], [-15, -8.9]]
ca_size:
'50': [9.0, 11.0]
'300': [-0.1, 0.1]
'200': [0.7, 1.5]
'100': [5.1, 5.9]
grid: [-5.5, -5.2]
open: [-15, -8.5]
# dictionary containing lens mode configurations
lens_mode_config:
"6kV_kmodem4.0_20VTOF_v3.sav":
Extr: 6000.0
UCA: 1200
UFA: 600.0
Z1: 2452.9
Z2: 1489.9
A: 420.07
B: 2494.8
C: 489.2
D: 228.05
E: 113.82
F: 54.232
G: 20.0
H: 25.5
I: 36.0
TOF: 20.0
MCPfront: 20.0
"6kV_kmodem4.0_30VTOF_453ns_focus.sav":
Extr: 6000.0
UCA: 1200
UFA: 600.0
Z1: 2452.9
Z2: 1489.9
A: 403.07
B: 2500
C: 422.25
D: 208.88
E: 199.49
F: 68.735
G: 30.0
H: 30.0
I: 44.5
TOF: 30.0
MCPfront: 30.0
nexus:
# pynxtools reader to use for saving NXmpes files
reader: "mpes"
# NeXus application definition to use for saving
definition: "NXmpes"
# List containing additional input files to be handed to the pynxtools converter tool,
# e.g. containing a configuration file, and additional metadata.
input_files: ["../sed/config/NXmpes_config.json"]
Example configuration file for flash (HEXTOF momentum microscope at FLASH, Desy)#
# This file contains the default configuration for the flash loader.
core:
# defines the loader
loader: flash
# the beamline where the experiment took place
beamline: pg2
# the ID number of the beamtime
beamtime_id: 11019101
# the year of the beamtime
year: 2023
# The paths to the raw and parquet data directories. If these are not
# provided, the loader will try to find the data based on year, beamtime ID, etc.
paths:
# location of the raw data.
data_raw_dir: ""
# location of the intermediate parquet files.
data_parquet_dir: ""
binning:
# Since this will run on maxwell most probably, we have a lot of cores at our disposal
num_cores: 100
dataframe:
# The name of the DAQ system to use. Necessary to resolve the filenames/paths.
daq: fl1user3
# The offset correction to the pulseId
ubid_offset: 5
# the number of iterations to fill the pulseId forward.
forward_fill_iterations: 2
# if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column
split_sector_id_from_dld_time: True
# bits reserved for dldSectorID in the dldTimeSteps column
sector_id_reserved_bits: 3
# dataframe column containing x coordinates
x_column: dldPosX
# dataframe column containing corrected x coordinates
corrected_x_column: "X"
# dataframe column containing kx coordinates
kx_column: "kx"
# dataframe column containing y coordinates
y_column: dldPosY
# dataframe column containing corrected y coordinates
corrected_y_column: "Y"
# dataframe column containing ky coordinates
ky_column: "ky"
# dataframe column containing time-of-flight data
tof_column: dldTimeSteps
# dataframe column containing time-of-flight data in ns
tof_ns_column: dldTime
# dataframe column containing corrected time-of-flight data
corrected_tof_column: "tm"
# time length of a base time-of-flight bin in seconds
tof_binwidth: 2.0576131995767355E-11
# binning parameter for time-of-flight data. 2**tof_binning bins per base bin
tof_binning: 3 # power of 2, 3 means 8 bins per step
# dataframe column containing sector ID. obtained from dldTimeSteps column
sector_id_column: dldSectorID
sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.]
# the delay stage column
delay_column: delayStage
# the corrected pump-probe time axis
corrected_delay_column: pumpProbeTime
jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"]
units:
# These are the units of the columns
dldPosX: 'step'
dldPosY: 'step'
dldTimeSteps: 'step'
tof_voltage: 'V'
extractorVoltage: 'V'
extractorCurrent: 'A'
cryoTemperature: 'K'
sampleTemperature: 'K'
dldTime: 'ns'
delay: 'ps'
delayStage: 'ps'
timeStamp: 's'
energy: 'eV'
E: 'eV'
kx: '1/A'
ky: '1/A'
# The channels to load.
# channels have the following structure:
# channelAlias:
# format: per_pulse/per_electron/per_train
# group_name: the hdf5 group path
# slice: if the group contains multidimensional data, where to slice
channels:
# The timestamp
timeStamp:
format: per_train
group_name: "/uncategorised/FLASH.DIAG/TIMINGINFO/TIME1.BUNCH_FIRST_INDEX.1/"
# pulse ID is a necessary channel for using the loader.
pulseId:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 2
# detector x position
dldPosX:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 1
# detector y position
dldPosY:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 0
# Detector time-of-flight channel
# if split_sector_id_from_dld_time is set to True, this will generate
# also the dldSectorID channel
dldTimeSteps:
format: per_electron
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 3
# The auxiliary channel has a special structure where the group further contains
# a multidimensional structure so further aliases are defined below
dldAux:
format: per_pulse
group_name: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/"
slice: 4
dldAuxChannels:
sampleBias: 0
tofVoltage: 1
extractorVoltage: 2
extractorCurrent: 3
cryoTemperature: 4
sampleTemperature: 5
dldTimeBinSize: 15
# ADC containing the pulser sign (1: value approx. 35000, 0: 33000)
pulserSignAdc:
format: per_pulse
group_name: "/FL1/Experiment/PG/SIS8300 100MHz ADC/CH6/TD/"
# the energy of the monochromatized beam. This is a quasi-static value.
# there is a better channel which still needs implementation.
monochromatorPhotonEnergy:
format: per_train
group_name: "/FL1/Beamlines/PG/Monochromator/monochromator photon energy/"
# The GMDs can not be read yet...
gmdBda:
format: per_train
group_name: "/FL1/Photon Diagnostic/GMD/Average energy/energy BDA/"
# Beam Arrival Monitor, vital for pump-probe experiments as it can compensate SASE
# timing fluctuations.
# Here we use the DBC2 BAM as the "normal" one is broken.
bam:
format: per_pulse
group_name: "/uncategorised/FLASH.SDIAG/BAM.DAQ/FL0.DBC2.ARRIVAL_TIME.ABSOLUTE.SA1.COMP/"
# The delay Stage position, encoding the pump-probe delay
delayStage:
format: per_train
group_name: "/zraw/FLASH.SYNC/LASER.LOCK.EXP/F1.PG.OSC/FMC0.MD22.1.ENCODER_POSITION.RD/dGroup/"
# The prefixes of the stream names for different DAQ systems for parsing filenames
# (Not to be changed by user)
stream_name_prefixes:
pbd: "GMD_DATA_gmd_data"
pbd2: "FL2PhotDiag_pbd2_gmd_data"
fl1user1: "FLASH1_USER1_stream_2"
fl1user2: "FLASH1_USER2_stream_2"
fl1user3: "FLASH1_USER3_stream_2"
fl2user1: "FLASH2_USER1_stream_2"
fl2user2: "FLASH2_USER2_stream_2"
# The beamtime directories for different DAQ systems.
# (Not to be changed by user)
beamtime_dir:
pg2: "/asap3/flash/gpfs/pg2/"
# metadata collection from scicat
# metadata:
# scicat_url: <URL>
# scicat_token: <TOKEN>
# The nexus collection routine shall be finalized soon for both instruments
nexus:
reader: "mpes"
definition: "NXmpes"
input_files: ["../sed/config/NXmpes_config-HEXTOF.json"]