Source code for toolbox_scs.misc.bunch_pattern

# -*- coding: utf-8 -*-
""" Toolbox for SCS.

    Various utility functions to quickly process data
    measured at the SCS instruments.

    Copyright (2019) SCS Team.
"""

import os
import logging

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from extra_data.read_machinery import find_proposal
from extra_data import RunDirectory

# Import and hide the variable so that it does not alter the namespace.
from ..constants import mnemonics as _mnemonics_bp
from ..mnemonics_machinery import mnemonics_for_run
from .bunch_pattern_external import is_pulse_at

__all__ = [
    'extractBunchPattern',
    'get_sase_pId',
    'npulses_has_changed',
    'pulsePatternInfo',
    'repRate',
]

log = logging.getLogger(__name__)


def npulses_has_changed(run, loc='sase3', run_mnemonics=None):
    """
    Checks if the number of pulses has changed during the run for
    a specific location `loc` (='sase1', 'sase3', 'scs_ppl' or 'laser').
    If the source is not found in the run, returns True.

    Parameters
    ----------
    run: extra_data.DataCollection
        DataCollection containing the data.
    loc: str
        The location where to check: {'sase1', 'sase3', 'scs_ppl'}
    run_mnemonics: dict
        the mnemonics for the run (see `mnemonics_for_run`)

    Returns
    -------
    ret: bool
        True if the number of pulses has changed or the source was not
        found, False if the number of pulses did not change.
    """
    sase_list = ['sase1', 'sase3', 'laser', 'scs_ppl']
    if loc not in sase_list:
        raise ValueError(f"Unknown sase location '{loc}'. Expected one of "
                         f"{sase_list}")
    if run_mnemonics is None:
        run_mnemonics = mnemonics_for_run(run)
    if loc == 'scs_ppl':
        loc = 'laser'
    if loc not in run_mnemonics:
        return True
    if run_mnemonics[loc]['key'] not in run[run_mnemonics[loc]['source']].keys():
        log.info(f'Mnemonic {loc} not found in run.')
        return True
    npulses = run.get_array(*run_mnemonics['npulses_' + loc].values())
    if len(np.unique(npulses)) == 1:
        return False
    return True

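# Usage sketch (kept as a comment so it is not executed on import; the
# proposal and run numbers below are placeholders): a stable pulse pattern
# means a single pulse-Id axis can be used for the whole run, otherwise the
# bunch pattern table has to be inspected train by train.
#
#     from extra_data import open_run
#     run = open_run(proposal=2212, run=208)  # hypothetical proposal/run
#     if npulses_has_changed(run, loc='sase3'):
#         print('SASE 3 pulse pattern changed, use the bunch pattern table')
#     else:
#         print('SASE 3 pulse pattern is constant over the run')
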
def get_unique_sase_pId(run, loc='sase3', run_mnemonics=None):
    """
    Assuming that the number of pulses did not change during the run,
    returns the pulse Ids as the run value of the sase mnemonic.

    Parameters
    ----------
    run: extra_data.DataCollection
        DataCollection containing the data.
    loc: str
        The location where to check: {'sase1', 'sase3', 'scs_ppl'}
    run_mnemonics: dict
        the mnemonics for the run (see `mnemonics_for_run`)

    Returns
    -------
    pulseIds: np.array
        the pulse ids at the specified location. Returns None if the
        mnemonic is not in the run.
    """
    if run_mnemonics is None:
        run_mnemonics = mnemonics_for_run(run)
    if loc == 'scs_ppl':
        loc = 'laser'
    if loc not in run_mnemonics:
        # bunch pattern not recorded
        return None
    npulses = run.get_run_value(run_mnemonics['npulses_' + loc]['source'],
                                run_mnemonics['npulses_' + loc]['key'])
    pulseIds = run.get_run_value(run_mnemonics[loc]['source'],
                                 run_mnemonics[loc]['key'])[:npulses]
    return pulseIds

def get_sase_pId(run, loc='sase3', run_mnemonics=None,
                 bpt=None, merge_with=None):
    """
    Returns the pulse Ids of the specified `loc` during a run.
    If the number of pulses has changed during the run, it loads the
    bunch pattern table and extracts all pulse Ids used.

    Parameters
    ----------
    run: extra_data.DataCollection
        DataCollection containing the data.
    loc: str
        The location where to check: {'sase1', 'sase3', 'scs_ppl'}
    run_mnemonics: dict
        the mnemonics for the run (see `mnemonics_for_run`)
    bpt: 2D-array
        The bunch pattern table. Used only if the number of pulses has
        changed. If None, it is loaded on the fly.
    merge_with: xarray.Dataset
        dataset that may contain the bunch pattern table to use in case
        the number of pulses has changed. If merge_with does not contain
        the bunch pattern table, it is loaded and added as a variable
        'bunchPatternTable' to merge_with.

    Returns
    -------
    pulseIds: np.array
        the pulse ids at the specified location. Returns None if the
        mnemonic is not in the run.
    """
    if npulses_has_changed(run, loc, run_mnemonics) is False:
        return get_unique_sase_pId(run, loc, run_mnemonics)
    if bpt is None:
        bpt = load_bpt(run, merge_with, run_mnemonics)
    if bpt is not None:
        mask = is_pulse_at(bpt, loc)
        return np.unique(np.nonzero(mask.values)[1])
    return None

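# Usage sketch (assumes a hypothetical proposal/run in which the bunch
# pattern mnemonics are recorded): get_sase_pId hides the distinction between
# a constant pattern (cheap run-value lookup) and a changing one (full bunch
# pattern table scan).
#
#     from extra_data import open_run
#     run = open_run(proposal=2212, run=208)  # hypothetical proposal/run
#     sase3_ids = get_sase_pId(run, loc='sase3')
#     ppl_ids = get_sase_pId(run, loc='scs_ppl')
#     print(sase3_ids, ppl_ids)
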
def load_bpt(run, merge_with=None, run_mnemonics=None):
    """
    Load the bunch pattern table. It returns the one contained in
    merge_with if possible. Otherwise, it loads the table and adds it
    to merge_with.

    Parameters
    ----------
    run: extra_data.DataCollection
        DataCollection containing the data.
    merge_with: xarray.Dataset
        dataset that may contain the bunch pattern table, or to which
        the bunch pattern table is added once loaded.
    run_mnemonics: dict
        the mnemonics for the run (see `mnemonics_for_run`)

    Returns
    -------
    bpt: xarray.DataArray
        the bunch pattern table as specified by the mnemonic
        'bunchPatternTable'
    """
    if run_mnemonics is None:
        run_mnemonics = mnemonics_for_run(run)
    for key in ['bunchPatternTable', 'bunchPatternTable_SA3']:
        if merge_with is not None and key in merge_with:
            log.debug(f'Using {key} from merge_with dataset.')
            return merge_with[key]
        if key in run_mnemonics:
            bpt = run.get_array(*run_mnemonics[key].values(),
                                name='bunchPatternTable')
            log.debug(f'Loaded {key} from DataCollection.')
            if merge_with is not None:
                merge_with.update(merge_with.merge(bpt, join='inner'))
            return bpt
    log.debug('Could not find bunch pattern table.')
    return None

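# Usage sketch (assuming `ds` is an xarray Dataset previously built from the
# same run; the name is illustrative): when merge_with already holds
# 'bunchPatternTable', no data is re-read from disk.
#
#     bpt = load_bpt(run, merge_with=ds)
#     # later calls with the same ds reuse ds['bunchPatternTable']
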
def extractBunchPattern(bp_table=None, key='sase3', runDir=None):
    ''' generate the bunch pattern and number of pulses of a source directly
        from the bunch pattern table and not using the MDL device
        BUNCH_DECODER. This is inspired by the euxfel_bunch_pattern package,
        https://git.xfel.eu/gitlab/karaboDevices/euxfel_bunch_pattern

        Inputs:
            bp_table: DataArray corresponding to the mnemonics
                "bunchPatternTable". If None, the bunch pattern table is
                loaded using runDir.
            key: str, ['sase1', 'sase2', 'sase3', 'scs_ppl']
            runDir: extra-data DataCollection. Required only if bp_table
                is None.

        Outputs:
            bunchPattern: DataArray containing indices of the sase/laser
                pulses for each train
            npulses: DataArray containing the number of pulses for each train
            matched: 2-D DataArray mask (trainId x 2700), True where 'key'
                has pulses
    '''
    keys = ['sase1', 'sase2', 'sase3', 'scs_ppl']
    if key not in keys:
        raise ValueError(f'Invalid key "{key}", possible values are {keys}')
    if bp_table is None:
        if runDir is None:
            raise ValueError('bp_table and runDir cannot both be None')
        bp_mnemo = _mnemonics_bp['bunchPatternTable']
        if bp_mnemo['source'] not in runDir.all_sources:
            raise ValueError('Source {} not found in run'.format(
                bp_mnemo['source']))
        else:
            bp_table = runDir.get_array(bp_mnemo['source'], bp_mnemo['key'],
                                        extra_dims=bp_mnemo['dim'])
    # define relevant masks, see euxfel_bunch_pattern package for details
    DESTINATION_MASK = 0xf << 18
    DESTINATION_T4D = 4 << 18  # SASE1/3 dump
    DESTINATION_T5D = 2 << 18  # SASE2 dump
    PHOTON_LINE_DEFLECTION = 1 << 27  # Soft kick (e.g. SA3)
    LASER_SEED6 = 1 << 13
    if 'sase' in key:
        sase = int(key[4])
        destination = DESTINATION_T5D if (sase == 2) else DESTINATION_T4D
        matched = (bp_table & DESTINATION_MASK) == destination
        if sase == 1:
            # Pulses to SASE 1 when soft kick is off
            matched &= (bp_table & PHOTON_LINE_DEFLECTION) == 0
        elif sase == 3:
            # Pulses to SASE 3 when soft kick is on
            matched &= (bp_table & PHOTON_LINE_DEFLECTION) != 0
    elif key == 'scs_ppl':
        matched = (bp_table & LASER_SEED6) != 0

    # create table of indices where bunch pattern and mask match
    nz = np.nonzero(matched.values)
    dim_pId = matched.shape[1]
    bunchPattern = np.ones(matched.shape, dtype=np.uint64) * dim_pId
    bunchPattern[nz] = nz[1]
    bunchPattern = np.sort(bunchPattern)
    npulses = np.count_nonzero(bunchPattern < dim_pId, axis=1)
    bunchPattern[bunchPattern == dim_pId] = 0
    bunchPattern = xr.DataArray(bunchPattern[:, :1000],
                                dims=['trainId', 'bunchId'],
                                coords={'trainId': matched.trainId},
                                name=key)
    npulses = xr.DataArray(npulses, dims=['trainId'],
                           coords={'trainId': matched.trainId},
                           name=f'npulses_{key}')
    return bunchPattern, npulses, matched

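# Minimal sketch of the bit test applied above to a single bunch-pattern
# word (the word value is made up for illustration): a SASE 3 bunch is one
# whose destination bits point to the T4D dump and whose soft-kick bit is
# set, whereas SASE 1 uses the same destination with the soft kick off.
#
#     word = (4 << 18) | (1 << 27)  # destination T4D + soft kick set
#     is_sase3 = ((word & (0xf << 18)) == (4 << 18)) and (word & (1 << 27)) != 0
#     is_sase1 = ((word & (0xf << 18)) == (4 << 18)) and (word & (1 << 27)) == 0
#     print(is_sase3, is_sase1)  # True, False
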
def pulsePatternInfo(data, plot=False):
    ''' display general information on the pulse patterns operated by SASE1
        and SASE3. This is useful to track changes of the number of pulses
        or of the mode of operation of SASE1 and SASE3. It also determines
        which SASE comes first in the train and the minimum separation
        between the two SASE sub-trains.

        Inputs:
            data: xarray Dataset containing pulse pattern info from the
                bunch decoder MDL: {'sase1', 'sase3', 'npulses_sase1',
                'npulses_sase3'}
            plot: bool enabling/disabling the plotting of the pulse patterns

        Outputs:
            print of pulse pattern info. If plot==True, plot of the pulse
            pattern.
    '''
    # Which SASE comes first?
    npulses_sa3 = data['npulses_sase3']
    npulses_sa1 = data['npulses_sase1']
    dedicated = False
    if np.all(npulses_sa1.where(npulses_sa3 != 0, drop=True) == 0):
        dedicated = True
        print('No SASE 1 pulses during SASE 3 operation')
    if np.all(npulses_sa3.where(npulses_sa1 != 0, drop=True) == 0):
        dedicated = True
        print('No SASE 3 pulses during SASE 1 operation')
    if dedicated is False:
        pulseIdmin_sa1 = data['sase1'].where(npulses_sa1 != 0).where(
            data['sase1'] > 1).min().values
        pulseIdmax_sa1 = data['sase1'].where(npulses_sa1 != 0).where(
            data['sase1'] > 1).max().values
        pulseIdmin_sa3 = data['sase3'].where(npulses_sa3 != 0).where(
            data['sase3'] > 1).min().values
        pulseIdmax_sa3 = data['sase3'].where(npulses_sa3 != 0).where(
            data['sase3'] > 1).max().values
        if pulseIdmin_sa1 > pulseIdmax_sa3:
            t = 0.220 * (pulseIdmin_sa1 - pulseIdmax_sa3 + 1)
            print('SASE 3 pulses come before SASE 1 pulses '
                  '(minimum separation %.1f µs)' % t)
        elif pulseIdmin_sa3 > pulseIdmax_sa1:
            t = 0.220 * (pulseIdmin_sa3 - pulseIdmax_sa1 + 1)
            print('SASE 1 pulses come before SASE 3 pulses '
                  '(minimum separation %.1f µs)' % t)
        else:
            print('Interleaved mode')

    # What is the pulse pattern of each SASE?
    for key in ['sase3', 'sase1']:
        print('\n*** %s pulse pattern: ***' % key.upper())
        npulses = data['npulses_%s' % key]
        sase = data[key]
        if not np.all(npulses == npulses[0]):
            print('Warning: number of pulses per train changed during the run!')
        # take the derivative along the trainId to track changes in pulse number:
        diff = npulses.diff(dim='trainId')
        # only keep trainIds where a change occurred:
        diff = diff.where(diff != 0, drop=True)
        # get a list of indices where a change occurred:
        idx_change = np.argwhere(np.isin(npulses.trainId.values,
                                         diff.trainId.values,
                                         assume_unique=True))[:, 0]
        # add index 0 to get the initial pulse number per train:
        idx_change = np.insert(idx_change, 0, 0)
        print('npulses\tindex From\tindex To\ttrainId From\ttrainId To\t'
              'rep. rate [kHz]')
        for i, idx in enumerate(idx_change):
            n = npulses[idx]
            idxFrom = idx
            trainIdFrom = npulses.trainId[idx]
            if i < len(idx_change) - 1:
                idxTo = idx_change[i + 1] - 1
            else:
                idxTo = npulses.shape[0] - 1
            trainIdTo = npulses.trainId[idxTo]
            if n <= 1:
                print('%i\t%i\t\t%i\t\t%i\t%i' % (n, idxFrom, idxTo,
                                                  trainIdFrom, trainIdTo))
            else:
                f = 1 / ((sase[idxFrom, 1] - sase[idxFrom, 0]) * 222e-6)
                print('%i\t%i\t\t%i\t\t%i\t%i\t%.0f' % (n, idxFrom, idxTo,
                                                        trainIdFrom,
                                                        trainIdTo, f))
        print('\n')
    if plot:
        plt.figure(figsize=(6, 3))
        plt.plot(data['npulses_sase3'].trainId, data['npulses_sase3'],
                 'o-', ms=3, label='SASE 3')
        plt.xlabel('trainId')
        plt.ylabel('pulses per train')
        plt.plot(data['npulses_sase1'].trainId, data['npulses_sase1'],
                 '^-', ms=3, color='C2', label='SASE 1')
        plt.legend()

def repRate(data=None, runNB=None, proposalNB=None, key='sase3'):
    ''' Calculates the pulse repetition rate (in kHz) in SASE according
        to the bunch pattern and assuming a grid of 4.5 MHz.

        Inputs:
        -------
        data: xarray Dataset containing the pulse pattern, needed if
            runNB is None
        runNB: int or str, run number. Needed if data is None
        proposalNB: int or str, proposal where to find the run. Needed
            if data is None
        key: str in [sase1, sase2, sase3, scs_ppl], source for which the
            repetition rate is calculated

        Output:
        -------
        f: repetition rate in kHz
    '''
    if runNB is None and data is None:
        raise ValueError('Please provide either the runNB + proposalNB or '
                         'the data argument.')
    if runNB is not None and proposalNB is None:
        raise ValueError('Proposal is missing.')
    if runNB is not None:
        if isinstance(runNB, int):
            runNB = 'r{:04d}'.format(runNB)
        if isinstance(proposalNB, int):
            proposalNB = 'p{:06d}'.format(proposalNB)
        runFolder = os.path.join(find_proposal(proposalNB), 'raw', runNB)
        runDir = RunDirectory(runFolder)
        bp_mnemo = _mnemonics_bp['bunchPatternTable']
        if bp_mnemo['source'] not in runDir.all_sources:
            raise ValueError('Source {} not found in run'.format(
                bp_mnemo['source']))
        else:
            bp_table = runDir.get_array(bp_mnemo['source'], bp_mnemo['key'],
                                        extra_dims=bp_mnemo['dim'])
        a, b, mask = extractBunchPattern(bp_table, key=key)
    else:
        if key not in ['sase1', 'sase3']:
            a, b, mask = extractBunchPattern(key=key,
                                             runDir=data.attrs['run'])
        else:
            a = data[key]
            b = data[f'npulses_{key}']
    a = a.where(b > 1, drop=True).values
    if len(a) == 0:
        print('Not enough pulses to extract repetition rate')
        return 0
    f = 1 / ((a[0, 1] - a[0, 0]) * 12e-3 / 54.1666667)
    return f

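# Usage sketch (the proposal and run numbers are placeholders): the returned
# value follows from the pulse-Id spacing on the 4.5 MHz grid, e.g. a spacing
# of one pulse Id between consecutive pulses gives roughly 4514 kHz with the
# formula above.
#
#     f_kHz = repRate(runNB=208, proposalNB=2212, key='sase3')
#     print(f'{f_kHz:.0f} kHz')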