Source code for pylorenzmie.analysis.MLPEstimator

'''Fast parameter estimator using a pre-trained radial-profile MLP.'''

from dataclasses import dataclass, field
from importlib.resources import files
from pathlib import Path

import numpy as np
import pandas as pd
import joblib

from pylorenzmie.lib import Azimuthal
from pylorenzmie.lib.lmtypes import Properties, Result
from pylorenzmie.analysis.BaseEstimator import BaseEstimator
from pylorenzmie.analysis.Hologram import Hologram
from pylorenzmie.theory import LorenzMie


def _default_weights() -> Path:
    '''Return the path of the pre-trained weights shipped with the package.

    The file ``mlp_estimator.joblib`` is looked up inside the
    ``pylorenzmie.analysis`` package through :mod:`importlib.resources`
    and the resulting resource handle is converted to a plain
    :class:`~pathlib.Path` via its string form.
    '''
    package_root = files('pylorenzmie.analysis')
    resource = package_root.joinpath('mlp_estimator.joblib')
    return Path(str(resource))


def _log_targets(y: np.ndarray) -> np.ndarray:
    '''Return a float copy of *y* with columns 0 and 1 log-transformed.

    Column 0 holds z_p and column 1 holds a_p; both are replaced by
    their natural logarithm.  Column 2 (n_p) is passed through as is.
    The caller's array is never modified.

    This function lives in this module (rather than in the training
    script) and must keep this exact name: joblib looks it up by
    reference when deserializing the pipeline from
    mlp_estimator.joblib.
    '''
    transformed = y.astype(float)  # astype copies by default
    for column in (0, 1):
        transformed[:, column] = np.log(transformed[:, column])
    return transformed


def _exp_targets(y: np.ndarray) -> np.ndarray:
    '''Return a float copy of *y* with columns 0 and 1 exponentiated.

    Inverse of :func:`_log_targets`: columns 0 and 1 are mapped through
    ``exp`` while any remaining columns are passed through as is.  The
    caller's array is never modified.
    '''
    restored = y.astype(float)  # astype copies by default
    for column in (0, 1):
        restored[:, column] = np.exp(restored[:, column])
    return restored



[docs]
@dataclass
class MLPEstimator(BaseEstimator):
    '''Fast particle parameter estimator using a pre-trained MLP.

    Computes the azimuthal average of the hologram, pads or truncates it
    to a fixed length, and runs a single forward pass through a scikit-learn
    :class:`MLPRegressor` to predict *z_p*, *a_p*, and *n_p* in one shot.
    Inference takes < 1 ms regardless of image size.

    The MLP was trained on synthetic radial profiles generated by the
    1-D radial trick: ``LorenzMie`` evaluated at ``(r, 0)`` for
    ``r = 0 .. n_features-1``, which equals the azimuthal average for a
    rotationally symmetric scatterer. Parameters are drawn log-uniformly
    over *z_p* ∈ [20, 500] px, *a_p* ∈ [0.2, 4] μm and uniformly over
    *n_p* ∈ [1.3, 2.5].  Each profile is randomly truncated during
    training so the network handles both large and small crops gracefully.
    Pre-trained weights cover the default instrument (447 nm laser,
    0.048 μm/px, water). Retrain with ``devel/train_mlp_estimator.py``
    for other configurations.

    Inherits from :class:`BaseEstimator`.

    Parameters
    ----------
    model : LorenzMie
        Generative scattering model shared with :class:`Optimizer`.
        Particle parameters are updated in-place on each :meth:`estimate`
        call; instrument parameters are read but not modified.
    weights : Path or str, optional
        Path to a ``joblib``-serialized scikit-learn ``Pipeline``
        (``StandardScaler`` → ``TransformedTargetRegressor(MLPRegressor)``).
        Default: the pre-trained weights bundled with the package.
        The file is unpickled on construction, so it must come from a
        trusted source.
    n_features : int, optional
        Fixed input length passed to the MLP. Must match the value used
        during training. Default: 100.

    Raises
    ------
    ValueError
        If the loaded pipeline reports an input width
        (``n_features_in_``) different from *n_features*.

    Notes
    -----
    ``x_p`` and ``y_p`` are pinned to the pixel-coordinate means of the
    hologram (same convention as :class:`DEEstimator` and
    :class:`RadialEstimator`).

    The profile from :func:`~pylorenzmie.lib.Azimuthal.avg` is truncated
    to the first *n_features* values. Positions beyond the profile end
    are padded with ``0.0`` — a sentinel that is clearly outside the
    normalised hologram range (which is positive and O(1)).  The training
    data uses the same sentinel after random truncation, so the network
    has seen this pattern and uses the zero tail to infer crop size.

    Predicted values are clipped to loose bounds — *z_p* ∈ [10, 600],
    *a_p* ∈ [0.1, 10] and *n_p* ∈ [1.0, 3.0] — before being written to
    the model.  These limits are deliberately wider than the training
    ranges quoted above; the :class:`Optimizer` will refine the values
    further.
    '''

    model: LorenzMie
    weights: Path = field(default_factory=_default_weights)
    n_features: int = 100

    def __post_init__(self) -> None:
        '''Load the serialized pipeline and check its input width.'''
        # SECURITY: joblib files are pickles, and unpickling can execute
        # arbitrary code.  Only point ``weights`` at files you trust.
        self._pipeline = joblib.load(self.weights)

        # Fail fast on a width mismatch instead of deferring the error
        # to the first predict() call.  The attribute is absent on
        # pipelines whose first step does not expose it, hence getattr.
        expected = getattr(self._pipeline, 'n_features_in_', None)
        if expected is not None and int(expected) != int(self.n_features):
            raise ValueError(
                f'n_features={self.n_features} does not match the '
                f'{expected} input features expected by the pipeline '
                f'loaded from {self.weights}')

    @BaseEstimator.properties.getter
    def properties(self) -> Properties:
        '''MLPEstimator configuration.

        Returns the weights path (as a string) and the fixed input
        length ``n_features``.
        '''
        return dict(weights=str(self.weights), n_features=self.n_features)

    def estimate(self, hologram: Hologram) -> Result:
        '''Estimate particle parameters from the azimuthal radial profile.

        The particle attached to ``self.model`` is updated in place
        with the estimated position, radius and refractive index.

        Parameters
        ----------
        hologram : Hologram
            Normalised hologram crop to analyse.

        Returns
        -------
        result : pandas.Series
            Estimated particle properties (same keys as
            :attr:`~pylorenzmie.theory.Particle.properties`).
        '''
        # In-plane position: mean of the hologram's pixel coordinates.
        x_p = float(hologram.coordinates[0].mean())
        y_p = float(hologram.coordinates[1].mean())

        # Azimuthal.avg is given the center relative to the crop corner.
        cx, cy = hologram.corner
        profile = Azimuthal.avg(hologram.data,
                                center=(x_p - cx, y_p - cy))

        # Fixed-length feature vector: truncate long profiles, leave the
        # tail at 0 (the "no data" sentinel) for short ones.
        features = np.zeros(self.n_features)
        n = min(len(profile), self.n_features)
        features[:n] = profile[:n]

        # Single-sample prediction; the pipeline expects a 2-D input.
        z_p, a_p, n_p = self._pipeline.predict(
            features.reshape(1, -1))[0]

        # Clip bounds are wider than the training ranges listed in the
        # class docstring; they only guard against wild extrapolation.
        particle = self.model.particle
        particle.x_p = x_p
        particle.y_p = y_p
        particle.z_p = float(np.clip(z_p, 10., 600.))
        particle.a_p = float(np.clip(a_p, 0.1, 10.))
        particle.n_p = float(np.clip(n_p, 1.0, 3.0))

        return pd.Series(self.model.particle.properties)

    @classmethod
    def example(cls) -> None:  # pragma: no cover
        '''Run the estimator on the example hologram and print the result.

        Configures a :class:`LorenzMie` model for the default instrument
        (0.447 wavelength, 0.048 magnification, medium index 1.34),
        times a single :meth:`estimate` call and prints the elapsed
        time together with the estimated properties.
        '''
        from time import perf_counter
        from pylorenzmie.utilities import example_hologram

        model = LorenzMie()
        model.instrument.wavelength = 0.447
        model.instrument.magnification = 0.048
        model.instrument.n_m = 1.34

        estimator = cls(model=model)

        print(f'{cls.__name__} example')
        start = perf_counter()
        result = estimator.estimate(example_hologram())
        print(f'Time: {perf_counter() - start:.3f} s')
        print(result)




if __name__ == '__main__':  # pragma: no cover
    MLPEstimator.example()