project-ida
diff --git a/‎libs/surface_viewer/__init__.py‎
Lines changed: 56 additions & 0 deletions b/‎libs/surface_viewer/__init__.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎libs/surface_viewer/calibration.py‎
Lines changed: 78 additions & 0 deletions b/‎libs/surface_viewer/calibration.py‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎libs/surface_viewer/io.py‎
Lines changed: 193 additions & 0 deletions b/‎libs/surface_viewer/io.py‎
Lines changed: 193 additions & 0 deletions
@@ -0,0 +1,56 @@
+from .io import (
+    get_roi_name_from_api_url,
+    load_roi_api,
+    infer_dataset_base_from_api,
+    add_json_urls,
+    fetch_json_items,
+    build_spectrum_index,
+    attach_spectra,
+    get_selection_grid_url,
+    load_all_cells_from_selection_grid,
+)
+
+from .spectra import (
+    stack_spectra,
+    stack_spectra_trim,
+    band_sum,
+    summarize_band_values,
+    resolve_band_to_channels,
+    band_label_text,
+    print_cli_suggestions,
+)
+
+from .calibration import (
+    load_config_txt,
+    get_energy_cal_from_dataset,
+    make_energy_axis,
+    make_energy_axis_from_length,
+    channel_to_keV,
+    keV_to_channel,
+    maybe_get_calibration,
+)
+
+from .peaks import (
+    baseline_als,
+    preprocess,
+    estimate_noise,
+    detect_peaks,
+    line_library,
+    identify_elements,
+)
+
+from .plotting import (
+    get_plot_axis,
+    get_band_span,
+    add_energy_top_axis,
+    plot_cumulative,
+    plot_overlay,
+    plot_with_peaks,
+    plot_identified_elements_confident,
+)
+
+from .overlays import (
+    get_api_auth,
+    create_overlay,
+    delete_overlay,
+)
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import re
+from urllib.parse import urljoin
+
+import numpy as np
+import requests
+
+from .io import infer_dataset_base_from_api
+
+
+def load_config_txt(dataset_base: str) -> dict:
+    """Parse ``config.txt`` from a dataset folder in the same way as the viewer."""
+    url = urljoin(dataset_base, "config.txt")
+    r = requests.get(url, timeout=30)
+    if not r.ok:
+        return {}
+
+    cfg = {}
+    for line in r.text.splitlines():
+        s = re.sub(r"#.*$", "", line).strip()
+        if not s or "=" not in s:
+            continue
+        k, v = s.split("=", 1)
+        cfg[k.strip().lower()] = v.strip()
+    return cfg
+
+
+def get_energy_cal_from_dataset(
+    api_url: str,
+    default_eV_per_ch=20.000347,
+    default_start_eV=-192.768,
+):
+    dataset_base = infer_dataset_base_from_api(api_url)
+    cfg = load_config_txt(dataset_base)
+
+    eV_per_ch = float(cfg.get("eds_ev_per_ch", default_eV_per_ch))
+    start_eV = float(cfg.get("eds_start_ev", default_start_eV))
+
+    n_channels = cfg.get("eds_n_channels", None)
+    n_channels = int(n_channels) if n_channels and str(n_channels).isdigit() else None
+
+    return {
+        "dataset_base": dataset_base,
+        "eV_per_ch": eV_per_ch,
+        "start_eV": start_eV,
+        "n_channels": n_channels,
+        "raw_cfg": cfg,
+    }
+
+
+def make_energy_axis(cum, cal: dict):
+    n = len(cum) if cal.get("n_channels") is None else min(len(cum), cal["n_channels"])
+    return (cal["start_eV"] + np.arange(n) * cal["eV_per_ch"]) / 1000.0
+
+
+def make_energy_axis_from_length(n, cal: dict):
+    return (cal["start_eV"] + np.arange(n) * cal["eV_per_ch"]) / 1000.0
+
+
+def channel_to_keV(ch, cal: dict):
+    return (cal["start_eV"] + ch * cal["eV_per_ch"]) / 1000.0
+
+
+def maybe_get_calibration(roi_api_urls, need_calibration=False, allow_defaults=True):
+    if not need_calibration:
+        return None
+    first_url = roi_api_urls[0]
+    cal = get_energy_cal_from_dataset(first_url)
+    raw_cfg = cal.get("raw_cfg", {}) or {}
+    cal["from_config"] = ("eds_ev_per_ch" in raw_cfg and "eds_start_ev" in raw_cfg)
+    if not cal["from_config"] and not allow_defaults:
+        return None
+    return cal
+
+
+def keV_to_channel(keV, cal: dict):
+    return int(round((float(keV) * 1000.0 - cal["start_eV"]) / cal["eV_per_ch"]))
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+from urllib.parse import parse_qs, quote, urljoin, urlparse
+
+import pandas as pd
+import requests
+from tqdm.auto import tqdm
+
+
+def get_roi_name_from_api_url(api_url: str) -> str:
+    """Extract ROI name from the ROI API URL query parameter ``name``."""
+    p = urlparse(api_url)
+    qs = parse_qs(p.query)
+    roi_name = (qs.get("name") or [None])[0]
+    if not roi_name:
+        raise ValueError("Could not find 'name' parameter in the ROI API URL.")
+    return roi_name
+
+
+def load_roi_api(api_url: str) -> pd.DataFrame:
+    """Load ROI selections from the surface-viewer ROI API into a DataFrame."""
+    r = requests.get(api_url, timeout=60)
+    r.raise_for_status()
+    data = r.json()
+    selections = data.get("selections", [])
+    df = pd.DataFrame(selections)
+
+    if df.empty:
+        return df
+
+    for c in ["row", "col"]:
+        if c in df.columns:
+            df[c] = df[c].astype("int64")
+
+    for c in ["srcJson", "basename", "foldername"]:
+        if c in df.columns:
+            df[c] = df[c].astype("string")
+
+    return df
+
+
+def infer_dataset_base_from_api(api_url: str) -> str:
+    """Infer the dataset base URL ending in ``/`` from an ROI API URL."""
+    p = urlparse(api_url)
+    qs = parse_qs(p.query)
+    dataset = (qs.get("dataset") or [None])[0]
+    if not dataset:
+        raise ValueError("Could not find 'dataset' parameter in the API URL.")
+
+    root = f"{p.scheme}://{p.netloc}/surface-viewer/data/"
+    dataset_encoded = quote(dataset, safe="")
+    return urljoin(root, dataset_encoded + "/")
+
+
+def add_json_urls(df: pd.DataFrame, api_url: str) -> pd.DataFrame:
+    """Add a ``json_url`` column by resolving ``srcJson`` against the dataset base."""
+    if df.empty:
+        df = df.copy()
+        df["json_url"] = pd.Series(dtype="string")
+        return df
+
+    dataset_base = infer_dataset_base_from_api(api_url)
+    df = df.copy()
+    df["json_url"] = df["srcJson"].apply(lambda p: urljoin(dataset_base, str(p)))
+    return df
+
+
+def _new_session() -> requests.Session:
+    s = requests.Session()
+    s.headers.update({"User-Agent": "eds-demo-notebook/0.1"})
+    return s
+
+
+def fetch_json_items(url: str, session: requests.Session | None = None) -> list:
+    """GET a JSON file and return a list of records from either a list or ``{'items': ...}``."""
+    session = session or _new_session()
+    try:
+        r = session.get(url, timeout=60)
+        r.raise_for_status()
+        data = r.json()
+        if isinstance(data, dict) and isinstance(data.get("items"), list):
+            return data["items"]
+        if isinstance(data, list):
+            return data
+        return []
+    except Exception:
+        return []
+
+
+def build_spectrum_index(
+    urls: list[str],
+    *,
+    progress: bool = True,
+    session: requests.Session | None = None,
+) -> dict[tuple[str, int, int], list[int]]:
+    """Build a lookup ``(json_url, row, col) -> spectrum`` by reading each JSON file once."""
+    session = session or _new_session()
+    index: dict[tuple[str, int, int], list[int]] = {}
+
+    for url in tqdm(urls, desc="Downloading JSON files", disable=not progress):
+        items = fetch_json_items(url, session)
+        for rec in items:
+            r = rec.get("rownum", rec.get("row"))
+            c = rec.get("colnum", rec.get("col"))
+            spec = rec.get("aggregatedspectrum") or rec.get("aggregatedSpectrum") or rec.get("spectrum")
+            if r is None or c is None or spec is None:
+                continue
+            try:
+                key = (url, int(r), int(c))
+                index[key] = [int(x) for x in spec]
+            except Exception:
+                pass
+
+    return index
+
+
+def attach_spectra(
+    df: pd.DataFrame,
+    index: dict[tuple[str, int, int], list[int]],
+    *,
+    progress: bool = True,
+) -> pd.DataFrame:
+    """Add a ``spectrum`` column to a DataFrame using the pre-built spectrum index."""
+
+    def pick(row):
+        return index.get((row["json_url"], int(row["row"]), int(row["col"])), None)
+
+    df = df.copy()
+    if progress:
+        tqdm.pandas(desc="Indexing spectra")
+        df["spectrum"] = df.progress_apply(pick, axis=1)
+    else:
+        df["spectrum"] = df.apply(pick, axis=1)
+    return df
+
+
+def get_selection_grid_url(api_url: str) -> str:
+    """Return the first matching selection-grid JSON URL from the dataset overlays folder."""
+    dataset_base = infer_dataset_base_from_api(api_url)
+    candidates = [
+        "overlays/selection-grid.json",
+        "overlays/selection_grid.json",
+        "selection-grid.json",
+        "selection_grid.json",
+    ]
+    session = _new_session()
+    for rel in candidates:
+        url = urljoin(dataset_base, rel)
+        try:
+            r = session.get(url, timeout=30)
+            if r.ok:
+                return url
+        except Exception:
+            pass
+    raise FileNotFoundError("Could not find selection-grid.json in the dataset overlays folder.")
+
+
+def load_all_cells_from_selection_grid(api_url: str) -> pd.DataFrame:
+    """Load the full cell grid from ``overlays/selection-grid.json`` into a DataFrame."""
+    grid_url = get_selection_grid_url(api_url)
+    session = _new_session()
+    items = fetch_json_items(grid_url, session)
+
+    rows = []
+    for rec in items:
+        if rec.get("type") != "rect":
+            continue
+        r = rec.get("rownum", rec.get("row"))
+        c = rec.get("colnum", rec.get("col"))
+        src = rec.get("srcJson")
+        if r is None or c is None or src is None:
+            continue
+        rows.append({
+            "row": int(r),
+            "col": int(c),
+            "srcJson": str(src),
+            "basename": rec.get("basename"),
+            "label": rec.get("label"),
+            "x": rec.get("x"),
+            "y": rec.get("y"),
+            "width": rec.get("width"),
+            "height": rec.get("height"),
+        })
+
+    df = pd.DataFrame(rows)
+    if df.empty:
+        return df
+    for c in ["srcJson", "basename", "label"]:
+        if c in df.columns:
+            df[c] = df[c].astype("string")
+    df = add_json_urls(df, api_url)
+    df = df.drop_duplicates(subset=["json_url", "row", "col"]).reset_index(drop=True)
+    return df