Skip to content

canvod.audit API Reference

Audit, comparison, and regression verification for canvodpy GNSS-VOD pipelines.

Core

Core comparison engine for canvod-audit.

The main entry point is compare_datasets(), which aligns two xarray Datasets on shared coordinates and computes per-variable statistics with configurable tolerance tiers.

ComparisonResult dataclass

Result of comparing two datasets.

Attributes

label : str — Human-readable label for this comparison. variable_stats : dict[str, VariableStats] — Per-variable statistics. tier : ToleranceTier — Tolerance tier used. passed : bool — True if all variables are within tolerance for the given tier; for the EXACT tier this is equivalent to bit-identical, for the SCIENTIFIC tier it means within the stated per-variable tolerances. failures : dict[str, str] — Variables that failed, with a reason string. metadata : dict[str, Any] — Free-form metadata (source paths, timestamps, configs). alignment : AlignmentInfo — Information about coordinate alignment. coverage : CoverageReport, optional — Per-variable validity coverage counts (only populated when coverage reporting is requested).

Source code in packages/canvod-audit/src/canvod/audit/core.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@dataclass(frozen=True)
class ComparisonResult:
    """Result of comparing two datasets.

    Attributes
    ----------
    label : str
        Human-readable label for this comparison.
    variable_stats : dict[str, VariableStats]
        Per-variable statistics.
    tier : ToleranceTier
        Tolerance tier used.
    passed : bool
        True if all variables are within tolerance for the given tier.
        For EXACT tier this is equivalent to bit-identical; for SCIENTIFIC
        tier it means within the stated per-variable tolerances.
    failures : dict[str, str]
        Variables that failed, with a reason string.
    metadata : dict[str, Any]
        Free-form metadata (source paths, timestamps, configs).
    alignment : AlignmentInfo
        Information about coordinate alignment.
    coverage : CoverageReport, optional
        Per-variable validity coverage counts; None unless coverage
        reporting was requested.
    """

    label: str
    variable_stats: dict[str, VariableStats]
    tier: ToleranceTier
    passed: bool
    failures: dict[str, str] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    alignment: AlignmentInfo | None = None
    coverage: CoverageReport | None = None

    def to_polars(self) -> Any:
        """Return per-variable stats as a polars DataFrame."""
        # Local import: polars is only needed when tabular output is requested.
        import polars as pl

        rows = [vs.as_dict() for vs in self.variable_stats.values()]
        if not rows:
            # Empty comparison: return a typed empty frame rather than failing.
            return pl.DataFrame(schema={"variable": pl.Utf8})
        return pl.DataFrame(rows)

    def summary(self) -> str:
        """Human-readable summary string.

        Emits, in order and only when present: header, alignment domain,
        dataset-unique variables, per-variable stats table, validity
        asymmetry, and failure annotations.
        """
        status = "PASSED" if self.passed else "FAILED"
        lines = [
            f"── Comparison: {self.label} ──",
            f"Tier: {self.tier.value} | Status: {status} | Variables: {len(self.variable_stats)}",
        ]

        # Alignment info
        if self.alignment:
            a = self.alignment
            # The "(dropped: ...)" suffix is only shown when anything was dropped.
            lines.append(
                f"Domain: {a.n_shared_epochs:,} epochs × {a.n_shared_sids} sids"
                + (
                    f"  (dropped: {a.n_dropped_epochs_a + a.n_dropped_epochs_b} epochs, "
                    f"{a.n_dropped_sids_a + a.n_dropped_sids_b} sids)"
                    if a.n_dropped_epochs_a
                    + a.n_dropped_epochs_b
                    + a.n_dropped_sids_a
                    + a.n_dropped_sids_b
                    else ""
                )
            )

        # Coverage: vars unique to each dataset
        if self.coverage:
            c = self.coverage
            if c.vars_a_only:
                lines.append(f"A-only vars : {c.vars_a_only}")
            if c.vars_b_only:
                lines.append(f"B-only vars : {c.vars_b_only}")

        # Per-variable stats table
        if self.variable_stats:
            lines.append(
                f"\n{'var':<14} {'exact':>5}  {'n_cmp':>9}  {'max_abs':>12}  "
                f"{'rmse':>12}  {'bias':>12}  {'p99':>12}  {'nan_agr':>7}"
            )
            lines.append("─" * 92)
            for vname, vs in self.variable_stats.items():
                exact_str = "✓" if vs.exact_match else "✗"
                if vs.n_compared == 0:
                    # No comparable pairs — show em-dash placeholders for stats.
                    lines.append(
                        f"  {vname:<12} {exact_str:>5}  {'—':>9}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>7}"
                    )
                else:
                    lines.append(
                        f"  {vname:<12} {exact_str:>5}  {vs.n_compared:>9,}  "
                        f"{vs.max_abs_diff:>12.6g}  {vs.rmse:>12.6g}  "
                        f"{vs.bias:>12.6g}  {vs.p99:>12.6g}  {vs.nan_agreement_rate:>7.4f}"
                    )

        # Coverage: per-variable valid counts (only show non-trivial rows)
        if self.coverage:
            c = self.coverage
            asymmetric = [
                v
                for v in c.valid_both
                if c.valid_a_only.get(v, 0) or c.valid_b_only.get(v, 0)
            ]
            if asymmetric:
                lines.append("\nValidity asymmetry (valid in one, NaN in other):")
                for var in asymmetric:
                    lines.append(
                        f"  {var:<14}  both={c.valid_both[var]:>9,}  "
                        f"A-only={c.valid_a_only[var]:>8,}  "
                        f"B-only={c.valid_b_only[var]:>8,}  "
                        f"neither={c.neither_valid[var]:>8,}"
                    )

        # Failures / annotations
        if self.failures:
            lines.append("\nAnnotations / failures:")
            for var, reason in self.failures.items():
                lines.append(f"  {var}: {reason}")

        return "\n".join(lines)

to_polars()

Return per-variable stats as a polars DataFrame.

Source code in packages/canvod-audit/src/canvod/audit/core.py
57
58
59
60
61
62
63
64
def to_polars(self) -> Any:
    """Return the per-variable statistics as a polars DataFrame."""
    # Imported locally so the module loads even without polars installed.
    import polars as pl

    records = [stats.as_dict() for stats in self.variable_stats.values()]
    if records:
        return pl.DataFrame(records)
    # No variables compared: hand back a typed, empty frame.
    return pl.DataFrame(schema={"variable": pl.Utf8})

summary()

Human-readable summary string.

Source code in packages/canvod-audit/src/canvod/audit/core.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def summary(self) -> str:
    """Human-readable summary string.

    Emits, in order and only when present: header, alignment domain,
    dataset-unique variables, per-variable stats table, validity
    asymmetry, and failure annotations.
    """
    status = "PASSED" if self.passed else "FAILED"
    lines = [
        f"── Comparison: {self.label} ──",
        f"Tier: {self.tier.value} | Status: {status} | Variables: {len(self.variable_stats)}",
    ]

    # Alignment info
    if self.alignment:
        a = self.alignment
        # The "(dropped: ...)" suffix is only shown when anything was dropped.
        lines.append(
            f"Domain: {a.n_shared_epochs:,} epochs × {a.n_shared_sids} sids"
            + (
                f"  (dropped: {a.n_dropped_epochs_a + a.n_dropped_epochs_b} epochs, "
                f"{a.n_dropped_sids_a + a.n_dropped_sids_b} sids)"
                if a.n_dropped_epochs_a
                + a.n_dropped_epochs_b
                + a.n_dropped_sids_a
                + a.n_dropped_sids_b
                else ""
            )
        )

    # Coverage: vars unique to each dataset
    if self.coverage:
        c = self.coverage
        if c.vars_a_only:
            lines.append(f"A-only vars : {c.vars_a_only}")
        if c.vars_b_only:
            lines.append(f"B-only vars : {c.vars_b_only}")

    # Per-variable stats table
    if self.variable_stats:
        lines.append(
            f"\n{'var':<14} {'exact':>5}  {'n_cmp':>9}  {'max_abs':>12}  "
            f"{'rmse':>12}  {'bias':>12}  {'p99':>12}  {'nan_agr':>7}"
        )
        lines.append("─" * 92)
        for vname, vs in self.variable_stats.items():
            exact_str = "✓" if vs.exact_match else "✗"
            if vs.n_compared == 0:
                # No comparable pairs — show em-dash placeholders for stats.
                lines.append(
                    f"  {vname:<12} {exact_str:>5}  {'—':>9}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>7}"
                )
            else:
                lines.append(
                    f"  {vname:<12} {exact_str:>5}  {vs.n_compared:>9,}  "
                    f"{vs.max_abs_diff:>12.6g}  {vs.rmse:>12.6g}  "
                    f"{vs.bias:>12.6g}  {vs.p99:>12.6g}  {vs.nan_agreement_rate:>7.4f}"
                )

    # Coverage: per-variable valid counts (only show non-trivial rows)
    if self.coverage:
        c = self.coverage
        asymmetric = [
            v
            for v in c.valid_both
            if c.valid_a_only.get(v, 0) or c.valid_b_only.get(v, 0)
        ]
        if asymmetric:
            lines.append("\nValidity asymmetry (valid in one, NaN in other):")
            for var in asymmetric:
                lines.append(
                    f"  {var:<14}  both={c.valid_both[var]:>9,}  "
                    f"A-only={c.valid_a_only[var]:>8,}  "
                    f"B-only={c.valid_b_only[var]:>8,}  "
                    f"neither={c.neither_valid[var]:>8,}"
                )

    # Failures / annotations
    if self.failures:
        lines.append("\nAnnotations / failures:")
        for var, reason in self.failures.items():
            lines.append(f"  {var}: {reason}")

    return "\n".join(lines)

AlignmentInfo dataclass

Information about how two datasets were aligned.

Source code in packages/canvod-audit/src/canvod/audit/core.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
@dataclass(frozen=True)
class AlignmentInfo:
    """Information about how two datasets were aligned."""

    # Size of the shared (intersection) domain actually compared.
    n_shared_epochs: int
    n_shared_sids: int
    # Original extents of each input before alignment.
    n_epochs_a: int
    n_epochs_b: int
    n_sids_a: int
    n_sids_b: int

    @property
    def n_dropped_epochs_a(self) -> int:
        """Number of A's epochs that fell outside the shared domain."""
        return self.n_epochs_a - self.n_shared_epochs

    @property
    def n_dropped_epochs_b(self) -> int:
        """Number of B's epochs that fell outside the shared domain."""
        return self.n_epochs_b - self.n_shared_epochs

    @property
    def n_dropped_sids_a(self) -> int:
        """Number of A's sids that fell outside the shared domain."""
        return self.n_sids_a - self.n_shared_sids

    @property
    def n_dropped_sids_b(self) -> int:
        """Number of B's sids that fell outside the shared domain."""
        return self.n_sids_b - self.n_shared_sids

compare_datasets(ds_a, ds_b, *, variables=None, tier=ToleranceTier.NUMERICAL, tolerance_overrides=None, label='', align=True, metadata=None, report_coverage=False)

Compare two xarray Datasets variable-by-variable.

Parameters

ds_a, ds_b : xarray.Dataset — Datasets to compare; ds_a is typically the candidate (canvodpy), ds_b the reference. variables : list[str], optional — Variables to compare. If None, uses the intersection of both datasets' data variables. tier : ToleranceTier — Comparison strictness level. tolerance_overrides : dict[str, Tolerance], optional — Per-variable tolerance overrides. label : str — Human-readable label for this comparison. align : bool — If True, align datasets on shared (epoch, sid) coordinates. metadata : dict, optional — Free-form metadata to attach to the result. report_coverage : bool — If True, also compute per-variable validity counts and attach a CoverageReport to the result.

Returns

ComparisonResult

Source code in packages/canvod-audit/src/canvod/audit/core.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
def compare_datasets(
    ds_a: Any,
    ds_b: Any,
    *,
    variables: list[str] | None = None,
    tier: ToleranceTier = ToleranceTier.NUMERICAL,
    tolerance_overrides: dict[str, Tolerance] | None = None,
    label: str = "",
    align: bool = True,
    metadata: dict[str, Any] | None = None,
    report_coverage: bool = False,
) -> ComparisonResult:
    """Compare two xarray Datasets variable-by-variable.

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets to compare. ``ds_a`` is typically the candidate (canvodpy),
        ``ds_b`` the reference.
    variables : list[str], optional
        Variables to compare. If None, uses the intersection of both datasets'
        data variables.
    tier : ToleranceTier
        Comparison strictness level.
    tolerance_overrides : dict[str, Tolerance], optional
        Per-variable tolerance overrides.
    label : str
        Human-readable label for this comparison.
    align : bool
        If True, align datasets on shared (epoch, sid) coordinates.
    metadata : dict, optional
        Free-form metadata to attach to the result.
    report_coverage : bool
        If True, also compute per-variable validity counts (valid in both,
        valid in only one, valid in neither) and attach a ``CoverageReport``
        to the result.

    Returns
    -------
    ComparisonResult
        Per-variable statistics, overall pass/fail, failure reasons, and
        (optionally) alignment and coverage information.
    """
    alignment = None
    if align:
        ds_a, ds_b, alignment = _align_datasets(ds_a, ds_b)

        # Guard: fail if alignment produced zero overlap
        if alignment.n_shared_epochs == 0 or alignment.n_shared_sids == 0:
            return ComparisonResult(
                label=label or f"{tier.value} comparison",
                variable_stats={},
                tier=tier,
                passed=False,
                failures={"_alignment": "No shared coordinates after alignment"},
                metadata=metadata or {},
                alignment=alignment,
            )
        # Warn when >5 % of the union is dropped — significant data loss
        total_epochs = (
            alignment.n_shared_epochs
            + alignment.n_dropped_epochs_a
            + alignment.n_dropped_epochs_b
        )
        total_sids = (
            alignment.n_shared_sids
            + alignment.n_dropped_sids_a
            + alignment.n_dropped_sids_b
        )
        drop_epoch_pct = (
            (alignment.n_dropped_epochs_a + alignment.n_dropped_epochs_b) / total_epochs
            if total_epochs > 0
            else 0.0
        )
        drop_sid_pct = (
            (alignment.n_dropped_sids_a + alignment.n_dropped_sids_b) / total_sids
            if total_sids > 0
            else 0.0
        )
        if drop_epoch_pct > 0.05 or drop_sid_pct > 0.05:
            warnings.warn(
                f"[{label or 'compare_datasets'}] Alignment dropped "
                f"{drop_epoch_pct:.1%} of epochs "
                f"({alignment.n_dropped_epochs_a} from A, "
                f"{alignment.n_dropped_epochs_b} from B) and "
                f"{drop_sid_pct:.1%} of SIDs "
                f"({alignment.n_dropped_sids_a} from A, "
                f"{alignment.n_dropped_sids_b} from B). "
                "Results cover only the intersection.",
                stacklevel=2,
            )

    # Determine variables to compare
    vars_a = set(ds_a.data_vars)
    vars_b = set(ds_b.data_vars)
    if variables is None:
        variables = sorted(vars_a & vars_b)

    # Compute per-variable stats
    variable_stats: dict[str, VariableStats] = {}
    failures: dict[str, str] = {}
    cov_valid_both: dict[str, int] = {}
    cov_valid_a_only: dict[str, int] = {}
    cov_valid_b_only: dict[str, int] = {}
    cov_neither: dict[str, int] = {}

    for var in variables:
        # Explicitly-requested variables missing from either side are
        # silently skipped (they show up via coverage vars_a_only/vars_b_only).
        if var not in ds_a.data_vars or var not in ds_b.data_vars:
            continue

        try:
            a_vals = ds_a[var].values.astype(np.float64)
            b_vals = ds_b[var].values.astype(np.float64)
        except (ValueError, TypeError):
            # BUGFIX: `except ValueError, TypeError:` is Python 2 syntax and a
            # SyntaxError in Python 3 — the tuple form catches both exceptions.
            failures[var] = "non-numeric dtype, skipped"
            continue

        if a_vals.shape != b_vals.shape:
            failures[var] = f"shape mismatch: {a_vals.shape} vs {b_vals.shape}"
            continue

        # Strip attrs from the DataArray and all its coordinates before
        # comparing — da.equals() already ignores attrs per xarray spec, but
        # this makes the contract explicit.  Coordinate VALUES and dtypes are
        # still checked; if they differ between stores that is meaningful.
        exact_match = bool(_strip_attrs(ds_a[var]).equals(_strip_attrs(ds_b[var])))

        vs = compute_variable_stats(var, a_vals, b_vals, exact_match=exact_match)
        variable_stats[var] = vs

        # Tolerance check is annotation-only — never overrides exact_match for `passed`
        tol = get_tolerance(var, tier, tolerance_overrides)
        reason = _check_tolerance(vs, tol)
        if reason:
            failures[var] = reason

        # Structural diagnostic: values are identical but exact_match=False —
        # use xr.testing.assert_equal to pinpoint what structural element
        # differs (coord dtype, missing coord, dimension order, etc.).
        # Only runs when no numeric or NaN-rate failure was already recorded,
        # so it doesn't shadow a real science difference.
        if not exact_match and vs.max_abs_diff == 0.0 and var not in failures:
            msg = _structural_diff(ds_a[var], ds_b[var])
            failures[var] = f"structural (values identical): {msg}"

        if report_coverage:
            mask_a = ~np.isnan(a_vals)
            mask_b = ~np.isnan(b_vals)
            cov_valid_both[var] = int(np.sum(mask_a & mask_b))
            cov_valid_a_only[var] = int(np.sum(mask_a & ~mask_b))
            cov_valid_b_only[var] = int(np.sum(~mask_a & mask_b))
            cov_neither[var] = int(np.sum(~mask_a & ~mask_b))

    coverage = (
        CoverageReport(
            vars_a_only=sorted(vars_a - vars_b),
            vars_b_only=sorted(vars_b - vars_a),
            valid_both=cov_valid_both,
            valid_a_only=cov_valid_a_only,
            valid_b_only=cov_valid_b_only,
            neither_valid=cov_neither,
        )
        if report_coverage
        else None
    )

    # passed iff every compared variable is within tolerance for the given tier.
    # For EXACT tier (atol=0): a variable with any diff is in failures, so
    # len(failures)==0 implies bit-identical.  For SCIENTIFIC tier: implies
    # within stated tolerances.  exact_match is still recorded per-variable for
    # informational purposes (shown in the Typst ✓/✗ column).
    passed = bool(variable_stats) and len(failures) == 0

    return ComparisonResult(
        label=label or f"{tier.value} comparison",
        variable_stats=variable_stats,
        tier=tier,
        passed=passed,
        failures=failures,
        metadata=metadata or {},
        alignment=alignment,
        coverage=coverage,
    )

Statistics

Statistical comparison functions for paired arrays.

All functions operate on flat numpy arrays and handle NaN masking consistently: statistics are computed only over mutually non-NaN values.

This module also provides the observable-difference reporting framework: VariableBudget, VarDiffStats, compute_diff_report, and print_diff_report. These support the scientific principle that every observable difference must be reported with actual numbers and annotated against a physically-grounded expected budget — never hidden in tolerances.

VariableStats dataclass

Per-variable comparison statistics.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
@dataclass(frozen=True)
class VariableStats:
    """Per-variable comparison statistics."""

    name: str
    exact_match: bool
    # ── validity counts over the shared (epoch, sid) domain ───────────────
    n_total: int  # total cells in shared (epoch, sid) domain
    n_compared: int  # both non-NaN (valid pairs)
    n_a_only: int  # valid in A, NaN in B
    n_b_only: int  # valid in B, NaN in A
    n_neither: int  # both NaN
    n_nonzero_diff: int  # valid pairs where |diff| > 0
    # ── difference statistics (NaN when exact_match=True or n_compared=0) ─
    max_abs_diff: float
    rmse: float
    mae: float
    bias: float
    correlation: float
    # ── percentiles of |diff| over valid pairs ────────────────────────────
    p50: float
    p90: float
    p99: float
    # ── NaN counts and rates ──────────────────────────────────────────────
    n_nan_a: int
    n_nan_b: int
    pct_nan_a: float
    pct_nan_b: float
    nan_agreement_rate: float

    def as_dict(self) -> dict[str, Any]:
        """Flat dict for DataFrame construction."""
        # Assembled in three groups — raw counts, rounded difference stats,
        # and rounded NaN rates — preserving the original column order.
        counts = {
            "variable": self.name,
            "exact_match": self.exact_match,
            "n_total": self.n_total,
            "n_compared": self.n_compared,
            "n_a_only": self.n_a_only,
            "n_b_only": self.n_b_only,
            "n_neither": self.n_neither,
            "n_nonzero_diff": self.n_nonzero_diff,
        }
        rounded_8 = [
            ("max_abs_diff", self.max_abs_diff),
            ("rmse", self.rmse),
            ("mae", self.mae),
            ("bias", self.bias),
        ]
        percentiles_8 = [("p50", self.p50), ("p90", self.p90), ("p99", self.p99)]
        return {
            **counts,
            **{key: round(val, 8) for key, val in rounded_8},
            "correlation": round(self.correlation, 6),
            **{key: round(val, 8) for key, val in percentiles_8},
            "pct_nan_a": round(self.pct_nan_a, 4),
            "pct_nan_b": round(self.pct_nan_b, 4),
            "nan_agreement": round(self.nan_agreement_rate, 4),
        }

as_dict()

Flat dict for DataFrame construction.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def as_dict(self) -> dict[str, Any]:
    """Flat dict for DataFrame construction."""
    # Build incrementally so insertion (column) order stays explicit.
    out: dict[str, Any] = {"variable": self.name, "exact_match": self.exact_match}
    for attr in ("n_total", "n_compared", "n_a_only", "n_b_only",
                 "n_neither", "n_nonzero_diff"):
        out[attr] = getattr(self, attr)
    for attr in ("max_abs_diff", "rmse", "mae", "bias"):
        out[attr] = round(getattr(self, attr), 8)
    out["correlation"] = round(self.correlation, 6)
    for attr in ("p50", "p90", "p99"):
        out[attr] = round(getattr(self, attr), 8)
    out["pct_nan_a"] = round(self.pct_nan_a, 4)
    out["pct_nan_b"] = round(self.pct_nan_b, 4)
    out["nan_agreement"] = round(self.nan_agreement_rate, 4)
    return out

compute_variable_stats(name, a, b, exact_match=None)

Compute all comparison statistics for a single variable.

Parameters

name : str Variable name. a, b : np.ndarray Arrays of equal shape to compare. exact_match : bool, optional Pre-computed exact equality (e.g. from da.equals()). If None, inferred as max_abs_diff == 0.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
def compute_variable_stats(
    name: str,
    a: np.ndarray,
    b: np.ndarray,
    exact_match: bool | None = None,
) -> VariableStats:
    """Compute all comparison statistics for a single variable.

    Parameters
    ----------
    name : str
        Variable name.
    a, b : np.ndarray
        Arrays of equal shape to compare.
    exact_match : bool, optional
        Pre-computed exact equality (e.g. from ``da.equals()``).
        If None, inferred as ``max_abs_diff == 0``.
    """
    # Flatten and promote to float64 so NaN/inf masking behaves uniformly.
    flat_a = a.ravel().astype(np.float64)
    flat_b = b.ravel().astype(np.float64)

    n_total = len(flat_a)
    finite_a = np.isfinite(flat_a)
    finite_b = np.isfinite(flat_b)
    both_valid = finite_a & finite_b

    nan_count_a = int(np.isnan(flat_a).sum())
    nan_count_b = int(np.isnan(flat_b).sum())

    # Fields that do not depend on the paired differences.
    base = dict(
        name=name,
        n_total=n_total,
        n_a_only=int(np.sum(finite_a & ~finite_b)),
        n_b_only=int(np.sum(~finite_a & finite_b)),
        n_neither=int(np.sum(~finite_a & ~finite_b)),
        n_nan_a=nan_count_a,
        n_nan_b=nan_count_b,
        pct_nan_a=nan_count_a / n_total if n_total > 0 else 0.0,
        pct_nan_b=nan_count_b / n_total if n_total > 0 else 0.0,
        nan_agreement_rate=nan_agreement(flat_a, flat_b),
    )

    n_compared = int(both_valid.sum())
    if n_compared == 0:
        # No mutually valid pairs: all difference statistics are undefined.
        undefined = float("nan")
        return VariableStats(
            exact_match=True if exact_match is None else exact_match,
            n_compared=0,
            n_nonzero_diff=0,
            max_abs_diff=undefined,
            rmse=undefined,
            mae=undefined,
            bias=undefined,
            correlation=undefined,
            p50=undefined,
            p90=undefined,
            p99=undefined,
            **base,
        )

    # Paired differences over mutually valid elements, computed once.
    delta = flat_a[both_valid] - flat_b[both_valid]
    abs_delta = np.abs(delta)
    worst = float(abs_delta.max())
    if exact_match is None:
        exact_match = worst == 0.0

    return VariableStats(
        exact_match=exact_match,
        n_compared=n_compared,
        n_nonzero_diff=int(np.count_nonzero(abs_delta > 0)),
        max_abs_diff=worst,
        rmse=float(np.sqrt(np.mean(delta * delta))),
        mae=float(abs_delta.mean()),
        bias=float(delta.mean()),
        correlation=correlation(flat_a, flat_b),
        p50=float(np.percentile(abs_delta, 50)),
        p90=float(np.percentile(abs_delta, 90)),
        p99=float(np.percentile(abs_delta, 99)),
        **base,
    )

rmse(a, b)

Root Mean Square Error over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
285
286
287
288
289
290
291
def rmse(a: np.ndarray, b: np.ndarray) -> float:
    """Root Mean Square Error over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — RMSE is undefined.
        return float("nan")
    residual = a[valid] - b[valid]
    return float(np.sqrt(np.mean(np.square(residual))))

bias(a, b)

Mean difference (a - b) over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
294
295
296
297
298
299
def bias(a: np.ndarray, b: np.ndarray) -> float:
    """Mean difference (a - b) over mutually valid elements."""
    valid = _valid_mask(a, b)
    # Undefined (NaN) when there is no overlapping valid data.
    return float(np.mean(a[valid] - b[valid])) if valid.any() else float("nan")

mae(a, b)

Mean Absolute Error over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
302
303
304
305
306
307
def mae(a: np.ndarray, b: np.ndarray) -> float:
    """Mean Absolute Error over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — MAE is undefined.
        return float("nan")
    return float(np.abs(a[valid] - b[valid]).mean())

max_abs_diff(a, b)

Maximum absolute difference over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
310
311
312
313
314
315
def max_abs_diff(a: np.ndarray, b: np.ndarray) -> float:
    """Maximum absolute difference over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — the maximum is undefined.
        return float("nan")
    return float(np.abs(a[valid] - b[valid]).max())

correlation(a, b)

Pearson correlation over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
318
319
320
321
322
323
324
def correlation(a: np.ndarray, b: np.ndarray) -> float:
    """Pearson correlation over mutually valid elements."""
    valid = _valid_mask(a, b)
    if valid.sum() < 2:
        # Correlation needs at least two paired samples.
        return float("nan")
    # np.corrcoef returns the 2x2 correlation matrix; take the off-diagonal.
    return float(np.corrcoef(a[valid], b[valid])[0, 1])

nan_agreement(a, b)

Fraction of elements where NaN status agrees (both NaN or both finite).

Source code in packages/canvod-audit/src/canvod/audit/stats.py
327
328
329
330
331
332
def nan_agreement(a: np.ndarray, b: np.ndarray) -> float:
    """Fraction of elements where NaN status agrees (both NaN or both finite)."""
    # Elementwise: True where a and b are either both NaN or both not-NaN.
    same_status = np.isnan(a) == np.isnan(b)
    return float(np.mean(same_status))

Tolerances

Tolerance definitions for numerical comparison.

Three tiers of comparison strictness, from bit-identical to domain-specific scientific thresholds.

TIER_DEFAULTS = {ToleranceTier.EXACT: Tolerance(atol=0.0, mae_atol=0.0, description='Bit-identical comparison'), ToleranceTier.NUMERICAL: Tolerance(atol=1e-06, mae_atol=1e-10, description='Float64 precision — atol bounds worst-case single-element error from operation reordering; mae_atol bounds typical (MAE) error.'), ToleranceTier.SCIENTIFIC: Tolerance(atol=0.01, mae_atol=0.01, description='Domain-specific scientific tolerance')} module-attribute

SCIENTIFIC_DEFAULTS = {'SNR': Tolerance(atol=0.25, mae_atol=0.0, nan_rate_atol=0.01, description='SBF quantization is 0.25 dB; RINEX ~0.001 dB. Hardware limitation.'), 'vod': Tolerance(atol=0.01, mae_atol=0.01, nan_rate_atol=0.01, description='VOD retrieval: sub-0.01 differences are below measurement noise.'), 'phi': Tolerance(atol=0.05, mae_atol=0.0, nan_rate_atol=0.0, description='Elevation angle: coordinate conversion differences up to ~2.4 deg observed between implementations (wrap-aware).'), 'theta': Tolerance(atol=0.05, mae_atol=0.0, nan_rate_atol=0.0, description='Azimuth angle: coordinate conversion differences.'), 'carrier_phase': Tolerance(atol=1e-06, mae_atol=1e-09, nan_rate_atol=0.01, description='Carrier phase: high-precision observable, expect near-exact agreement.'), 'pseudorange': Tolerance(atol=0.001, mae_atol=1e-06, nan_rate_atol=0.01, description='Pseudorange: meter-level observable.'), 'sat_x': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite X coordinate (meters). NaN rate may differ due to broadcast vs SP3 satellite coverage.'), 'sat_y': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite Y coordinate (meters).'), 'sat_z': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite Z coordinate (meters).')} module-attribute

Tolerance dataclass

Numerical tolerance for a single variable comparison.

A variable passes if ALL of the following hold:

  1. max_abs_diff <= atol — worst-case single-element error
  2. mae <= mae_atol — typical (mean) error, when mae_atol > 0
  3. |NaN_rate_a - NaN_rate_b| <= nan_rate_atol

Parameters

atol : float Absolute tolerance on maximum single-element error. mae_atol : float Absolute tolerance on Mean Absolute Error (typical error). Set to 0 to skip this check. nan_rate_atol : float Maximum allowed difference in NaN rates between the two datasets. 0.0 means NaN patterns must match exactly. description : str Human-readable justification for this tolerance (for the paper).

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
@dataclass(frozen=True)
class Tolerance:
    """Numerical tolerance for a single variable comparison.

    A variable passes if ALL of the following hold:

    1. ``max_abs_diff <= atol``  — worst-case single-element error
    2. ``mae <= mae_atol``       — typical (mean) error, when ``mae_atol > 0``
    3. ``|NaN_rate_a - NaN_rate_b| <= nan_rate_atol``

    Parameters
    ----------
    atol : float
        Absolute tolerance on maximum single-element error.
    mae_atol : float
        Absolute tolerance on Mean Absolute Error (typical error).
        Set to 0 to skip this check.
    nan_rate_atol : float
        Maximum allowed difference in NaN rates between the two datasets.
        0.0 means NaN patterns must match exactly.
    description : str
        Human-readable justification for this tolerance (for the paper).
    """

    atol: float  # bound on max_abs_diff (worst-case single-element error)
    mae_atol: float  # bound on MAE; 0 disables this check
    nan_rate_atol: float = 0.0  # allowed |NaN_rate_a - NaN_rate_b|; 0 = exact match
    description: str = ""  # justification recorded alongside the tolerance

ToleranceTier

Bases: Enum

Pre-defined comparison strictness levels.

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class ToleranceTier(Enum):
    """Named strictness levels for dataset comparisons."""

    EXACT = "exact"
    """Bit-identical (atol=0, mae_atol=0). For outputs computed from the
    same source data with the same algorithm."""

    NUMERICAL = "numerical"
    """Float64 precision (atol=1e-6, mae_atol=1e-10). For values that are
    mathematically identical but may drift through floating-point
    operation ordering."""

    SCIENTIFIC = "scientific"
    """Per-variable, domain-specific thresholds. For independent
    implementations or data sources where small physical differences are
    expected (e.g. the 0.25 dB SBF SNR quantization)."""

EXACT = 'exact' class-attribute instance-attribute

Bit-identical. atol=0, mae_atol=0. Use for values that should be computed from the same source data with the same algorithm.

NUMERICAL = 'numerical' class-attribute instance-attribute

Float64 precision. atol=1e-6, mae_atol=1e-10. Use for values that should be mathematically identical but may differ due to floating-point operation ordering.

SCIENTIFIC = 'scientific' class-attribute instance-attribute

Domain-specific thresholds per variable. Use for comparisons across independent implementations or data sources where small physical differences are expected (e.g. SBF 0.25 dB SNR quantization).

get_tolerance(variable, tier, overrides=None)

Look up the tolerance for a variable at a given tier.

Resolution order:

1. overrides[variable] if provided
2. SCIENTIFIC_DEFAULTS[variable] if tier is SCIENTIFIC
3. TIER_DEFAULTS[tier] (catch-all)

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def get_tolerance(
    variable: str,
    tier: ToleranceTier,
    overrides: dict[str, Tolerance] | None = None,
) -> Tolerance:
    """Resolve the tolerance for *variable* at the given *tier*.

    Resolution order:
    1. ``overrides[variable]`` if provided
    2. ``SCIENTIFIC_DEFAULTS[variable]`` if tier is SCIENTIFIC
    3. ``TIER_DEFAULTS[tier]`` (catch-all)
    """
    # Caller-supplied overrides always win.
    if overrides is not None and variable in overrides:
        return overrides[variable]
    # SCIENTIFIC tier consults the per-variable defaults before the catch-all.
    if tier is ToleranceTier.SCIENTIFIC and variable in SCIENTIFIC_DEFAULTS:
        return SCIENTIFIC_DEFAULTS[variable]
    return TIER_DEFAULTS[tier]

Tiers — Internal

Internal consistency comparisons within canvodpy.

Compare outputs from different processing paths that should produce equivalent results: SBF vs RINEX readers, broadcast vs agency ephemeris.

compare_sbf_vs_rinex(store_sbf, store_rinex, *, group, variables=None)

Compare SBF and RINEX reader outputs from the same observation period.

Parameters

store_sbf : MyIcechunkStore Store populated from SBF files. store_rinex : MyIcechunkStore Store populated from RINEX files. group : str Date group key (e.g. "2025001"). variables : list[str], optional Variables to compare. Defaults to intersection.

Returns

ComparisonResult SCIENTIFIC tier — expects SNR quantization differences (0.25 dB) and potentially different satellite coverage.

Source code in packages/canvod-audit/src/canvod/audit/tiers/internal.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def compare_sbf_vs_rinex(
    store_sbf: Any,
    store_rinex: Any,
    *,
    group: str,
    variables: list[str] | None = None,
) -> ComparisonResult:
    """Compare SBF and RINEX reader outputs for the same observation period.

    Parameters
    ----------
    store_sbf : MyIcechunkStore
        Store populated from SBF files.
    store_rinex : MyIcechunkStore
        Store populated from RINEX files.
    group : str
        Date group key (e.g. "2025001").
    variables : list[str], optional
        Variables to compare. Defaults to the intersection.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — expects SNR quantization differences (0.25 dB)
        and potentially different satellite coverage.
    """
    provenance = {
        "comparison_type": "internal",
        "reader_a": "sbf",
        "reader_b": "rinex",
        "group": group,
    }
    return compare_datasets(
        store_sbf.read_group(group),
        store_rinex.read_group(group),
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        label=f"SBF vs RINEX — {group}",
        metadata=provenance,
    )

compare_ephemeris_sources(store_broadcast, store_agency, *, group, variables=None)

Compare broadcast vs agency (SP3/CLK) ephemeris augmentation.

Parameters

store_broadcast : MyIcechunkStore Store augmented with broadcast ephemeris (from SBF SatVisibility). store_agency : MyIcechunkStore Store augmented with agency products (SP3 + CLK). group : str Date group key.

Returns

ComparisonResult SCIENTIFIC tier — broadcast ephemeris has lower accuracy than precise products (~1-2m vs ~2cm orbit accuracy).

Source code in packages/canvod-audit/src/canvod/audit/tiers/internal.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def compare_ephemeris_sources(
    store_broadcast: Any,
    store_agency: Any,
    *,
    group: str,
    variables: list[str] | None = None,
) -> ComparisonResult:
    """Compare broadcast vs agency (SP3/CLK) ephemeris augmentation.

    Parameters
    ----------
    store_broadcast : MyIcechunkStore
        Store augmented with broadcast ephemeris (from SBF SatVisibility).
    store_agency : MyIcechunkStore
        Store augmented with agency products (SP3 + CLK).
    group : str
        Date group key.
    variables : list[str], optional
        Variables to compare; defaults to the satellite-position and
        look-angle variables present in both datasets.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — broadcast ephemeris has lower accuracy than
        precise products (~1-2m vs ~2cm orbit accuracy).
    """
    ds_a = store_broadcast.read_group(group)
    ds_b = store_agency.read_group(group)

    if variables is None:
        # Default to satellite coordinate variables shared by both datasets.
        candidates = ("sat_x", "sat_y", "sat_z", "phi", "theta")
        variables = [
            name
            for name in candidates
            if name in ds_a.data_vars and name in ds_b.data_vars
        ]

    return compare_datasets(
        ds_a,
        ds_b,
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        label=f"Broadcast vs Agency ephemeris — {group}",
        metadata={
            "comparison_type": "internal",
            "ephemeris_a": "broadcast",
            "ephemeris_b": "agency",
            "group": group,
        },
    )

Tiers — Regression

Regression verification against frozen reference outputs.

Freeze a known-good output as a checkpoint, then compare future outputs against it to detect regressions.

freeze_checkpoint(ds, output_path, *, metadata=None)

Save a dataset as a NetCDF checkpoint for future regression testing.

Parameters

ds : xarray.Dataset The known-good reference output. output_path : Path Where to write the checkpoint file (.nc). metadata : dict, optional Metadata to store as dataset attributes (git hash, date, config).

Returns

Path The written checkpoint path.

Source code in packages/canvod-audit/src/canvod/audit/tiers/regression.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def freeze_checkpoint(
    ds: Any,
    output_path: Path,
    *,
    metadata: dict[str, Any] | None = None,
) -> Path:
    """Save a dataset as a NetCDF checkpoint for future regression testing.

    Parameters
    ----------
    ds : xarray.Dataset
        The known-good reference output.
    output_path : Path
        Where to write the checkpoint file (.nc).
    metadata : dict, optional
        Metadata to store as dataset attributes (git hash, date, config).

    Returns
    -------
    Path
        The written checkpoint path.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if metadata:
        ds = ds.assign_attrs(**{f"checkpoint_{k}": str(v) for k, v in metadata.items()})

    ds.to_netcdf(output_path)
    return output_path

compare_against_checkpoint(ds, checkpoint_path, *, variables=None, tier=ToleranceTier.EXACT, label='')

Compare a dataset against a frozen checkpoint.

Parameters

ds : xarray.Dataset Current output to verify. checkpoint_path : Path Path to the reference checkpoint (.nc file). variables : list[str], optional Variables to compare. Defaults to intersection. tier : ToleranceTier Default EXACT — regressions should produce identical output. label : str Comparison label.

Returns

ComparisonResult

Source code in packages/canvod-audit/src/canvod/audit/tiers/regression.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def compare_against_checkpoint(
    ds: Any,
    checkpoint_path: Path,
    *,
    variables: list[str] | None = None,
    tier: ToleranceTier = ToleranceTier.EXACT,
    label: str = "",
) -> ComparisonResult:
    """Compare a dataset against a frozen checkpoint.

    Parameters
    ----------
    ds : xarray.Dataset
        Current output to verify.
    checkpoint_path : Path
        Path to the reference checkpoint (.nc file).
    variables : list[str], optional
        Variables to compare. Defaults to the intersection.
    tier : ToleranceTier
        Default EXACT — a regression check should reproduce the reference
        bit-for-bit.
    label : str
        Comparison label; auto-generated from the checkpoint name if empty.

    Returns
    -------
    ComparisonResult
    """
    ref_path = Path(checkpoint_path)
    reference = xr.open_dataset(ref_path)

    comparison_label = label if label else f"Regression check vs {ref_path.name}"
    return compare_datasets(
        ds,
        reference,
        variables=variables,
        tier=tier,
        label=comparison_label,
        metadata={
            "comparison_type": "regression",
            "checkpoint_path": str(ref_path),
        },
    )

Tiers — External

External intercomparison with independent implementations.

Compare canvodpy outputs against gnssvod (Humphrey et al.) — the established community GNSS-VOD processing tool.

This module provides the low-level compare_vs_gnssvod function. For the full audit workflow (with AuditResult), use canvod.audit.runners.vs_gnssvod.audit_vs_gnssvod instead.

compare_vs_gnssvod(ds_canvod, ds_or_df_gnssvod, *, band_filter='L1|C', snr_col='S1C', vod_col='VOD1', variables=None, label='canvodpy vs gnssvod', gnssvod_epoch_col='Epoch', gnssvod_sid_col='SV')

Compare canvodpy output against gnssvod output for one band.

Uses GnssvodAdapter to project canvodpy into gnssvod variable space before comparison, ensuring identical variable names, units, and conventions.

Parameters

ds_canvod : xarray.Dataset canvodpy output (epoch, sid) dataset. ds_or_df_gnssvod : xarray.Dataset or pandas.DataFrame gnssvod output. If a pandas DataFrame, it is automatically converted to xarray. band_filter : str SID band suffix, e.g. "L1|C" or "L2|W". snr_col : str gnssvod SNR column name for this band. vod_col : str or None gnssvod VOD column name for this band. variables : list[str], optional Variables to compare. Defaults to all shared variables. label : str Comparison label. gnssvod_epoch_col, gnssvod_sid_col : str Column names in gnssvod DataFrame.

Returns

ComparisonResult SCIENTIFIC tier — two independent implementations.

Source code in packages/canvod-audit/src/canvod/audit/tiers/external.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def compare_vs_gnssvod(
    ds_canvod: Any,
    ds_or_df_gnssvod: Any,
    *,
    band_filter: str = "L1|C",
    snr_col: str = "S1C",
    vod_col: str | None = "VOD1",
    variables: list[str] | None = None,
    label: str = "canvodpy vs gnssvod",
    gnssvod_epoch_col: str = "Epoch",
    gnssvod_sid_col: str = "SV",
) -> ComparisonResult:
    """Compare canvodpy output against gnssvod output for one band.

    canvodpy data is first projected into gnssvod variable space via
    ``GnssvodAdapter`` so that variable names, units, and conventions
    line up before any statistics are computed.

    Parameters
    ----------
    ds_canvod : xarray.Dataset
        canvodpy output (epoch, sid) dataset.
    ds_or_df_gnssvod : xarray.Dataset or pandas.DataFrame
        gnssvod output; DataFrames are converted to xarray automatically.
    band_filter : str
        SID band suffix, e.g. ``"L1|C"`` or ``"L2|W"``.
    snr_col : str
        gnssvod SNR column name for this band.
    vod_col : str or None
        gnssvod VOD column name for this band (None to skip VOD).
    variables : list[str], optional
        Variables to compare. Defaults to all shared variables.
    label : str
        Comparison label.
    gnssvod_epoch_col, gnssvod_sid_col : str
        Column names in the gnssvod DataFrame.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — two independent implementations.
    """
    import pandas as pd

    ds_gnssvod = ds_or_df_gnssvod
    if isinstance(ds_gnssvod, pd.DataFrame):
        ds_gnssvod = gnssvod_df_to_xarray(
            ds_gnssvod,
            epoch_col=gnssvod_epoch_col,
            sid_col=gnssvod_sid_col,
        )

    # Project canvodpy into gnssvod variable space.
    ds_adapted = GnssvodAdapter(
        ds_canvod,
        band_filter=band_filter,
        snr_col=snr_col,
        vod_col=vod_col,
    ).to_gnssvod_dataset()

    # Handle azimuth wrap-around before differencing.
    ds_adapted = _wrap_aware_azimuth_diff(ds_adapted, ds_gnssvod)

    # Tolerance overrides for whichever of the known variables are present.
    candidates = [
        (snr_col, "SNR"),
        ("Azimuth", "Azimuth"),
        ("Elevation", "Elevation"),
    ]
    if vod_col:
        candidates.append((vod_col, "VOD"))
    tol_overrides = {
        var: GNSSVOD_TOLERANCES[key]
        for var, key in candidates
        if var in ds_adapted.data_vars
    }

    return compare_datasets(
        ds_adapted,
        ds_gnssvod,
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        tolerance_overrides=tol_overrides,
        label=label,
        metadata={
            "comparison_type": "external",
            "implementation_a": "canvodpy",
            "implementation_b": "gnssvod",
            "band": band_filter,
            "snr_col": snr_col,
            "vod_col": vod_col,
        },
    )

Reporting — Tables

Table export for audit results: LaTeX, Markdown, Polars.

to_polars(result)

Return per-variable stats as a polars DataFrame.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
11
12
13
def to_polars(result: ComparisonResult) -> object:
    """Return per-variable stats as a polars DataFrame.

    Thin delegation to ``ComparisonResult.to_polars``; kept as a
    module-level function so the reporting API is uniform.
    """
    return result.to_polars()

to_markdown(result)

Render per-variable stats as a Markdown table.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def to_markdown(result: ComparisonResult) -> str:
    """Render per-variable stats as a Markdown table.

    Parameters
    ----------
    result : ComparisonResult
        Comparison whose per-variable statistics are tabulated.

    Returns
    -------
    str
        A pipe-delimited Markdown table, or a placeholder sentence when
        no variables were compared.
    """
    df = result.to_polars()
    if df.is_empty():
        return "_No variables compared._"

    def _cell(value: object) -> str:
        """Format one cell: compact floats; escape table-breaking chars."""
        if isinstance(value, float):
            return f"{value:.6g}"
        # Bug fix: unescaped "|" or embedded newlines in string cells
        # previously corrupted the table structure.
        return str(value).replace("|", r"\|").replace("\n", " ")

    cols = df.columns
    lines = []
    lines.append("| " + " | ".join(cols) + " |")
    lines.append("| " + " | ".join(["---"] * len(cols)) + " |")

    for row in df.iter_rows(named=True):
        lines.append("| " + " | ".join(_cell(row[c]) for c in cols) + " |")

    return "\n".join(lines)

to_latex(result, *, caption='', label='')

Render per-variable stats as a LaTeX table.

Produces a tabular environment inside a table float, ready to paste into a paper.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def to_latex(
    result: ComparisonResult,
    *,
    caption: str = "",
    label: str = "",
) -> str:
    """Render per-variable stats as a LaTeX table.

    Produces a ``tabular`` environment (booktabs rules) inside a
    ``table`` float, ready to paste into a paper.

    Parameters
    ----------
    result : ComparisonResult
        Comparison whose per-variable statistics are tabulated.
    caption : str
        Table caption; defaults to ``result.label``. Passed through
        verbatim so callers may embed LaTeX markup.
    label : str
        Optional ``\\label`` key for cross-referencing.

    Returns
    -------
    str
        LaTeX source, or a comment line when no variables were compared.
    """
    df = result.to_polars()
    if df.is_empty():
        return "% No variables compared."

    def _escape(text: str) -> str:
        # Underscores are common in stat/variable names (e.g. max_abs_diff)
        # and must be escaped outside math mode.
        return text.replace("_", r"\_")

    cols = df.columns
    col_spec = "l" + "r" * (len(cols) - 1)

    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        f"\\caption{{{caption or result.label}}}",
    ]
    if label:
        lines.append(f"\\label{{{label}}}")

    lines.append(f"\\begin{{tabular}}{{{col_spec}}}")
    lines.append(r"\toprule")

    # Header — bug fix: column names were previously emitted unescaped,
    # so names like max_abs_diff broke LaTeX compilation (rows were
    # already escaped; the header was not).
    header = " & ".join(f"\\textbf{{{_escape(c)}}}" for c in cols) + r" \\"
    lines.append(header)
    lines.append(r"\midrule")

    # Rows
    for row in df.iter_rows(named=True):
        cells = []
        for c in cols:
            v = row[c]
            if isinstance(v, float):
                # Scientific notation for very small or very large values.
                if abs(v) < 1e-3 or abs(v) > 1e4:
                    cells.append(f"${v:.2e}$")
                else:
                    cells.append(f"{v:.4f}")
            else:
                cells.append(_escape(str(v)))
        lines.append(" & ".join(cells) + r" \\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")

    return "\n".join(lines)

Reporting — Figures

Publication-quality figures for audit results.

plot_diff_histogram(ds_a, ds_b, variable, *, ax=None, bins=100, title=None)

Histogram of element-wise differences (a - b) for a single variable.

Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def plot_diff_histogram(
    ds_a: Any,
    ds_b: Any,
    variable: str,
    *,
    ax: Any = None,
    bins: int = 100,
    title: str | None = None,
) -> Any:
    """Histogram of element-wise differences (a - b) for a single variable.

    Only elements finite in BOTH datasets contribute.

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets holding *variable*; flattened before differencing.
    variable : str
        Variable name to compare.
    ax : matplotlib Axes, optional
        Axes to draw into; a new figure is created when omitted.
    bins : int
        Histogram bin count.
    title : str, optional
        Plot title override.

    Returns
    -------
    matplotlib.figure.Figure
    """
    import matplotlib.pyplot as plt

    a = ds_a[variable].values.ravel().astype(np.float64)
    b = ds_b[variable].values.ravel().astype(np.float64)
    mask = np.isfinite(a) & np.isfinite(b)
    diff = a[mask] - b[mask]

    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 4))
    else:
        fig = ax.get_figure()

    ax.set_xlabel(f"Difference ({variable})")
    ax.set_ylabel("Count")
    ax.set_title(title or f"Distribution of differences — {variable}")

    if diff.size == 0:
        # Bug fix: np.mean/np.max on an empty array raised when the two
        # datasets had no finite overlap — render a placeholder instead.
        ax.text(
            0.5,
            0.5,
            "No finite overlapping values",
            transform=ax.transAxes,
            ha="center",
            va="center",
        )
        fig.tight_layout()
        return fig

    ax.hist(diff, bins=bins, color="#5D7D5B", edgecolor="#375D3B", alpha=0.8)
    ax.axvline(0, color="#C75050", linewidth=1, linestyle="--")

    # Annotate with summary stats
    _text = f"mean={np.mean(diff):.2e}\nstd={np.std(diff):.2e}\nmax|diff|={np.max(np.abs(diff)):.2e}"
    ax.text(
        0.98,
        0.95,
        _text,
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontsize=8,
        family="monospace",
        bbox={"boxstyle": "round", "facecolor": "#E1E6B9", "alpha": 0.8},
    )

    fig.tight_layout()
    return fig

plot_scatter(ds_a, ds_b, variable, *, ax=None, title=None, label_a='Dataset A', label_b='Dataset B', max_points=50000)

Scatter plot of variable values: a vs b with 1:1 line.

Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def plot_scatter(
    ds_a: Any,
    ds_b: Any,
    variable: str,
    *,
    ax: Any = None,
    title: str | None = None,
    label_a: str = "Dataset A",
    label_b: str = "Dataset B",
    max_points: int = 50000,
) -> Any:
    """Scatter plot of variable values: a vs b with a 1:1 reference line.

    Only elements finite in both datasets are shown; at most *max_points*
    points are drawn (deterministic subsample, seed 42).

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets holding *variable*; flattened before plotting.
    variable : str
        Variable name to compare.
    ax : matplotlib Axes, optional
        Axes to draw into; a new square figure is created when omitted.
    title : str, optional
        Plot title override.
    label_a, label_b : str
        Axis labels for the two datasets (a on y, b on x).
    max_points : int
        Upper bound on plotted points, for rendering performance.

    Returns
    -------
    matplotlib.figure.Figure
    """
    import matplotlib.pyplot as plt

    a = ds_a[variable].values.ravel().astype(np.float64)
    b = ds_b[variable].values.ravel().astype(np.float64)
    mask = np.isfinite(a) & np.isfinite(b)
    a, b = a[mask], b[mask]

    # Subsample for plotting performance (fixed seed → reproducible figure)
    if len(a) > max_points:
        idx = np.random.default_rng(42).choice(len(a), max_points, replace=False)
        a, b = a[idx], b[idx]

    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 6))
    else:
        fig = ax.get_figure()

    # Bug fix: dataset label and variable name were concatenated with no
    # separator, rendering e.g. "Dataset Bphi".
    ax.set_xlabel(f"{label_b} — {variable}")
    ax.set_ylabel(f"{label_a} — {variable}")
    ax.set_title(title or f"{variable}: {label_a} vs {label_b}")

    if a.size:
        ax.scatter(b, a, s=1, alpha=0.3, color="#5D7D5B")

        # 1:1 line spanning the joint data range
        lims = [min(a.min(), b.min()), max(a.max(), b.max())]
        ax.plot(lims, lims, "k--", linewidth=0.8, label="1:1")
        ax.legend(loc="upper left")
    else:
        # Bug fix: .min()/.max() on empty arrays raised when the datasets
        # had no finite overlap — render a placeholder instead.
        ax.text(
            0.5,
            0.5,
            "No finite overlapping values",
            transform=ax.transAxes,
            ha="center",
            va="center",
        )

    ax.set_aspect("equal")
    fig.tight_layout()
    return fig

plot_summary_dashboard(result, *, figsize=(12, 6))

Multi-panel summary of a ComparisonResult.

Bar chart of RMSE per variable + pass/fail indicators. Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def plot_summary_dashboard(
    result: ComparisonResult,
    *,
    figsize: tuple[float, float] = (12, 6),
) -> Any:
    """Two-panel overview of a ComparisonResult.

    Left panel: RMSE per variable, colored by pass (green) / fail (red).
    Right panel: NaN-pattern agreement rate per variable.
    Returns a matplotlib Figure.
    """
    import matplotlib.pyplot as plt

    names = list(result.variable_stats.keys())
    if not names:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "No variables compared", ha="center", va="center")
        return fig

    bar_colors = [
        "#C75050" if name in result.failures else "#5D7D5B" for name in names
    ]
    rmse_values = [result.variable_stats[name].rmse for name in names]
    agreement = [result.variable_stats[name].nan_agreement_rate for name in names]

    fig, (left, right) = plt.subplots(
        1, 2, figsize=figsize, gridspec_kw={"width_ratios": [2, 1]}
    )

    # RMSE bar chart (pass/fail coloring)
    left.barh(names, rmse_values, color=bar_colors)
    left.set_xlabel("RMSE")
    left.set_title(f"{result.label} — RMSE per variable")
    left.invert_yaxis()

    # NaN agreement chart
    right.barh(names, agreement, color="#ABC8A4")
    right.set_xlabel("NaN agreement rate")
    right.set_xlim(0, 1.05)
    right.set_title("NaN pattern agreement")
    right.invert_yaxis()

    status = "PASSED" if result.passed else "FAILED"
    fig.suptitle(f"{result.label}  [{status}]", fontweight="bold")
    fig.tight_layout()
    return fig