Skip to content

canvod.audit API Reference

Audit, comparison, and regression verification for canvodpy GNSS-VOD pipelines.

Core

Core comparison engine for canvod-audit.

The main entry point is compare_datasets(), which aligns two xarray Datasets on shared coordinates and computes per-variable statistics with configurable tolerance tiers.

ComparisonResult dataclass

Result of comparing two datasets.

Attributes

label : str — Human-readable label for this comparison. variable_stats : dict[str, VariableStats] — Per-variable statistics. tier : ToleranceTier — Tolerance tier used. passed : bool — True if all variables are within tolerance for the given tier; for the EXACT tier this is equivalent to bit-identical, for the SCIENTIFIC tier it means within the stated per-variable tolerances. failures : dict[str, str] — Variables that failed, with a reason string. metadata : dict[str, Any] — Free-form metadata (source paths, timestamps, configs). alignment : AlignmentInfo — Information about coordinate alignment. coverage : CoverageReport, optional — Per-variable validity coverage counts (only populated when coverage reporting is requested).

Source code in packages/canvod-audit/src/canvod/audit/core.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@dataclass(frozen=True)
class ComparisonResult:
    """Result of comparing two datasets.

    Attributes
    ----------
    label : str
        Human-readable label for this comparison.
    variable_stats : dict[str, VariableStats]
        Per-variable statistics.
    tier : ToleranceTier
        Tolerance tier used.
    passed : bool
        True if all variables are within tolerance for the given tier.
        For EXACT tier this is equivalent to bit-identical; for SCIENTIFIC
        tier it means within the stated per-variable tolerances.
    failures : dict[str, str]
        Variables that failed, with a reason string.
    metadata : dict[str, Any]
        Free-form metadata (source paths, timestamps, configs).
    alignment : AlignmentInfo
        Information about coordinate alignment.
    coverage : CoverageReport, optional
        Per-variable validity coverage counts; None unless coverage
        reporting was requested.
    """

    label: str
    variable_stats: dict[str, VariableStats]
    tier: ToleranceTier
    passed: bool
    failures: dict[str, str] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    alignment: AlignmentInfo | None = None
    coverage: CoverageReport | None = None

    def to_polars(self) -> Any:
        """Return per-variable stats as a polars DataFrame."""
        # Local import: polars is only needed when tabular output is requested.
        import polars as pl

        rows = [vs.as_dict() for vs in self.variable_stats.values()]
        if not rows:
            # Empty comparison: return a typed empty frame rather than failing.
            return pl.DataFrame(schema={"variable": pl.Utf8})
        return pl.DataFrame(rows)

    def summary(self) -> str:
        """Human-readable summary string.

        Emits, in order and only when present: header, alignment domain,
        dataset-unique variables, per-variable stats table, validity
        asymmetry, and failure annotations.
        """
        status = "PASSED" if self.passed else "FAILED"
        lines = [
            f"── Comparison: {self.label} ──",
            f"Tier: {self.tier.value} | Status: {status} | Variables: {len(self.variable_stats)}",
        ]

        # Alignment info
        if self.alignment:
            a = self.alignment
            # The "(dropped: ...)" suffix is only shown when anything was dropped.
            lines.append(
                f"Domain: {a.n_shared_epochs:,} epochs × {a.n_shared_sids} sids"
                + (
                    f"  (dropped: {a.n_dropped_epochs_a + a.n_dropped_epochs_b} epochs, "
                    f"{a.n_dropped_sids_a + a.n_dropped_sids_b} sids)"
                    if a.n_dropped_epochs_a
                    + a.n_dropped_epochs_b
                    + a.n_dropped_sids_a
                    + a.n_dropped_sids_b
                    else ""
                )
            )

        # Coverage: vars unique to each dataset
        if self.coverage:
            c = self.coverage
            if c.vars_a_only:
                lines.append(f"A-only vars : {c.vars_a_only}")
            if c.vars_b_only:
                lines.append(f"B-only vars : {c.vars_b_only}")

        # Per-variable stats table
        if self.variable_stats:
            lines.append(
                f"\n{'var':<14} {'exact':>5}  {'n_cmp':>9}  {'max_abs':>12}  "
                f"{'rmse':>12}  {'bias':>12}  {'p99':>12}  {'nan_agr':>7}"
            )
            lines.append("─" * 92)
            for vname, vs in self.variable_stats.items():
                exact_str = "✓" if vs.exact_match else "✗"
                if vs.n_compared == 0:
                    # No comparable pairs — show em-dash placeholders for stats.
                    lines.append(
                        f"  {vname:<12} {exact_str:>5}  {'—':>9}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>7}"
                    )
                else:
                    lines.append(
                        f"  {vname:<12} {exact_str:>5}  {vs.n_compared:>9,}  "
                        f"{vs.max_abs_diff:>12.6g}  {vs.rmse:>12.6g}  "
                        f"{vs.bias:>12.6g}  {vs.p99:>12.6g}  {vs.nan_agreement_rate:>7.4f}"
                    )

        # Coverage: per-variable valid counts (only show non-trivial rows)
        if self.coverage:
            c = self.coverage
            asymmetric = [
                v
                for v in c.valid_both
                if c.valid_a_only.get(v, 0) or c.valid_b_only.get(v, 0)
            ]
            if asymmetric:
                lines.append("\nValidity asymmetry (valid in one, NaN in other):")
                for var in asymmetric:
                    lines.append(
                        f"  {var:<14}  both={c.valid_both[var]:>9,}  "
                        f"A-only={c.valid_a_only[var]:>8,}  "
                        f"B-only={c.valid_b_only[var]:>8,}  "
                        f"neither={c.neither_valid[var]:>8,}"
                    )

        # Failures / annotations
        if self.failures:
            lines.append("\nAnnotations / failures:")
            for var, reason in self.failures.items():
                lines.append(f"  {var}: {reason}")

        return "\n".join(lines)

to_polars()

Return per-variable stats as a polars DataFrame.

Source code in packages/canvod-audit/src/canvod/audit/core.py
57
58
59
60
61
62
63
64
def to_polars(self) -> Any:
    """Return the per-variable statistics as a polars DataFrame."""
    # Imported locally so the module loads even without polars installed.
    import polars as pl

    records = [stats.as_dict() for stats in self.variable_stats.values()]
    if records:
        return pl.DataFrame(records)
    # No variables compared: hand back a typed, empty frame.
    return pl.DataFrame(schema={"variable": pl.Utf8})

summary()

Human-readable summary string.

Source code in packages/canvod-audit/src/canvod/audit/core.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def summary(self) -> str:
    """Human-readable summary string.

    Emits, in order and only when present: header, alignment domain,
    dataset-unique variables, per-variable stats table, validity
    asymmetry, and failure annotations.
    """
    status = "PASSED" if self.passed else "FAILED"
    lines = [
        f"── Comparison: {self.label} ──",
        f"Tier: {self.tier.value} | Status: {status} | Variables: {len(self.variable_stats)}",
    ]

    # Alignment info
    if self.alignment:
        a = self.alignment
        # The "(dropped: ...)" suffix is only shown when anything was dropped.
        lines.append(
            f"Domain: {a.n_shared_epochs:,} epochs × {a.n_shared_sids} sids"
            + (
                f"  (dropped: {a.n_dropped_epochs_a + a.n_dropped_epochs_b} epochs, "
                f"{a.n_dropped_sids_a + a.n_dropped_sids_b} sids)"
                if a.n_dropped_epochs_a
                + a.n_dropped_epochs_b
                + a.n_dropped_sids_a
                + a.n_dropped_sids_b
                else ""
            )
        )

    # Coverage: vars unique to each dataset
    if self.coverage:
        c = self.coverage
        if c.vars_a_only:
            lines.append(f"A-only vars : {c.vars_a_only}")
        if c.vars_b_only:
            lines.append(f"B-only vars : {c.vars_b_only}")

    # Per-variable stats table
    if self.variable_stats:
        lines.append(
            f"\n{'var':<14} {'exact':>5}  {'n_cmp':>9}  {'max_abs':>12}  "
            f"{'rmse':>12}  {'bias':>12}  {'p99':>12}  {'nan_agr':>7}"
        )
        lines.append("─" * 92)
        for vname, vs in self.variable_stats.items():
            exact_str = "✓" if vs.exact_match else "✗"
            if vs.n_compared == 0:
                # No comparable pairs — show em-dash placeholders for stats.
                lines.append(
                    f"  {vname:<12} {exact_str:>5}  {'—':>9}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>12}  {'—':>7}"
                )
            else:
                lines.append(
                    f"  {vname:<12} {exact_str:>5}  {vs.n_compared:>9,}  "
                    f"{vs.max_abs_diff:>12.6g}  {vs.rmse:>12.6g}  "
                    f"{vs.bias:>12.6g}  {vs.p99:>12.6g}  {vs.nan_agreement_rate:>7.4f}"
                )

    # Coverage: per-variable valid counts (only show non-trivial rows)
    if self.coverage:
        c = self.coverage
        asymmetric = [
            v
            for v in c.valid_both
            if c.valid_a_only.get(v, 0) or c.valid_b_only.get(v, 0)
        ]
        if asymmetric:
            lines.append("\nValidity asymmetry (valid in one, NaN in other):")
            for var in asymmetric:
                lines.append(
                    f"  {var:<14}  both={c.valid_both[var]:>9,}  "
                    f"A-only={c.valid_a_only[var]:>8,}  "
                    f"B-only={c.valid_b_only[var]:>8,}  "
                    f"neither={c.neither_valid[var]:>8,}"
                )

    # Failures / annotations
    if self.failures:
        lines.append("\nAnnotations / failures:")
        for var, reason in self.failures.items():
            lines.append(f"  {var}: {reason}")

    return "\n".join(lines)

AlignmentInfo dataclass

Information about how two datasets were aligned.

Source code in packages/canvod-audit/src/canvod/audit/core.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
@dataclass(frozen=True)
class AlignmentInfo:
    """Information about how two datasets were aligned."""

    # Size of the shared (intersection) domain actually compared.
    n_shared_epochs: int
    n_shared_sids: int
    # Original extents of each input before alignment.
    n_epochs_a: int
    n_epochs_b: int
    n_sids_a: int
    n_sids_b: int

    @property
    def n_dropped_epochs_a(self) -> int:
        """Number of A's epochs that fell outside the shared domain."""
        return self.n_epochs_a - self.n_shared_epochs

    @property
    def n_dropped_epochs_b(self) -> int:
        """Number of B's epochs that fell outside the shared domain."""
        return self.n_epochs_b - self.n_shared_epochs

    @property
    def n_dropped_sids_a(self) -> int:
        """Number of A's sids that fell outside the shared domain."""
        return self.n_sids_a - self.n_shared_sids

    @property
    def n_dropped_sids_b(self) -> int:
        """Number of B's sids that fell outside the shared domain."""
        return self.n_sids_b - self.n_shared_sids

compare_datasets(ds_a, ds_b, *, variables=None, tier=ToleranceTier.NUMERICAL, tolerance_overrides=None, label='', align=True, metadata=None, report_coverage=False)

Compare two xarray Datasets variable-by-variable.

Parameters

ds_a, ds_b : xarray.Dataset — Datasets to compare; ds_a is typically the candidate (canvodpy), ds_b the reference. variables : list[str], optional — Variables to compare. If None, uses the intersection of both datasets' data variables. tier : ToleranceTier — Comparison strictness level. tolerance_overrides : dict[str, Tolerance], optional — Per-variable tolerance overrides. label : str — Human-readable label for this comparison. align : bool — If True, align datasets on shared (epoch, sid) coordinates. metadata : dict, optional — Free-form metadata to attach to the result. report_coverage : bool — If True, also compute per-variable validity counts and attach a CoverageReport to the result.

Returns

ComparisonResult

Source code in packages/canvod-audit/src/canvod/audit/core.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
def compare_datasets(
    ds_a: Any,
    ds_b: Any,
    *,
    variables: list[str] | None = None,
    tier: ToleranceTier = ToleranceTier.NUMERICAL,
    tolerance_overrides: dict[str, Tolerance] | None = None,
    label: str = "",
    align: bool = True,
    metadata: dict[str, Any] | None = None,
    report_coverage: bool = False,
) -> ComparisonResult:
    """Compare two xarray Datasets variable-by-variable.

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets to compare. ``ds_a`` is typically the candidate (canvodpy),
        ``ds_b`` the reference.
    variables : list[str], optional
        Variables to compare. If None, uses the intersection of both datasets'
        data variables.
    tier : ToleranceTier
        Comparison strictness level.
    tolerance_overrides : dict[str, Tolerance], optional
        Per-variable tolerance overrides.
    label : str
        Human-readable label for this comparison.
    align : bool
        If True, align datasets on shared (epoch, sid) coordinates.
    metadata : dict, optional
        Free-form metadata to attach to the result.
    report_coverage : bool
        If True, also compute per-variable validity counts (valid in both,
        valid in only one, valid in neither) and attach a ``CoverageReport``
        to the result.

    Returns
    -------
    ComparisonResult
        Per-variable statistics, overall pass/fail, failure reasons, and
        (optionally) alignment and coverage information.
    """
    alignment = None
    if align:
        ds_a, ds_b, alignment = _align_datasets(ds_a, ds_b)

        # Guard: fail if alignment produced zero overlap
        if alignment.n_shared_epochs == 0 or alignment.n_shared_sids == 0:
            return ComparisonResult(
                label=label or f"{tier.value} comparison",
                variable_stats={},
                tier=tier,
                passed=False,
                failures={"_alignment": "No shared coordinates after alignment"},
                metadata=metadata or {},
                alignment=alignment,
            )
        # Warn when >5 % of the union is dropped — significant data loss
        total_epochs = (
            alignment.n_shared_epochs
            + alignment.n_dropped_epochs_a
            + alignment.n_dropped_epochs_b
        )
        total_sids = (
            alignment.n_shared_sids
            + alignment.n_dropped_sids_a
            + alignment.n_dropped_sids_b
        )
        drop_epoch_pct = (
            (alignment.n_dropped_epochs_a + alignment.n_dropped_epochs_b) / total_epochs
            if total_epochs > 0
            else 0.0
        )
        drop_sid_pct = (
            (alignment.n_dropped_sids_a + alignment.n_dropped_sids_b) / total_sids
            if total_sids > 0
            else 0.0
        )
        if drop_epoch_pct > 0.05 or drop_sid_pct > 0.05:
            warnings.warn(
                f"[{label or 'compare_datasets'}] Alignment dropped "
                f"{drop_epoch_pct:.1%} of epochs "
                f"({alignment.n_dropped_epochs_a} from A, "
                f"{alignment.n_dropped_epochs_b} from B) and "
                f"{drop_sid_pct:.1%} of SIDs "
                f"({alignment.n_dropped_sids_a} from A, "
                f"{alignment.n_dropped_sids_b} from B). "
                "Results cover only the intersection.",
                stacklevel=2,
            )

    # Determine variables to compare
    vars_a = set(ds_a.data_vars)
    vars_b = set(ds_b.data_vars)
    if variables is None:
        variables = sorted(vars_a & vars_b)

    # Compute per-variable stats
    variable_stats: dict[str, VariableStats] = {}
    failures: dict[str, str] = {}
    cov_valid_both: dict[str, int] = {}
    cov_valid_a_only: dict[str, int] = {}
    cov_valid_b_only: dict[str, int] = {}
    cov_neither: dict[str, int] = {}

    for var in variables:
        # Explicitly-requested variables missing from either side are
        # silently skipped (they show up via coverage vars_a_only/vars_b_only).
        if var not in ds_a.data_vars or var not in ds_b.data_vars:
            continue

        try:
            a_vals = ds_a[var].values.astype(np.float64)
            b_vals = ds_b[var].values.astype(np.float64)
        except (ValueError, TypeError):
            # BUGFIX: `except ValueError, TypeError:` is Python 2 syntax and a
            # SyntaxError in Python 3 — the tuple form catches both exceptions.
            failures[var] = "non-numeric dtype, skipped"
            continue

        if a_vals.shape != b_vals.shape:
            failures[var] = f"shape mismatch: {a_vals.shape} vs {b_vals.shape}"
            continue

        # Strip attrs from the DataArray and all its coordinates before
        # comparing — da.equals() already ignores attrs per xarray spec, but
        # this makes the contract explicit.  Coordinate VALUES and dtypes are
        # still checked; if they differ between stores that is meaningful.
        exact_match = bool(_strip_attrs(ds_a[var]).equals(_strip_attrs(ds_b[var])))

        vs = compute_variable_stats(var, a_vals, b_vals, exact_match=exact_match)
        variable_stats[var] = vs

        # Tolerance check is annotation-only — never overrides exact_match for `passed`
        tol = get_tolerance(var, tier, tolerance_overrides)
        reason = _check_tolerance(vs, tol)
        if reason:
            failures[var] = reason

        # Structural diagnostic: values are identical but exact_match=False —
        # use xr.testing.assert_equal to pinpoint what structural element
        # differs (coord dtype, missing coord, dimension order, etc.).
        # Only runs when no numeric or NaN-rate failure was already recorded,
        # so it doesn't shadow a real science difference.
        if not exact_match and vs.max_abs_diff == 0.0 and var not in failures:
            msg = _structural_diff(ds_a[var], ds_b[var])
            failures[var] = f"structural (values identical): {msg}"

        if report_coverage:
            mask_a = ~np.isnan(a_vals)
            mask_b = ~np.isnan(b_vals)
            cov_valid_both[var] = int(np.sum(mask_a & mask_b))
            cov_valid_a_only[var] = int(np.sum(mask_a & ~mask_b))
            cov_valid_b_only[var] = int(np.sum(~mask_a & mask_b))
            cov_neither[var] = int(np.sum(~mask_a & ~mask_b))

    coverage = (
        CoverageReport(
            vars_a_only=sorted(vars_a - vars_b),
            vars_b_only=sorted(vars_b - vars_a),
            valid_both=cov_valid_both,
            valid_a_only=cov_valid_a_only,
            valid_b_only=cov_valid_b_only,
            neither_valid=cov_neither,
        )
        if report_coverage
        else None
    )

    # passed iff every compared variable is within tolerance for the given tier.
    # For EXACT tier (atol=0): a variable with any diff is in failures, so
    # len(failures)==0 implies bit-identical.  For SCIENTIFIC tier: implies
    # within stated tolerances.  exact_match is still recorded per-variable for
    # informational purposes (shown in the Typst ✓/✗ column).
    passed = bool(variable_stats) and len(failures) == 0

    return ComparisonResult(
        label=label or f"{tier.value} comparison",
        variable_stats=variable_stats,
        tier=tier,
        passed=passed,
        failures=failures,
        metadata=metadata or {},
        alignment=alignment,
        coverage=coverage,
    )

Statistics

Statistical comparison functions for paired arrays.

All functions operate on flat numpy arrays and handle NaN masking consistently: statistics are computed only over mutually non-NaN values.

This module also provides the observable-difference reporting framework: VariableBudget, VarDiffStats, compute_diff_report, and print_diff_report. These support the scientific principle that every observable difference must be reported with actual numbers and annotated against a physically-grounded expected budget — never hidden in tolerances.

VariableStats dataclass

Per-variable comparison statistics.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
@dataclass(frozen=True)
class VariableStats:
    """Per-variable comparison statistics."""

    name: str
    exact_match: bool
    # ── validity counts over the shared (epoch, sid) domain ───────────────
    n_total: int  # total cells in shared (epoch, sid) domain
    n_compared: int  # both non-NaN (valid pairs)
    n_a_only: int  # valid in A, NaN in B
    n_b_only: int  # valid in B, NaN in A
    n_neither: int  # both NaN
    n_nonzero_diff: int  # valid pairs where |diff| > 0
    # ── difference statistics (NaN when exact_match=True or n_compared=0) ─
    max_abs_diff: float
    rmse: float
    mae: float
    bias: float
    correlation: float
    # ── percentiles of |diff| over valid pairs ────────────────────────────
    p50: float
    p90: float
    p99: float
    # ── NaN counts and rates ──────────────────────────────────────────────
    n_nan_a: int
    n_nan_b: int
    pct_nan_a: float
    pct_nan_b: float
    nan_agreement_rate: float

    def as_dict(self) -> dict[str, Any]:
        """Flat dict for DataFrame construction."""
        # Assembled in three groups — raw counts, rounded difference stats,
        # and rounded NaN rates — preserving the original column order.
        counts = {
            "variable": self.name,
            "exact_match": self.exact_match,
            "n_total": self.n_total,
            "n_compared": self.n_compared,
            "n_a_only": self.n_a_only,
            "n_b_only": self.n_b_only,
            "n_neither": self.n_neither,
            "n_nonzero_diff": self.n_nonzero_diff,
        }
        rounded_8 = [
            ("max_abs_diff", self.max_abs_diff),
            ("rmse", self.rmse),
            ("mae", self.mae),
            ("bias", self.bias),
        ]
        percentiles_8 = [("p50", self.p50), ("p90", self.p90), ("p99", self.p99)]
        return {
            **counts,
            **{key: round(val, 8) for key, val in rounded_8},
            "correlation": round(self.correlation, 6),
            **{key: round(val, 8) for key, val in percentiles_8},
            "pct_nan_a": round(self.pct_nan_a, 4),
            "pct_nan_b": round(self.pct_nan_b, 4),
            "nan_agreement": round(self.nan_agreement_rate, 4),
        }

as_dict()

Flat dict for DataFrame construction.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def as_dict(self) -> dict[str, Any]:
    """Flat dict for DataFrame construction."""
    # Build incrementally so insertion (column) order stays explicit.
    out: dict[str, Any] = {"variable": self.name, "exact_match": self.exact_match}
    for attr in ("n_total", "n_compared", "n_a_only", "n_b_only",
                 "n_neither", "n_nonzero_diff"):
        out[attr] = getattr(self, attr)
    for attr in ("max_abs_diff", "rmse", "mae", "bias"):
        out[attr] = round(getattr(self, attr), 8)
    out["correlation"] = round(self.correlation, 6)
    for attr in ("p50", "p90", "p99"):
        out[attr] = round(getattr(self, attr), 8)
    out["pct_nan_a"] = round(self.pct_nan_a, 4)
    out["pct_nan_b"] = round(self.pct_nan_b, 4)
    out["nan_agreement"] = round(self.nan_agreement_rate, 4)
    return out

compute_variable_stats(name, a, b, exact_match=None)

Compute all comparison statistics for a single variable.

Parameters

name : str Variable name. a, b : np.ndarray Arrays of equal shape to compare. exact_match : bool, optional Pre-computed exact equality (e.g. from da.equals()). If None, inferred as max_abs_diff == 0.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
def compute_variable_stats(
    name: str,
    a: np.ndarray,
    b: np.ndarray,
    exact_match: bool | None = None,
) -> VariableStats:
    """Compute all comparison statistics for a single variable.

    Parameters
    ----------
    name : str
        Variable name.
    a, b : np.ndarray
        Arrays of equal shape to compare.
    exact_match : bool, optional
        Pre-computed exact equality (e.g. from ``da.equals()``).
        If None, inferred as ``max_abs_diff == 0``.
    """
    # Flatten and promote to float64 so NaN/inf masking behaves uniformly.
    flat_a = a.ravel().astype(np.float64)
    flat_b = b.ravel().astype(np.float64)

    n_total = len(flat_a)
    finite_a = np.isfinite(flat_a)
    finite_b = np.isfinite(flat_b)
    both_valid = finite_a & finite_b

    nan_count_a = int(np.isnan(flat_a).sum())
    nan_count_b = int(np.isnan(flat_b).sum())

    # Fields that do not depend on the paired differences.
    base = dict(
        name=name,
        n_total=n_total,
        n_a_only=int(np.sum(finite_a & ~finite_b)),
        n_b_only=int(np.sum(~finite_a & finite_b)),
        n_neither=int(np.sum(~finite_a & ~finite_b)),
        n_nan_a=nan_count_a,
        n_nan_b=nan_count_b,
        pct_nan_a=nan_count_a / n_total if n_total > 0 else 0.0,
        pct_nan_b=nan_count_b / n_total if n_total > 0 else 0.0,
        nan_agreement_rate=nan_agreement(flat_a, flat_b),
    )

    n_compared = int(both_valid.sum())
    if n_compared == 0:
        # No mutually valid pairs: all difference statistics are undefined.
        undefined = float("nan")
        return VariableStats(
            exact_match=True if exact_match is None else exact_match,
            n_compared=0,
            n_nonzero_diff=0,
            max_abs_diff=undefined,
            rmse=undefined,
            mae=undefined,
            bias=undefined,
            correlation=undefined,
            p50=undefined,
            p90=undefined,
            p99=undefined,
            **base,
        )

    # Paired differences over mutually valid elements, computed once.
    delta = flat_a[both_valid] - flat_b[both_valid]
    abs_delta = np.abs(delta)
    worst = float(abs_delta.max())
    if exact_match is None:
        exact_match = worst == 0.0

    return VariableStats(
        exact_match=exact_match,
        n_compared=n_compared,
        n_nonzero_diff=int(np.count_nonzero(abs_delta > 0)),
        max_abs_diff=worst,
        rmse=float(np.sqrt(np.mean(delta * delta))),
        mae=float(abs_delta.mean()),
        bias=float(delta.mean()),
        correlation=correlation(flat_a, flat_b),
        p50=float(np.percentile(abs_delta, 50)),
        p90=float(np.percentile(abs_delta, 90)),
        p99=float(np.percentile(abs_delta, 99)),
        **base,
    )

rmse(a, b)

Root Mean Square Error over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
285
286
287
288
289
290
291
def rmse(a: np.ndarray, b: np.ndarray) -> float:
    """Root Mean Square Error over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — RMSE is undefined.
        return float("nan")
    residual = a[valid] - b[valid]
    return float(np.sqrt(np.mean(np.square(residual))))

bias(a, b)

Mean difference (a - b) over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
294
295
296
297
298
299
def bias(a: np.ndarray, b: np.ndarray) -> float:
    """Mean difference (a - b) over mutually valid elements."""
    valid = _valid_mask(a, b)
    # Undefined (NaN) when there is no overlapping valid data.
    return float(np.mean(a[valid] - b[valid])) if valid.any() else float("nan")

mae(a, b)

Mean Absolute Error over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
302
303
304
305
306
307
def mae(a: np.ndarray, b: np.ndarray) -> float:
    """Mean Absolute Error over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — MAE is undefined.
        return float("nan")
    return float(np.abs(a[valid] - b[valid]).mean())

max_abs_diff(a, b)

Maximum absolute difference over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
310
311
312
313
314
315
def max_abs_diff(a: np.ndarray, b: np.ndarray) -> float:
    """Maximum absolute difference over mutually valid elements."""
    valid = _valid_mask(a, b)
    if not valid.any():
        # No overlapping valid data — the maximum is undefined.
        return float("nan")
    return float(np.abs(a[valid] - b[valid]).max())

correlation(a, b)

Pearson correlation over mutually valid elements.

Source code in packages/canvod-audit/src/canvod/audit/stats.py
318
319
320
321
322
323
324
def correlation(a: np.ndarray, b: np.ndarray) -> float:
    """Pearson correlation over mutually valid elements."""
    valid = _valid_mask(a, b)
    if valid.sum() < 2:
        # Correlation needs at least two paired samples.
        return float("nan")
    # np.corrcoef returns the 2x2 correlation matrix; take the off-diagonal.
    return float(np.corrcoef(a[valid], b[valid])[0, 1])

nan_agreement(a, b)

Fraction of elements where NaN status agrees (both NaN or both finite).

Source code in packages/canvod-audit/src/canvod/audit/stats.py
327
328
329
330
331
332
def nan_agreement(a: np.ndarray, b: np.ndarray) -> float:
    """Fraction of elements where NaN status agrees (both NaN or both finite)."""
    # Elementwise: True where a and b are either both NaN or both not-NaN.
    same_status = np.isnan(a) == np.isnan(b)
    return float(np.mean(same_status))

Tolerances

Tolerance definitions for numerical comparison.

Three tiers of comparison strictness, from bit-identical to domain-specific scientific thresholds.

TIER_DEFAULTS = {ToleranceTier.EXACT: Tolerance(atol=0.0, mae_atol=0.0, description='Bit-identical comparison'), ToleranceTier.NUMERICAL: Tolerance(atol=1e-06, mae_atol=1e-10, description='Float64 precision — atol bounds worst-case single-element error from operation reordering; mae_atol bounds typical (MAE) error.'), ToleranceTier.SCIENTIFIC: Tolerance(atol=0.01, mae_atol=0.01, description='Domain-specific scientific tolerance')} module-attribute

SCIENTIFIC_DEFAULTS = {'SNR': Tolerance(atol=0.25, mae_atol=0.0, nan_rate_atol=0.01, description='SBF quantization is 0.25 dB; RINEX ~0.001 dB. Hardware limitation.'), 'vod': Tolerance(atol=0.01, mae_atol=0.01, nan_rate_atol=0.01, description='VOD retrieval: sub-0.01 differences are below measurement noise.'), 'phi': Tolerance(atol=0.05, mae_atol=0.0, nan_rate_atol=0.0, description='Elevation angle: coordinate conversion differences up to ~2.4 deg observed between implementations (wrap-aware).'), 'theta': Tolerance(atol=0.05, mae_atol=0.0, nan_rate_atol=0.0, description='Azimuth angle: coordinate conversion differences.'), 'carrier_phase': Tolerance(atol=1e-06, mae_atol=1e-09, nan_rate_atol=0.01, description='Carrier phase: high-precision observable, expect near-exact agreement.'), 'pseudorange': Tolerance(atol=0.001, mae_atol=1e-06, nan_rate_atol=0.01, description='Pseudorange: meter-level observable.'), 'sat_x': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite X coordinate (meters). NaN rate may differ due to broadcast vs SP3 satellite coverage.'), 'sat_y': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite Y coordinate (meters).'), 'sat_z': Tolerance(atol=0.001, mae_atol=1e-09, nan_rate_atol=0.05, description='Satellite Z coordinate (meters).')} module-attribute

Tolerance dataclass

Numerical tolerance for a single variable comparison.

A variable passes if ALL of the following hold:

  1. max_abs_diff <= atol — worst-case single-element error
  2. mae <= mae_atol — typical (mean) error, when mae_atol > 0
  3. |NaN_rate_a - NaN_rate_b| <= nan_rate_atol

Parameters

atol : float Absolute tolerance on maximum single-element error. mae_atol : float Absolute tolerance on Mean Absolute Error (typical error). Set to 0 to skip this check. nan_rate_atol : float Maximum allowed difference in NaN rates between the two datasets. 0.0 means NaN patterns must match exactly. description : str Human-readable justification for this tolerance (for the paper).

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
@dataclass(frozen=True)
class Tolerance:
    """Numerical tolerance for a single variable comparison.

    A variable passes if ALL of the following hold:

    1. ``max_abs_diff <= atol``  — worst-case single-element error
    2. ``mae <= mae_atol``       — typical (mean) error, when ``mae_atol > 0``
    3. ``|NaN_rate_a - NaN_rate_b| <= nan_rate_atol``

    Parameters
    ----------
    atol : float
        Absolute tolerance on maximum single-element error.
    mae_atol : float
        Absolute tolerance on Mean Absolute Error (typical error).
        Set to 0 to skip this check.
    nan_rate_atol : float
        Maximum allowed difference in NaN rates between the two datasets.
        0.0 means NaN patterns must match exactly.
    description : str
        Human-readable justification for this tolerance (for the paper).
    """

    atol: float  # bound on max_abs_diff (worst-case single-element error)
    mae_atol: float  # bound on MAE; 0 disables this check
    nan_rate_atol: float = 0.0  # allowed |NaN_rate_a - NaN_rate_b|; 0 = exact match
    description: str = ""  # justification recorded alongside the tolerance

ToleranceTier

Bases: Enum

Pre-defined comparison strictness levels.

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class ToleranceTier(Enum):
    """Named strictness levels for dataset comparisons."""

    EXACT = "exact"
    """Bit-identical (atol=0, mae_atol=0). For outputs computed from the
    same source data with the same algorithm."""

    NUMERICAL = "numerical"
    """Float64 precision (atol=1e-6, mae_atol=1e-10). For values that are
    mathematically identical but may drift through floating-point
    operation ordering."""

    SCIENTIFIC = "scientific"
    """Per-variable, domain-specific thresholds. For independent
    implementations or data sources where small physical differences are
    expected (e.g. the 0.25 dB SBF SNR quantization)."""

EXACT = 'exact' class-attribute instance-attribute

Bit-identical. atol=0, mae_atol=0. Use for values that should be computed from the same source data with the same algorithm.

NUMERICAL = 'numerical' class-attribute instance-attribute

Float64 precision. atol=1e-6, mae_atol=1e-10. Use for values that should be mathematically identical but may differ due to floating-point operation ordering.

SCIENTIFIC = 'scientific' class-attribute instance-attribute

Domain-specific thresholds per variable. Use for comparisons across independent implementations or data sources where small physical differences are expected (e.g. SBF 0.25 dB SNR quantization).

get_tolerance(variable, tier, overrides=None)

Look up the tolerance for a variable at a given tier.

Resolution order:

1. overrides[variable] if provided
2. SCIENTIFIC_DEFAULTS[variable] if tier is SCIENTIFIC
3. TIER_DEFAULTS[tier] (catch-all)

Source code in packages/canvod-audit/src/canvod/audit/tolerances.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def get_tolerance(
    variable: str,
    tier: ToleranceTier,
    overrides: dict[str, Tolerance] | None = None,
) -> Tolerance:
    """Resolve the tolerance for *variable* at the given *tier*.

    Resolution order:
    1. ``overrides[variable]`` if provided
    2. ``SCIENTIFIC_DEFAULTS[variable]`` if tier is SCIENTIFIC
    3. ``TIER_DEFAULTS[tier]`` (catch-all)
    """
    # Caller-supplied overrides always win.
    if overrides is not None and variable in overrides:
        return overrides[variable]
    # SCIENTIFIC tier consults the per-variable defaults before the catch-all.
    if tier is ToleranceTier.SCIENTIFIC and variable in SCIENTIFIC_DEFAULTS:
        return SCIENTIFIC_DEFAULTS[variable]
    return TIER_DEFAULTS[tier]

Tiers — Internal

Internal consistency comparisons within canvodpy.

Compare outputs from different processing paths that should produce equivalent results: SBF vs RINEX readers, broadcast vs agency ephemeris.

compare_sbf_vs_rinex(store_sbf, store_rinex, *, group, variables=None)

Compare SBF and RINEX reader outputs from the same observation period.

Parameters

store_sbf : MyIcechunkStore Store populated from SBF files. store_rinex : MyIcechunkStore Store populated from RINEX files. group : str Date group key (e.g. "2025001"). variables : list[str], optional Variables to compare. Defaults to intersection.

Returns

ComparisonResult SCIENTIFIC tier — expects SNR quantization differences (0.25 dB) and potentially different satellite coverage.

Source code in packages/canvod-audit/src/canvod/audit/tiers/internal.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def compare_sbf_vs_rinex(
    store_sbf: Any,
    store_rinex: Any,
    *,
    group: str,
    variables: list[str] | None = None,
) -> ComparisonResult:
    """Compare SBF and RINEX reader outputs for the same observation period.

    Parameters
    ----------
    store_sbf : MyIcechunkStore
        Store populated from SBF files.
    store_rinex : MyIcechunkStore
        Store populated from RINEX files.
    group : str
        Date group key (e.g. "2025001").
    variables : list[str], optional
        Variables to compare. Defaults to the intersection.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — expects SNR quantization differences (0.25 dB)
        and potentially different satellite coverage.
    """
    provenance = {
        "comparison_type": "internal",
        "reader_a": "sbf",
        "reader_b": "rinex",
        "group": group,
    }
    return compare_datasets(
        store_sbf.read_group(group),
        store_rinex.read_group(group),
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        label=f"SBF vs RINEX — {group}",
        metadata=provenance,
    )

compare_ephemeris_sources(store_broadcast, store_agency, *, group, variables=None)

Compare broadcast vs agency (SP3/CLK) ephemeris augmentation.

Parameters

store_broadcast : MyIcechunkStore Store augmented with broadcast ephemeris (from SBF SatVisibility). store_agency : MyIcechunkStore Store augmented with agency products (SP3 + CLK). group : str Date group key.

Returns

ComparisonResult SCIENTIFIC tier — broadcast ephemeris has lower accuracy than precise products (~1-2m vs ~2cm orbit accuracy).

Source code in packages/canvod-audit/src/canvod/audit/tiers/internal.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def compare_ephemeris_sources(
    store_broadcast: Any,
    store_agency: Any,
    *,
    group: str,
    variables: list[str] | None = None,
) -> ComparisonResult:
    """Compare broadcast vs agency (SP3/CLK) ephemeris augmentation.

    Parameters
    ----------
    store_broadcast : MyIcechunkStore
        Store augmented with broadcast ephemeris (from SBF SatVisibility).
    store_agency : MyIcechunkStore
        Store augmented with agency products (SP3 + CLK).
    group : str
        Date group key.
    variables : list[str], optional
        Variables to compare; defaults to the satellite-position and
        look-angle variables present in both datasets.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — broadcast ephemeris has lower accuracy than
        precise products (~1-2m vs ~2cm orbit accuracy).
    """
    ds_a = store_broadcast.read_group(group)
    ds_b = store_agency.read_group(group)

    if variables is None:
        # Default to satellite coordinate variables shared by both datasets.
        candidates = ("sat_x", "sat_y", "sat_z", "phi", "theta")
        variables = [
            name
            for name in candidates
            if name in ds_a.data_vars and name in ds_b.data_vars
        ]

    return compare_datasets(
        ds_a,
        ds_b,
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        label=f"Broadcast vs Agency ephemeris — {group}",
        metadata={
            "comparison_type": "internal",
            "ephemeris_a": "broadcast",
            "ephemeris_b": "agency",
            "group": group,
        },
    )

Tiers — Regression

Regression verification against frozen reference outputs.

Freeze a known-good output as a checkpoint, then compare future outputs against it to detect regressions.

freeze_checkpoint(ds, output_path, *, metadata=None)

Save a dataset as a NetCDF checkpoint for future regression testing.

Parameters

ds : xarray.Dataset The known-good reference output. output_path : Path Where to write the checkpoint file (.nc). metadata : dict, optional Metadata to store as dataset attributes (git hash, date, config).

Returns

Path The written checkpoint path.

Source code in packages/canvod-audit/src/canvod/audit/tiers/regression.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def freeze_checkpoint(
    ds: Any,
    output_path: Path,
    *,
    metadata: dict[str, Any] | None = None,
) -> Path:
    """Save a dataset as a NetCDF checkpoint for future regression testing.

    Parameters
    ----------
    ds : xarray.Dataset
        The known-good reference output.
    output_path : Path
        Where to write the checkpoint file (.nc).
    metadata : dict, optional
        Metadata to store as dataset attributes (git hash, date, config).

    Returns
    -------
    Path
        The written checkpoint path.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if metadata:
        ds = ds.assign_attrs(**{f"checkpoint_{k}": str(v) for k, v in metadata.items()})

    ds.to_netcdf(output_path)
    return output_path

compare_against_checkpoint(ds, checkpoint_path, *, variables=None, tier=ToleranceTier.EXACT, label='')

Compare a dataset against a frozen checkpoint.

Parameters

ds : xarray.Dataset Current output to verify. checkpoint_path : Path Path to the reference checkpoint (.nc file). variables : list[str], optional Variables to compare. Defaults to intersection. tier : ToleranceTier Default EXACT — regressions should produce identical output. label : str Comparison label.

Returns

ComparisonResult

Source code in packages/canvod-audit/src/canvod/audit/tiers/regression.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def compare_against_checkpoint(
    ds: Any,
    checkpoint_path: Path,
    *,
    variables: list[str] | None = None,
    tier: ToleranceTier = ToleranceTier.EXACT,
    label: str = "",
) -> ComparisonResult:
    """Compare a dataset against a frozen checkpoint.

    Parameters
    ----------
    ds : xarray.Dataset
        Current output to verify.
    checkpoint_path : Path
        Path to the reference checkpoint (.nc file).
    variables : list[str], optional
        Variables to compare. Defaults to the intersection.
    tier : ToleranceTier
        Default EXACT — a regression check should reproduce the reference
        bit-for-bit.
    label : str
        Comparison label; auto-generated from the checkpoint name if empty.

    Returns
    -------
    ComparisonResult
    """
    ref_path = Path(checkpoint_path)
    reference = xr.open_dataset(ref_path)

    comparison_label = label if label else f"Regression check vs {ref_path.name}"
    return compare_datasets(
        ds,
        reference,
        variables=variables,
        tier=tier,
        label=comparison_label,
        metadata={
            "comparison_type": "regression",
            "checkpoint_path": str(ref_path),
        },
    )

Tiers — External

External intercomparison with independent implementations.

Compare canvodpy outputs against gnssvod (Humphrey et al.) — the established community GNSS-VOD processing tool.

This module provides the low-level compare_vs_gnssvod function. For the full audit workflow (with AuditResult), use canvod.audit.runners.vs_gnssvod.audit_vs_gnssvod instead.

compare_vs_gnssvod(ds_canvod, ds_or_df_gnssvod, *, band_filter='L1|C', snr_col='S1C', vod_col='VOD1', variables=None, label='canvodpy vs gnssvod', gnssvod_epoch_col='Epoch', gnssvod_sid_col='SV')

Compare canvodpy output against gnssvod output for one band.

Uses GnssvodAdapter to project canvodpy into gnssvod variable space before comparison, ensuring identical variable names, units, and conventions.

Parameters

ds_canvod : xarray.Dataset canvodpy output (epoch, sid) dataset. ds_or_df_gnssvod : xarray.Dataset or pandas.DataFrame gnssvod output. If a pandas DataFrame, it is automatically converted to xarray. band_filter : str SID band suffix, e.g. "L1|C" or "L2|W". snr_col : str gnssvod SNR column name for this band. vod_col : str or None gnssvod VOD column name for this band. variables : list[str], optional Variables to compare. Defaults to all shared variables. label : str Comparison label. gnssvod_epoch_col, gnssvod_sid_col : str Column names in gnssvod DataFrame.

Returns

ComparisonResult SCIENTIFIC tier — two independent implementations.

Source code in packages/canvod-audit/src/canvod/audit/tiers/external.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def compare_vs_gnssvod(
    ds_canvod: Any,
    ds_or_df_gnssvod: Any,
    *,
    band_filter: str = "L1|C",
    snr_col: str = "S1C",
    vod_col: str | None = "VOD1",
    variables: list[str] | None = None,
    label: str = "canvodpy vs gnssvod",
    gnssvod_epoch_col: str = "Epoch",
    gnssvod_sid_col: str = "SV",
) -> ComparisonResult:
    """Compare canvodpy output against gnssvod output for one band.

    canvodpy data is first projected into gnssvod variable space via
    ``GnssvodAdapter`` so that variable names, units, and conventions
    line up before any statistics are computed.

    Parameters
    ----------
    ds_canvod : xarray.Dataset
        canvodpy output (epoch, sid) dataset.
    ds_or_df_gnssvod : xarray.Dataset or pandas.DataFrame
        gnssvod output; DataFrames are converted to xarray automatically.
    band_filter : str
        SID band suffix, e.g. ``"L1|C"`` or ``"L2|W"``.
    snr_col : str
        gnssvod SNR column name for this band.
    vod_col : str or None
        gnssvod VOD column name for this band (None to skip VOD).
    variables : list[str], optional
        Variables to compare. Defaults to all shared variables.
    label : str
        Comparison label.
    gnssvod_epoch_col, gnssvod_sid_col : str
        Column names in the gnssvod DataFrame.

    Returns
    -------
    ComparisonResult
        SCIENTIFIC tier — two independent implementations.
    """
    import pandas as pd

    ds_gnssvod = ds_or_df_gnssvod
    if isinstance(ds_gnssvod, pd.DataFrame):
        ds_gnssvod = gnssvod_df_to_xarray(
            ds_gnssvod,
            epoch_col=gnssvod_epoch_col,
            sid_col=gnssvod_sid_col,
        )

    # Project canvodpy into gnssvod variable space.
    ds_adapted = GnssvodAdapter(
        ds_canvod,
        band_filter=band_filter,
        snr_col=snr_col,
        vod_col=vod_col,
    ).to_gnssvod_dataset()

    # Handle azimuth wrap-around before differencing.
    ds_adapted = _wrap_aware_azimuth_diff(ds_adapted, ds_gnssvod)

    # Tolerance overrides for whichever of the known variables are present.
    candidates = [
        (snr_col, "SNR"),
        ("Azimuth", "Azimuth"),
        ("Elevation", "Elevation"),
    ]
    if vod_col:
        candidates.append((vod_col, "VOD"))
    tol_overrides = {
        var: GNSSVOD_TOLERANCES[key]
        for var, key in candidates
        if var in ds_adapted.data_vars
    }

    return compare_datasets(
        ds_adapted,
        ds_gnssvod,
        variables=variables,
        tier=ToleranceTier.SCIENTIFIC,
        tolerance_overrides=tol_overrides,
        label=label,
        metadata={
            "comparison_type": "external",
            "implementation_a": "canvodpy",
            "implementation_b": "gnssvod",
            "band": band_filter,
            "snr_col": snr_col,
            "vod_col": vod_col,
        },
    )

Reporting — Tables

Table export for audit results: LaTeX, Markdown, Polars.

to_polars(result)

Return per-variable stats as a polars DataFrame.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
11
12
13
def to_polars(result: ComparisonResult) -> object:
    """Return per-variable stats as a polars DataFrame.

    Thin delegation to ``ComparisonResult.to_polars``; kept as a
    module-level function so the reporting API is uniform.
    """
    return result.to_polars()

to_markdown(result)

Render per-variable stats as a Markdown table.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def to_markdown(result: ComparisonResult) -> str:
    """Render per-variable stats as a Markdown table.

    Parameters
    ----------
    result : ComparisonResult
        Comparison whose per-variable statistics are tabulated.

    Returns
    -------
    str
        A pipe-delimited Markdown table, or a placeholder sentence when
        no variables were compared.
    """
    df = result.to_polars()
    if df.is_empty():
        return "_No variables compared._"

    def _cell(value: object) -> str:
        """Format one cell: compact floats; escape table-breaking chars."""
        if isinstance(value, float):
            return f"{value:.6g}"
        # Bug fix: unescaped "|" or embedded newlines in string cells
        # previously corrupted the table structure.
        return str(value).replace("|", r"\|").replace("\n", " ")

    cols = df.columns
    lines = []
    lines.append("| " + " | ".join(cols) + " |")
    lines.append("| " + " | ".join(["---"] * len(cols)) + " |")

    for row in df.iter_rows(named=True):
        lines.append("| " + " | ".join(_cell(row[c]) for c in cols) + " |")

    return "\n".join(lines)

to_latex(result, *, caption='', label='')

Render per-variable stats as a LaTeX table.

Produces a tabular environment inside a table float, ready to paste into a paper.

Source code in packages/canvod-audit/src/canvod/audit/reporting/tables.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def to_latex(
    result: ComparisonResult,
    *,
    caption: str = "",
    label: str = "",
) -> str:
    """Render per-variable stats as a LaTeX table.

    Produces a ``tabular`` environment (booktabs rules) inside a
    ``table`` float, ready to paste into a paper.

    Parameters
    ----------
    result : ComparisonResult
        Comparison whose per-variable statistics are tabulated.
    caption : str
        Table caption; defaults to ``result.label``. Passed through
        verbatim so callers may embed LaTeX markup.
    label : str
        Optional ``\\label`` key for cross-referencing.

    Returns
    -------
    str
        LaTeX source, or a comment line when no variables were compared.
    """
    df = result.to_polars()
    if df.is_empty():
        return "% No variables compared."

    def _escape(text: str) -> str:
        # Underscores are common in stat/variable names (e.g. max_abs_diff)
        # and must be escaped outside math mode.
        return text.replace("_", r"\_")

    cols = df.columns
    col_spec = "l" + "r" * (len(cols) - 1)

    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        f"\\caption{{{caption or result.label}}}",
    ]
    if label:
        lines.append(f"\\label{{{label}}}")

    lines.append(f"\\begin{{tabular}}{{{col_spec}}}")
    lines.append(r"\toprule")

    # Header — bug fix: column names were previously emitted unescaped,
    # so names like max_abs_diff broke LaTeX compilation (rows were
    # already escaped; the header was not).
    header = " & ".join(f"\\textbf{{{_escape(c)}}}" for c in cols) + r" \\"
    lines.append(header)
    lines.append(r"\midrule")

    # Rows
    for row in df.iter_rows(named=True):
        cells = []
        for c in cols:
            v = row[c]
            if isinstance(v, float):
                # Scientific notation for very small or very large values.
                if abs(v) < 1e-3 or abs(v) > 1e4:
                    cells.append(f"${v:.2e}$")
                else:
                    cells.append(f"{v:.4f}")
            else:
                cells.append(_escape(str(v)))
        lines.append(" & ".join(cells) + r" \\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")

    return "\n".join(lines)

Reporting — Figures

Publication-quality figures for audit results.

plot_diff_histogram(ds_a, ds_b, variable, *, ax=None, bins=100, title=None)

Histogram of element-wise differences (a - b) for a single variable.

Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def plot_diff_histogram(
    ds_a: Any,
    ds_b: Any,
    variable: str,
    *,
    ax: Any = None,
    bins: int = 100,
    title: str | None = None,
) -> Any:
    """Histogram of element-wise differences (a - b) for a single variable.

    Only elements finite in BOTH datasets contribute.

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets holding *variable*; flattened before differencing.
    variable : str
        Variable name to compare.
    ax : matplotlib Axes, optional
        Axes to draw into; a new figure is created when omitted.
    bins : int
        Histogram bin count.
    title : str, optional
        Plot title override.

    Returns
    -------
    matplotlib.figure.Figure
    """
    import matplotlib.pyplot as plt

    a = ds_a[variable].values.ravel().astype(np.float64)
    b = ds_b[variable].values.ravel().astype(np.float64)
    mask = np.isfinite(a) & np.isfinite(b)
    diff = a[mask] - b[mask]

    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 4))
    else:
        fig = ax.get_figure()

    ax.set_xlabel(f"Difference ({variable})")
    ax.set_ylabel("Count")
    ax.set_title(title or f"Distribution of differences — {variable}")

    if diff.size == 0:
        # Bug fix: np.mean/np.max on an empty array raised when the two
        # datasets had no finite overlap — render a placeholder instead.
        ax.text(
            0.5,
            0.5,
            "No finite overlapping values",
            transform=ax.transAxes,
            ha="center",
            va="center",
        )
        fig.tight_layout()
        return fig

    ax.hist(diff, bins=bins, color="#5D7D5B", edgecolor="#375D3B", alpha=0.8)
    ax.axvline(0, color="#C75050", linewidth=1, linestyle="--")

    # Annotate with summary stats
    _text = f"mean={np.mean(diff):.2e}\nstd={np.std(diff):.2e}\nmax|diff|={np.max(np.abs(diff)):.2e}"
    ax.text(
        0.98,
        0.95,
        _text,
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontsize=8,
        family="monospace",
        bbox={"boxstyle": "round", "facecolor": "#E1E6B9", "alpha": 0.8},
    )

    fig.tight_layout()
    return fig

plot_scatter(ds_a, ds_b, variable, *, ax=None, title=None, label_a='Dataset A', label_b='Dataset B', max_points=50000)

Scatter plot of variable values: a vs b with 1:1 line.

Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def plot_scatter(
    ds_a: Any,
    ds_b: Any,
    variable: str,
    *,
    ax: Any = None,
    title: str | None = None,
    label_a: str = "Dataset A",
    label_b: str = "Dataset B",
    max_points: int = 50000,
) -> Any:
    """Scatter plot of variable values: a vs b with a 1:1 reference line.

    Only elements finite in both datasets are shown; at most *max_points*
    points are drawn (deterministic subsample, seed 42).

    Parameters
    ----------
    ds_a, ds_b : xarray.Dataset
        Datasets holding *variable*; flattened before plotting.
    variable : str
        Variable name to compare.
    ax : matplotlib Axes, optional
        Axes to draw into; a new square figure is created when omitted.
    title : str, optional
        Plot title override.
    label_a, label_b : str
        Axis labels for the two datasets (a on y, b on x).
    max_points : int
        Upper bound on plotted points, for rendering performance.

    Returns
    -------
    matplotlib.figure.Figure
    """
    import matplotlib.pyplot as plt

    a = ds_a[variable].values.ravel().astype(np.float64)
    b = ds_b[variable].values.ravel().astype(np.float64)
    mask = np.isfinite(a) & np.isfinite(b)
    a, b = a[mask], b[mask]

    # Subsample for plotting performance (fixed seed → reproducible figure)
    if len(a) > max_points:
        idx = np.random.default_rng(42).choice(len(a), max_points, replace=False)
        a, b = a[idx], b[idx]

    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 6))
    else:
        fig = ax.get_figure()

    # Bug fix: dataset label and variable name were concatenated with no
    # separator, rendering e.g. "Dataset Bphi".
    ax.set_xlabel(f"{label_b} — {variable}")
    ax.set_ylabel(f"{label_a} — {variable}")
    ax.set_title(title or f"{variable}: {label_a} vs {label_b}")

    if a.size:
        ax.scatter(b, a, s=1, alpha=0.3, color="#5D7D5B")

        # 1:1 line spanning the joint data range
        lims = [min(a.min(), b.min()), max(a.max(), b.max())]
        ax.plot(lims, lims, "k--", linewidth=0.8, label="1:1")
        ax.legend(loc="upper left")
    else:
        # Bug fix: .min()/.max() on empty arrays raised when the datasets
        # had no finite overlap — render a placeholder instead.
        ax.text(
            0.5,
            0.5,
            "No finite overlapping values",
            transform=ax.transAxes,
            ha="center",
            va="center",
        )

    ax.set_aspect("equal")
    fig.tight_layout()
    return fig

plot_summary_dashboard(result, *, figsize=(12, 6))

Multi-panel summary of a ComparisonResult.

Bar chart of RMSE per variable + pass/fail indicators. Returns a matplotlib Figure.

Source code in packages/canvod-audit/src/canvod/audit/reporting/figures.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def plot_summary_dashboard(
    result: ComparisonResult,
    *,
    figsize: tuple[float, float] = (12, 6),
) -> Any:
    """Two-panel overview of a ComparisonResult.

    Left panel: RMSE per variable, colored by pass (green) / fail (red).
    Right panel: NaN-pattern agreement rate per variable.
    Returns a matplotlib Figure.
    """
    import matplotlib.pyplot as plt

    names = list(result.variable_stats.keys())
    if not names:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "No variables compared", ha="center", va="center")
        return fig

    bar_colors = [
        "#C75050" if name in result.failures else "#5D7D5B" for name in names
    ]
    rmse_values = [result.variable_stats[name].rmse for name in names]
    agreement = [result.variable_stats[name].nan_agreement_rate for name in names]

    fig, (left, right) = plt.subplots(
        1, 2, figsize=figsize, gridspec_kw={"width_ratios": [2, 1]}
    )

    # RMSE bar chart (pass/fail coloring)
    left.barh(names, rmse_values, color=bar_colors)
    left.set_xlabel("RMSE")
    left.set_title(f"{result.label} — RMSE per variable")
    left.invert_yaxis()

    # NaN agreement chart
    right.barh(names, agreement, color="#ABC8A4")
    right.set_xlabel("NaN agreement rate")
    right.set_xlim(0, 1.05)
    right.set_title("NaN pattern agreement")
    right.invert_yaxis()

    status = "PASSED" if result.passed else "FAILED"
    fig.suptitle(f"{result.label}  [{status}]", fontweight="bold")
    fig.tight_layout()
    return fig