{
    "schema_version": "1.0",
    "generated_at": "2026-06-28T02:59:55-05:00",
    "page_url": "https://mlb.gamedayanalytics.net/models/runs-under/methodology",
    "algorithm": "Gradient-boosted binary classifier + isotonic calibration",
    "target": {
        "name": "scored_zero",
        "type": "binary (0/1)",
        "definition": "P(player scores zero runs in the game). Equivalent to the UNDER on a 0.5 batter-runs prop. scored_zero=1 means the player did NOT score; the UNDER wins.",
        "units": "probability in [0,1]"
    },
    "architecture": {
        "description": "Isolated module alongside the rest of the GameDay Analytics platform. Owns its own DB tables (all prefixed ru_model_), its own Python package (python/ru_model/), its own API namespace (/api/runs-under/), and its own cron entry point.",
        "owns": {
            "tables_prefix": "ru_model_",
            "python_package": "python/ru_model/",
            "api_namespace": "/api/runs-under/"
        },
        "reads_only_shared_tables": [
            "games",
            "teams",
            "pitchers",
            "batters",
            "player_game_stats",
            "statcast_pitcher_stats",
            "lineups",
            "predicted_lineups",
            "player_props",
            "prop_history",
            "park_factors"
        ]
    },
    "active_model": {
        "id": 1,
        "version_tag": "v1.0-lgbm",
        "trained_at": "2026-06-27 21:22:18",
        "metrics": {
            "brier": 0.22447600193122613,
            "log_loss": 0.6436264311712481,
            "auc": 0.5780961515632932,
            "base_zero_rate": 0.647656940760389,
            "n_test": 3733
        }
    },
    "recent_versions": [
        {
            "id": 1,
            "version_tag": "v1.0-lgbm",
            "trained_at": "2026-06-27 21:22:18",
            "is_active": true,
            "metrics": {
                "brier": 0.22447600193122613,
                "log_loss": 0.6436264311712481,
                "auc": 0.5780961515632932
            }
        }
    ],
    "feature_groups": {
        "on_base": [
            "player_season_obp",
            "player_l7_obp",
            "player_l15_obp",
            "player_l15_reach_rate",
            "player_season_k_pct",
            "player_season_bb_pct"
        ],
        "lineup_context": [
            "batting_slot",
            "behind_obp",
            "behind_slg"
        ],
        "pitcher_suppression": [
            "sp_obp_against",
            "sp_k_pct",
            "sp_runners_on_rate",
            "sp_xwoba_against",
            "sp_hardhit_pct_against",
            "sp_barrel_pct_against"
        ],
        "environment": [
            "park_run_factor",
            "is_home",
            "is_night",
            "temperature"
        ],
        "team_offense": [
            "team_runs_per_game",
            "implied_team_total"
        ]
    },
    "feature_group_descriptions": {
        "on_base": "The batter's own propensity to reach base and score \u2014 season/recent on-base rate, runs-scored rate, plate-appearance volume. The dominant driver of whether they score at all.",
        "lineup_context": "Where the batter hits (batting_slot) and the run-scoring context around them \u2014 lineup spot, projected PAs, teammates' ability to drive them in.",
        "pitcher_suppression": "The opposing starter's ability to suppress baserunners and runs \u2014 K%, WHIP, ERA, and Statcast suppression signals for the listed starter.",
        "environment": "Park and game-environment factors: venue run environment, home/away, day/night, and park factors that shift offensive output.",
        "team_offense": "The batter's team offensive strength and the opponent's run-prevention \u2014 team runs-per-game, recent form, opponent staff context."
    },
    "feature_inventory": [
        "player_season_obp",
        "player_l7_obp",
        "player_l15_obp",
        "player_l15_reach_rate",
        "player_season_k_pct",
        "player_season_bb_pct",
        "batting_slot",
        "behind_obp",
        "behind_slg",
        "sp_obp_against",
        "sp_k_pct",
        "sp_runners_on_rate",
        "sp_xwoba_against",
        "sp_hardhit_pct_against",
        "sp_barrel_pct_against",
        "park_run_factor",
        "is_home",
        "is_night",
        "temperature",
        "team_runs_per_game",
        "implied_team_total"
    ],
    "calibration": {
        "approach": "Isotonic regression applied to the raw classifier output. The gradient-boosted booster produces raw_prob; isotonic mapping converts it to a calibrated prob_zero whose decile reliability matches observed zero-run rates.",
        "why": "Raw boosted-tree scores are not well-calibrated probabilities. Edge selection compares model P(zero) against the book's implied P(under), so the probability must be honest, not merely rank-correct.",
        "reliability": "The refinement report and /performance expose a 10-bucket reliability table: model_prob decile vs observed zero-run rate.",
        "activation_gate": "A new trained version only auto-activates if its cross-validated Brier score clears the configured ceiling (see version activation in the training pipeline)."
    },
    "sufficiency_gate": {
        "description": "The public page only publishes picks once the live model has accumulated enough graded picks AND its live Brier score is at or below the configured ceiling. Until then, the page shows a \"building sample\" state.",
        "min_graded": 150,
        "brier_ceiling": 0.18,
        "brier_live": "mean((model_prob - scored_zero)^2) over graded UNDER picks; null until any pick is graded.",
        "published_rule": "graded_count >= min_graded AND brier_live <= brier_ceiling"
    },
    "edge_logic": {
        "formula": "edge = (model P(zero) - implied P(under)) expressed in percentage points",
        "odds_filter": "Only consider books posting under odds at or below the configured threshold (avoids thin/long-shot prices).",
        "pick_selection": {
            "no_pick": "edge < min_edge_threshold (in pp), or odds outside the allowed band",
            "under": "edge >= min_edge_threshold AND under_odds within the allowed band"
        },
        "min_edge_threshold": 3,
        "odds_threshold": -160,
        "rationale": "UNDER-only: the model is trained on the zero-runs event, and the asymmetric prop (0.5 line) means UNDER is the only directionally-meaningful side. Threshold lives in ru_model_settings (versioned \u2014 every change inserts a new row)."
    },
    "grading_rules": [
        {
            "result": "win",
            "condition": "Player scored zero runs (scored_zero=1). The UNDER cleared.",
            "units": "+unit_size * (American-odds payout factor)"
        },
        {
            "result": "loss",
            "condition": "Player scored one or more runs (scored_zero=0). The UNDER missed.",
            "units": "-unit_size"
        },
        {
            "result": "void",
            "condition": "Player scratched/DNP after lock, or game postponed/cancelled.",
            "units": "0.00"
        }
    ],
    "loss_categories": {
        "scored_one": "Loss where the player scored exactly one run (near-miss).",
        "scored_multi": "Loss where the player scored two or more runs.",
        "dnp": "Void category \u2014 player did not play after lock; excluded from bankroll W-L."
    },
    "pipeline_phases": [
        {
            "phase": "seed",
            "description": "Pull the upcoming slate's probable lineups; upsert ru_model_players; build the feature frame; run the active booster + isotonic calibration; INSERT initial ru_model_predictions (prob_zero, raw_prob)."
        },
        {
            "phase": "repoll",
            "description": "Refresh lineups/odds; rebuild features; recompute predictions (only where is_locked=0); snapshot line+odds to ru_model_line_snapshots on change."
        },
        {
            "phase": "lock",
            "description": "At lock_minutes_before first pitch: snapshot features to ru_model_features; final prediction; set is_locked=1; pull latest line+odds; compute edge; if edge >= min_edge_threshold and odds in band \u2192 UNDER pick, else no_pick; INSERT ru_model_picks."
        },
        {
            "phase": "reconcile",
            "description": "For each finished game: read the player's runs from game stats; grade scored_zero vs the locked pick (win/loss/void); compute units at locked odds; assign loss_category; INSERT ru_model_results."
        },
        {
            "phase": "retrain",
            "description": "Re-pull historical batter-games in the training window; rebuild features; time-series CV; fit booster + isotonic; save joblib + metrics_json into a new ru_model_versions row. Auto-activates only if CV Brier clears the activation gate."
        }
    ],
    "limitations": [
        {
            "key": "lineup_lock_refresh",
            "description": "Once locked the prediction never refreshes, even if the lineup changes before first pitch."
        },
        {
            "key": "weather",
            "description": "Temperature/wind/humidity are not in the feature set."
        },
        {
            "key": "bullpen",
            "description": "Opposing bullpen quality/usage is only indirectly captured via team run-prevention; no per-reliever signal."
        },
        {
            "key": "in_game_state",
            "description": "No accounting for blowout/garbage-time substitutions that cut a batter's plate appearances short."
        }
    ],
    "settings": {
        "default_book": {
            "value": "Hard Rock Bet",
            "updated_at": "2026-06-27 02:40:49"
        },
        "feature_weight_overrides": {
            "value": {
                "on_base": 1,
                "lineup_context": 1,
                "pitcher_suppression": 1,
                "environment": 1,
                "team_offense": 1
            },
            "updated_at": "2026-06-27 02:40:49"
        },
        "min_edge_threshold": {
            "value": 3,
            "updated_at": "2026-06-27 02:40:49"
        },
        "odds_threshold": {
            "value": -160,
            "updated_at": "2026-06-27 02:40:49"
        },
        "poll_lead_minutes": {
            "value": 120,
            "updated_at": "2026-06-27 02:40:49"
        },
        "sufficiency_brier_ceiling": {
            "value": 0.18,
            "updated_at": "2026-06-27 02:40:49"
        },
        "sufficiency_min_graded": {
            "value": 150,
            "updated_at": "2026-06-27 02:40:49"
        },
        "unit_size": {
            "value": 25,
            "updated_at": "2026-06-27 02:40:49"
        }
    },
    "storage_layout": [
        {
            "table": "ru_model_players",
            "mutability": "upsert per game/player",
            "purpose": "One row per slate player-game \u2014 status, lineup_source, opponent, batting_slot."
        },
        {
            "table": "ru_model_predictions",
            "mutability": "mutable until is_locked=1",
            "purpose": "Current calibrated prob_zero (+ raw_prob) per player-game. Immutable after lock."
        },
        {
            "table": "ru_model_features",
            "mutability": "append-only at lock",
            "purpose": "Snapshot of every feature value used at lock time (audit trail)."
        },
        {
            "table": "ru_model_line_snapshots",
            "mutability": "append-only on change",
            "purpose": "Line + odds history per player-game (deduped \u2014 only writes on change)."
        },
        {
            "table": "ru_model_picks",
            "mutability": "append-only at lock",
            "purpose": "Final UNDER pick (under/no_pick/void) with locked line, odds, model_prob, edge."
        },
        {
            "table": "ru_model_results",
            "mutability": "append-only at grade",
            "purpose": "Graded outcome (win/loss/void), scored_zero, units, loss_category. One per pick."
        },
        {
            "table": "ru_model_versions",
            "mutability": "append-only, flag-flip activation",
            "purpose": "Trained-model artifacts with metrics; flip is_active to switch."
        },
        {
            "table": "ru_model_settings",
            "mutability": "append-only (versioned)",
            "purpose": "Editable knobs. Latest row per key wins."
        },
        {
            "table": "ru_model_feature_importance",
            "mutability": "append-only per version",
            "purpose": "Per-feature importance for each trained model version."
        }
    ],
    "storage_row_counts": {
        "player_games": 540,
        "predictions": 540,
        "picks_made": 16,
        "picks_graded": 98,
        "line_snapshots": 304,
        "feature_snapshots": 98,
        "model_versions": 1,
        "settings_rows": 16
    },
    "links": {
        "page": "/models/runs-under/methodology",
        "today_api": "/api/runs-under/today.php",
        "history_api": "/api/runs-under/history.php",
        "performance_api": "/api/runs-under/performance.php",
        "line_history": "/api/runs-under/line-history.php?player_game_id=N",
        "public_today": "/models/runs-under"
    }
}