{"default_threshold":0.5,"generated_at":"2026-05-22T04:00:04Z","honest_caveats":["All metrics computed on the last 30 production days, no train/test split. Forecast_7day is the only model with logged daily predictions + resolved outcomes; all other models report training-time test metrics until production logging is wired.","Calibration drift = empirical positive rate minus mean predicted probability over the window. Positive drift means the model is UNDER-predicting (real world rate > what the model said); negative means OVER-predicting.","Empirical precision/recall at threshold 0.5 may be weaker than the model's headline test AUC because we evaluate at a fixed operating point rather than across the full ROC. Forecast_7day's optimal threshold post-2026-05-21 retrain is 0.27; metrics_at_0.25 included for that operating point.","Models marked kind='training-time-only' have n_predictions=0 in the live track record because per-request predictions are not yet persisted. Adding production logging is tracked separately."],"models":{"contagion_chain_7d":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/atlas/contagion-chain/{trigger}","mean_predicted_prob":null,"model_id":"contagion-chain-v1-7d","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/contagion_chain_v1.json","training_time_metrics":{"overall_test_ap":0.5134118545225828,"overall_test_auc":0.6983610717603259,"overall_test_brier":0.1847886341975772,"test_ap":0.4928577559110649,"test_auc":0.6696949571962517,"test_brier":0.18587975807485208,"test_n_test":3102,"test_pos_rate":0.25435203094777564}},"duration_rsf":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/duration","mean_predicted_prob":null,"model_id":"shutdown-duration-rsf-v1","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/shutdown_cohorts_v1.json","training_time_metrics":null},"forecast_7day":{"abs_calibration_drift_pp":0.3,"brier":0.1184,"calibrated_raw_labels_view":{"abs_calibration_drift_pp":45.12,"brier":0.445,"calibration_drift_pp":45.12,"empirical_positive_rate":0.5917,"empirical_precision_at_threshold":0.5916083916083916,"empirical_recall_at_threshold":0.9929577464788732,"mean_predicted_prob":0.1405,"n_above_threshold":715,"n_false_negatives":3,"n_false_positives":292,"n_observed_positives":426,"n_predictions":720,"n_true_negatives":2,"n_true_positives":423,"note":"Calibrator applied, observed labels unchanged. Isolates the calibration fix from the disruption-relabel.","threshold":0.05},"calibration_drift_pp":-0.3,"calibrator_applied":true,"calibrator_floor":0.019,"caveats":["Headline n_above_threshold + precision/recall are at the OPERATIONAL threshold (0.05) literally emitted by the model at request time — this is what would have triggered a real alert.","metrics_at_0.5 + metrics_at_0.25 are for journalist comparability with other models' headline cuts; expect near-zero recall at 0.5 because the deployed model rarely crosses 0.5 (most predictions are low-risk by base rate).","Outcomes for the headline are recomputed from voidly_data.incidents using incident_type IN ('censorship','mixed') — matching the forecast model's post-2026-05-21 disruption-exclusion rule. The legacy sentinel_outcomes.observed (still in DB) treats every incident as positive, including IODA disruption — that view is surfaced as raw_uncalibrated_raw_labels_view.","Calibrated probabilities = live isotonic calibrator at forecast_calibrator_v2_isotonic_prod.pkl applied to the stored raw prob with the same gate as forecast_api (country in WATCHED_FOR_CALIBRATION (30) AND raw_prob >= 0.019)."],"empirical_positive_rate":0.1375,"empirical_precision_at_threshold":0.13846153846153847,"empirical_recall_at_threshold":1,"kind":"production-logged","live_endpoint":"/v1/forecast/{cc}/7day","mean_predicted_prob":0.1405,"metrics_at_0.25":{"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":0,"n_above_threshold":0,"n_false_negatives":99,"n_false_positives":0,"n_observed_positives":99,"n_predictions":720,"n_true_negatives":621,"n_true_positives":0,"threshold":0.25},"metrics_at_0.5":{"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":0,"n_above_threshold":0,"n_false_negatives":99,"n_false_positives":0,"n_observed_positives":99,"n_predictions":720,"n_true_negatives":621,"n_true_positives":0,"threshold":0.5},"model_id":"forecast-v1-7day","n_above_threshold":715,"n_false_negatives":0,"n_false_positives":616,"n_observed_positives":99,"n_observed_relabeled_from_disruption_filter":337,"n_predictions":720,"n_true_negatives":5,"n_true_positives":99,"operational_threshold":0.05,"per_country_threshold_view":{"empirical_precision_at_threshold":0.13846153846153847,"empirical_recall_at_threshold":1,"n_above_threshold":715,"n_false_negatives":0,"n_false_positives":616,"n_observed_positives":99,"n_predictions":720,"n_true_negatives":5,"n_true_positives":99,"note":"Same calibrated probabilities + clean labels as the headline, but scored at per-country F1-optimal thresholds from forecast_per_country_thresholds.json. This is what the headline would look like if sentinel-daily-eval consumed the sidecar for every country (the wiring landed 2026-05-21 — historical rows in this window predate the wiring and were logged at threshold=0.05).","thresholds_source":"/opt/voidly-ai/ml-deploy/forecast_per_country_thresholds.json"},"raw_uncalibrated_raw_labels_view":{"abs_calibration_drift_pp":54.18,"brier":0.5356,"calibration_drift_pp":54.18,"empirical_positive_rate":0.5917,"empirical_precision_at_threshold":0.6627906976744186,"empirical_recall_at_threshold":0.4014084507042254,"mean_predicted_prob":0.0498,"n_above_threshold":258,"n_false_negatives":255,"n_false_positives":87,"n_observed_positives":426,"n_predictions":720,"n_true_negatives":207,"n_true_positives":171,"note":"Stored raw probabilities scored against original sentinel_outcomes.observed (includes IODA disruption). Surfaced for transparency; do NOT cite as the model's honest production performance.","threshold":0.05},"thresholds_seen_in_window":[0.05]},"hourly_12h":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/hourly","mean_predicted_prob":null,"model_id":"hourly-forecast-v1-12h","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/hourly_forecast_features.meta.json","training_time_metrics":null},"hourly_24h":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/hourly","mean_predicted_prob":null,"model_id":"hourly-forecast-v1-24h","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/hourly_forecast_features.meta.json","training_time_metrics":null},"hourly_6h":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/hourly","mean_predicted_prob":null,"model_id":"hourly-forecast-v1-6h","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/hourly_forecast_features.meta.json","training_time_metrics":null},"multi_horizon_1d":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/multi-horizon","mean_predicted_prob":null,"model_id":"forecast-multi-horizon-1d","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/forecast_metrics.json","training_time_metrics":{"f1":0.6413043478260869,"roc_auc":0.9546571995724539}},"multi_horizon_30d":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/multi-horizon","mean_predicted_prob":null,"model_id":"forecast-multi-horizon-30d","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/forecast_metrics.json","training_time_metrics":{"f1":0.6413043478260869,"roc_auc":0.9546571995724539}},"multi_horizon_7d":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/multi-horizon","mean_predicted_prob":null,"model_id":"forecast-multi-horizon-7d","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/forecast_metrics.json","training_time_metrics":{"f1":0.6413043478260869,"roc_auc":0.9546571995724539}},"trajectory_d1":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/trajectory","mean_predicted_prob":null,"model_id":"trajectory-v1-d1","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/trajectory_metrics.json","training_time_metrics":null},"trajectory_d30":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/trajectory","mean_predicted_prob":null,"model_id":"trajectory-v1-d30","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/trajectory_metrics.json","training_time_metrics":null},"trajectory_d7":{"abs_calibration_drift_pp":null,"brier":null,"calibration_drift_pp":null,"caveats":["Production prediction logging is not yet wired for this model; this row reflects held-out test metrics, NOT live precision/recall. Until per-request predictions are persisted to a forecasts table we cannot honestly compute an empirical prod track record."],"empirical_positive_rate":null,"empirical_precision_at_threshold":null,"empirical_recall_at_threshold":null,"kind":"training-time-only","live_endpoint":"/v1/forecast/{cc}/trajectory","mean_predicted_prob":null,"model_id":"trajectory-v1-d7","n_above_threshold":0,"n_predictions":0,"n_true_positives":0,"training_metrics_source":"/opt/voidly-ai/ml-deploy/trajectory_metrics.json","training_time_metrics":null}},"most_calibrated_model":"forecast_7day","most_drifted_model":"forecast_7day","schema":"voidly-prediction-track-record/v1","window_days":30,"window_start":"2026-04-22","_voidly_pay":{"claim_did":"https://voidly.ai/pay/claim","for_builders":"https://voidly.ai/pay/for-builders","marketplace":"https://api.voidly.ai/v1/pay/marketplace","live_demo":"https://huggingface.co/spaces/emperor-mew/voidly-pay","universal_proxy":{"url":"https://api.voidly.ai/v1/pay/proxy","example":"https://api.voidly.ai/v1/pay/proxy?u=<https-url>&to=did:voidly:<your-did>&price=0.01","page":"https://voidly.ai/pay/proxy","note":"Paywall any public HTTPS URL with one query param. No SDK install required."},"install":{"typescript":"npm install @voidly/pay","python":"pip install voidly-pay","langchain":"pip install voidly-pay-langchain","vercel_ai":"npm install @voidly/pay-vercel-ai","mcp":"npx @voidly/pay-mcp","cli":"npm install -g @voidly/pay-cli"},"note":"Voidly Pay is the open agent-to-agent payment rail. Claim a DID + 10 starter credits in 60s, or paywall any URL via the universal proxy with one query param."}}