{"degradation_reason":"rolling 30d precision 0.353 < 0.6","degraded":true,"generated_at":"2026-06-21T04:05:26.565101+00:00","methodology_url":"https://voidly.ai/sentinel/methodology","notes":"prod_rolling reflects real alerts evaluated against observed incidents in the 7-day target window. training_holdout is the stratified holdout from the last training run — it sanity-checks the model but is not evidence of live performance. When prod_rolling.n_evaluated is small, defer to training_holdout. When it's large, prod_rolling is authoritative.","prod_rolling":{"accuracy":0.5011111111111111,"brier_score":0.2957,"calibration_bins":[{"bin_high":0.1,"bin_low":0,"n":698,"observed_rate":0.3438,"predicted_mean":0.0465},{"bin_high":0.2,"bin_low":0.1,"n":63,"observed_rate":0.2222,"predicted_mean":0.1504},{"bin_high":0.3,"bin_low":0.2,"n":2,"observed_rate":0,"predicted_mean":0.2655},{"bin_high":0.4,"bin_low":0.3,"n":7,"observed_rate":0.7143,"predicted_mean":0.3263},{"bin_high":0.5,"bin_low":0.4,"n":29,"observed_rate":0.5517,"predicted_mean":0.4508},{"bin_high":0.6,"bin_low":0.5,"n":9,"observed_rate":0.1111,"predicted_mean":0.5778},{"bin_high":0.7,"bin_low":0.6,"n":37,"observed_rate":0.3514,"predicted_mean":0.6392},{"bin_high":0.8,"bin_low":0.7,"n":7,"observed_rate":0.1429,"predicted_mean":0.7614},{"bin_high":0.9,"bin_low":0.8,"n":17,"observed_rate":0.4118,"predicted_mean":0.8413},{"bin_high":1,"bin_low":0.9,"n":31,"observed_rate":1,"predicted_mean":0.9485}],"calibration_mae":0.2737,"confusion":{"false_negative":183,"false_positive":266,"total":900,"true_negative":306,"true_positive":145},"n_evaluated":900,"per_country":{"BD":{"false_negative":14,"false_positive":2,"n":30,"precision":0.6,"recall":0.17647058823529413,"true_negative":11,"true_positive":3},"BR":{"false_negative":7,"false_positive":3,"n":30,"precision":0.25,"recall":0.125,"true_negative":19,"true_positive":1},"BY":{"false_negative":18,"false_positive":6,"n":30,"precision":0.4,"recall":0.18181818181818182,"true_negative":2,"true_positive":4},"CN":{"false_negative":1,"false_positive":22,"n":30,"precision":0,"recall":0,"true_negative":7,"true_positive":0},"CU":{"false_negative":11,"false_positive":6,"n":30,"precision":0,"recall":0,"true_negative":13,"true_positive":0},"EG":{"false_negative":0,"false_positive":10,"n":30,"precision":0.6666666666666666,"recall":1,"true_negative":0,"true_positive":20},"ER":{"false_negative":0,"false_positive":9,"n":30,"precision":0,"recall":null,"true_negative":21,"true_positive":0},"ET":{"false_negative":0,"false_positive":23,"n":30,"precision":0,"recall":null,"true_negative":7,"true_positive":0},"ID":{"false_negative":4,"false_positive":5,"n":30,"precision":0.6153846153846154,"recall":0.6666666666666666,"true_negative":13,"true_positive":8},"IN":{"false_negative":17,"false_positive":1,"n":30,"precision":0.9166666666666666,"recall":0.39285714285714285,"true_negative":1,"true_positive":11},"IR":{"false_negative":15,"false_positive":2,"n":30,"precision":0.8571428571428571,"recall":0.4444444444444444,"true_negative":1,"true_positive":12},"KP":{"false_negative":0,"false_positive":6,"n":30,"precision":0,"recall":null,"true_negative":24,"true_positive":0},"KZ":{"false_negative":11,"false_positive":8,"n":30,"precision":0.2,"recall":0.15384615384615385,"true_negative":9,"true_positive":2},"LB":{"false_negative":1,"false_positive":10,"n":30,"precision":0,"recall":0,"true_negative":19,"true_positive":0},"MM":{"false_negative":13,"false_positive":8,"n":30,"precision":0,"recall":0,"true_negative":9,"true_positive":0},"MY":{"false_negative":8,"false_positive":9,"n":30,"precision":0.18181818181818182,"recall":0.2,"true_negative":11,"true_positive":2},"NG":{"false_negative":1,"false_positive":16,"n":30,"precision":0,"recall":0,"true_negative":13,"true_positive":0},"NI":{"false_negative":6,"false_positive":11,"n":30,"precision":0.08333333333333333,"recall":0.14285714285714285,"true_negative":12,"true_positive":1},"PH":{"false_negative":8,"false_positive":8,"n":30,"precision":0,"recall":0,"true_negative":14,"true_positive":0},"PK":{"false_negative":2,"false_positive":0,"n":30,"precision":1,"recall":0.9333333333333333,"true_negative":0,"true_positive":28},"RU":{"false_negative":8,"false_positive":5,"n":30,"precision":0.4444444444444444,"recall":0.3333333333333333,"true_negative":13,"true_positive":4},"SA":{"false_negative":5,"false_positive":10,"n":30,"precision":0.09090909090909091,"recall":0.16666666666666666,"true_negative":14,"true_positive":1},"SD":{"false_negative":1,"false_positive":19,"n":30,"precision":0,"recall":0,"true_negative":10,"true_positive":0},"SY":{"false_negative":5,"false_positive":7,"n":30,"precision":0,"recall":0,"true_negative":18,"true_positive":0},"TH":{"false_negative":3,"false_positive":14,"n":30,"precision":0.17647058823529413,"recall":0.5,"true_negative":10,"true_positive":3},"TM":{"false_negative":1,"false_positive":12,"n":30,"precision":0,"recall":0,"true_negative":17,"true_positive":0},"TR":{"false_negative":5,"false_positive":17,"n":30,"precision":0.05555555555555555,"recall":0.16666666666666666,"true_negative":7,"true_positive":1},"UZ":{"false_negative":2,"false_positive":4,"n":30,"precision":0.8571428571428571,"recall":0.9230769230769231,"true_negative":0,"true_positive":24},"VE":{"false_negative":12,"false_positive":0,"n":30,"precision":1,"recall":0.6,"true_negative":0,"true_positive":18},"VN":{"false_negative":4,"false_positive":13,"n":30,"precision":0.13333333333333333,"recall":0.3333333333333333,"true_negative":11,"true_positive":2}},"precision":0.35279805352798055,"recall":0.4420731707317073,"window_days":30},"published_warning":"Stratified AUC overstates real-world performance by 50.8pp vs. time-based split. Do not cite the stratified number as a deployment figure; use the loco_median or the prod_rolling block once it populates.","schema":"voidly-sentinel-accuracy/v1","training_holdout":{"as_of":"2026-06-07T02:14:25.651493","f1":0.6813186813186813,"positive_rate":0.05946436677258284,"precision":0.6549295774647887,"recall":0.7099236641221374,"roc_auc":0.9672938341831472,"samples":2203,"source":"training_holdout","split_loco":{"f1_median":0.3043065361905941,"n_countries":21,"n_countries_evaluated":20,"notes":"Leave-country-out cross-validation. For each country, train on all other countries + eval on that country. The median is a more honest generalization figure than stratified. Individual per-country numbers reveal which countries the model struggles on (novel crises).","per_country":{"BD":{"auc":0.6088634867436711,"f1":0.16393442622950818,"n_pos_test":49,"n_test":731,"precision":0.4166666666666667,"recall":0.10204081632653061,"threshold":0.31000000000000005},"BY":{"auc":0.73406547369681,"f1":0.32098765432098764,"n_pos_test":49,"n_test":731,"precision":0.40625,"recall":0.2653061224489796,"threshold":0.08000000000000002},"CN":{"auc":0.9626808248691905,"f1":0.4375,"n_pos_test":9,"n_test":731,"precision":0.30434782608695654,"recall":0.7777777777777778,"threshold":0.4100000000000001},"CU":{"auc":0.4291845493562232,"f1":0,"n_pos_test":32,"n_test":731,"precision":0,"recall":0,"threshold":0.5},"EG":{"auc":0.6924246744606026,"f1":0.39622641509433965,"n_pos_test":63,"n_test":731,"precision":0.4883720930232558,"recall":0.3333333333333333,"threshold":0.17000000000000004},"ID":{"auc":0.9910220994475138,"f1":0.5185185185185185,"n_pos_test":7,"n_test":731,"precision":0.35,"recall":1,"threshold":0.27},"IR":{"auc":0.6094652769910501,"f1":0.24615384615384617,"n_pos_test":52,"n_test":731,"precision":0.20512820512820512,"recall":0.3076923076923077,"threshold":0.05},"KP":{"auc":null,"f1":null,"n_pos_test":0,"n_test":731,"note":"no positives in train or test","precision":null,"recall":null},"KZ":{"auc":0.851234519822416,"f1":0.5569620253164557,"n_pos_test":37,"n_test":731,"precision":0.5238095238095238,"recall":0.5945945945945946,"threshold":0.14},"MM":{"auc":0.8273638968481376,"f1":0.2696629213483146,"n_pos_test":33,"n_test":731,"precision":0.21428571428571427,"recall":0.36363636363636365,"threshold":0.16000000000000003},"MY":{"auc":0.6150627615062761,"f1":0.06153846153846154,"n_pos_test":14,"n_test":731,"precision":0.034482758620689655,"recall":0.2857142857142857,"threshold":0.05},"PK":{"auc":0.5227036499879139,"f1":0.28762541806020064,"n_pos_test":140,"n_test":731,"precision":0.27044025157232704,"recall":0.30714285714285716,"threshold":0.05},"RU":{"auc":0.49725778915919755,"f1":0.1702127659574468,"n_pos_test":71,"n_test":731,"precision":0.34782608695652173,"recall":0.11267605633802817,"threshold":0.18000000000000005},"SA":{"auc":0.8034893267651889,"f1":0.32558139534883723,"n_pos_test":35,"n_test":731,"precision":0.875,"recall":0.2,"threshold":0.45000000000000007},"SY":{"auc":0.7432045779685265,"f1":0.34146341463414637,"n_pos_test":32,"n_test":731,"precision":0.7777777777777778,"recall":0.21875,"threshold":0.4600000000000001},"TH":{"auc":0.9472160130054867,"f1":0.7213114754098361,"n_pos_test":28,"n_test":731,"precision":0.6666666666666666,"recall":0.7857142857142857,"threshold":0.24000000000000005},"TM":{"auc":0.6463438932058179,"f1":0,"n_pos_test":14,"n_test":731,"precision":0,"recall":0,"threshold":0.5},"TR":{"auc":0.581855500821018,"f1":0.2727272727272727,"n_pos_test":35,"n_test":731,"precision":0.22641509433962265,"recall":0.34285714285714286,"threshold":0.4100000000000001},"UZ":{"auc":0.733740191804708,"f1":0.37606837606837606,"n_pos_test":111,"n_test":731,"precision":0.35772357723577236,"recall":0.3963963963963964,"threshold":0.05},"VE":{"auc":0.9352265799634221,"f1":0.5084745762711864,"n_pos_test":28,"n_test":731,"precision":0.4838709677419355,"recall":0.5357142857142857,"threshold":0.19},"VN":{"auc":0.7955665024630543,"f1":0.26666666666666666,"n_pos_test":35,"n_test":731,"precision":0.6,"recall":0.17142857142857143,"threshold":0.2}},"precision_median":0.3538617886178862,"recall_median":0.30741758241758244,"roc_auc_max":0.9910220994475138,"roc_auc_median":0.733902832750759,"roc_auc_min":0.4291845493562232,"split":"loco"},"split_stratified":{"f1":0.6813186813186813,"n_test":2203,"notes":"Stratified random 15% holdout. Inflates AUC via within-country temporal leakage. Publish alongside time-based as the training-level sanity check, NOT as the deployment figure.","positive_rate":0.05946436677258284,"precision":0.6549295774647887,"recall":0.7099236641221374,"roc_auc":0.9672938341831472,"split":"stratified","threshold":0.24000000000000005},"split_summary":{"delta_stratified_vs_time_pp":50.79,"generated_at":"2026-06-21T03:00:11.923065+00:00","loco_median_auc":0.733902832750759,"loco_median_f1":0.3043065361905941,"schema":"voidly-sentinel-evaluation/v1","stratified_auc":0.9672938341831472,"stratified_f1":0.6813186813186813,"time_based_auc":0.45940327782056223,"time_based_f1":0.3256997455470738},"split_time":{"f1":0.3256997455470738,"n_test":2303,"notes":"Time-based split — train on data before T, test on data after T. This is the honest deployment-aligned figure. Treat as the Sentinel baseline when n_evaluated in prod is < 30.","positive_rate":0.1945288753799392,"precision":0.1945288753799392,"recall":1,"roc_auc":0.45940327782056223,"split":"time","test_date_range":["2026-02-18","2026-06-07"],"threshold":0.05,"train_date_range":["2024-06-07","2025-10-29"],"val_date_range":["2025-10-29","2026-02-18"]},"threshold":0.24000000000000005},"window_days":30,"_voidly_pay":{"claim_did":"https://voidly.ai/pay/claim","for_builders":"https://voidly.ai/pay/for-builders","marketplace":"https://api.voidly.ai/v1/pay/marketplace","live_demo":"https://huggingface.co/spaces/emperor-mew/voidly-pay","universal_proxy":{"url":"https://api.voidly.ai/v1/pay/proxy","example":"https://api.voidly.ai/v1/pay/proxy?u=<https-url>&to=did:voidly:<your-did>&price=0.01","page":"https://voidly.ai/pay/proxy","note":"Paywall any public HTTPS URL with one query param. No SDK install required."},"install":{"typescript":"npm install @voidly/pay","python":"pip install voidly-pay","langchain":"pip install voidly-pay-langchain","vercel_ai":"npm install @voidly/pay-vercel-ai","mcp":"npx @voidly/pay-mcp","cli":"npm install -g @voidly/pay-cli"},"note":"Voidly Pay is the open agent-to-agent payment rail. Claim a DID + 10 starter credits in 60s, or paywall any URL via the universal proxy with one query param."}}