Model: 1211

The following results repeat the experiment with gpt-5.2-2025-12-11.

The individual results can be found in the separate directories.

Aggregate Results

This first section combines the individual results for each ground-truth distribution into a single file containing everything processed by this model, gpt-5.2-2025-12-11.

library(tidyverse)
library(gtExtras)
# Beta
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "rbeta-" prefix is removed and the first "-"
# becomes ",", so a key such as "rbeta-2-5" would be shown as "Beta(2,5)".
load("./beta-1211/CompleteBeta.1211.RData")
Complete.Beta.1211 <- Complete.Beta.1211 |>
  mutate(
    name = "Beta",
    Dist = paste0(
      name, "(",
      key |> str_remove("rbeta-") |> str_replace("-", ","),
      ")"
    )
  )

# Binomial
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "rbinom-" prefix is removed and the first "-"
# becomes ",".
load("./binomial-1211/Complete.Binomial.1211.RData")
Complete.Binomial.1211 <- Complete.Binomial.1211 |>
  mutate(
    name = "Binomial",
    Dist = paste0(
      name, "(",
      key |> str_remove("rbinom-") |> str_replace("-", ","),
      ")"
    )
  )

# Chi-square
# Load the stored results, tag them with the distribution name, and build a
# display label from `key` by removing the "rchisq-" prefix.
load("./chisquare-1211/CompleteChisq.1211.RData")
Complete.Chisq.1211 <- Complete.Chisq.1211 |>
  mutate(
    name = "Chi-square",
    Dist = paste0(name, "(", str_remove(key, "rchisq-"), ")")
  )


# Gamma
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "rgamma-" prefix is removed and the first "-"
# becomes ",".
load("./gamma-1211/CompleteGamma.1211.RData")
Complete.Gamma.1211 <- Complete.Gamma.1211 |>
  mutate(
    name = "Gamma",
    Dist = paste0(
      name, "(",
      key |> str_remove("rgamma-") |> str_replace("-", ","),
      ")"
    )
  )

# Geometric
# Load the stored results, tag them with the distribution name, and build a
# display label from `key` by removing the "rgeom-" prefix.
load("./geometric-1211/Complete.Geometric.1211.RData")
Complete.Geometric.1211 <- Complete.Geometric.1211 |>
  mutate(
    name = "Geometric",
    Dist = paste0(name, "(", str_remove(key, "rgeom-"), ")")
  )

# Lognormal
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "rlnorm-" prefix is removed and the first "-"
# becomes ",".
load("./lognormal-1211/CompleteLognormal.1211.RData")
Complete.Lognormal.1211 <- Complete.Lognormal.1211 |>
  mutate(
    name = "Lognormal",
    Dist = paste0(
      name, "(",
      key |> str_remove("rlnorm-") |> str_replace("-", ","),
      ")"
    )
  )

# Normal
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "rnorm-" prefix is removed and the first "-"
# becomes ",".
load("./normal-1211/CompleteNormal.1211.RData")
Complete.Normal.1211 <- Complete.Normal.1211 |>
  mutate(
    name = "Normal",
    Dist = paste0(
      name, "(",
      key |> str_remove("rnorm-") |> str_replace("-", ","),
      ")"
    )
  )

# Poisson
# Load the stored results, tag them with the distribution name, and build a
# display label from `key` by removing the "rpois-" prefix.
load("./poisson-1211/CompletePoisson.1211.RData")
Complete.Poisson.1211 <- Complete.Poisson.1211 |>
  mutate(
    name = "Poisson",
    Dist = paste0(name, "(", str_remove(key, "rpois-"), ")")
  )

# Uniform
# Load the stored results, tag them with the distribution name, and build a
# display label from `key`: the "runif-" prefix is removed and the first "-"
# becomes ",". Unlike the other labels, this one is wrapped in square brackets.
load("./uniform-1211/Complete.Uniform.1211.RData")
Complete.Uniform.1211 <- Complete.Uniform.1211 |>
  mutate(
    name = "Uniform",
    Dist = paste0(
      name, "[",
      key |> str_remove("runif-") |> str_replace("-", ","),
      "]"
    )
  )

# Bind Together
# Stack the nine per-distribution tables, in this order, into one data set for
# this model.
Complete.Data.1211 <- list(
  Complete.Beta.1211,
  Complete.Binomial.1211,
  Complete.Chisq.1211,
  Complete.Gamma.1211,
  Complete.Geometric.1211,
  Complete.Lognormal.1211,
  Complete.Normal.1211,
  Complete.Poisson.1211,
  Complete.Uniform.1211
) |>
  dplyr::bind_rows()

# Create size and Full Distribution
# `Size` is length(data) evaluated one row at a time, which is why rowwise() is
# needed. The row-wise grouping is dropped again immediately so that later
# verbs, and the saved object, are not evaluated once per row.
# `Full.Dist` joins the distribution label and the size with "-".
# Note: the earlier version passed `sep = ""` to mutate() instead of paste0(),
# which silently created an extra column called `sep`; paste0() needs no
# separator, so the argument is removed.
Complete.Data.1211 <- Complete.Data.1211 |>
  rowwise() |>
  mutate(Size = length(data)) |>
  ungroup() |>
  mutate(Full.Dist = paste0(Dist, "-", Size))

# Save Compiled file
save(Complete.Data.1211, file = "Complete.Data.All.1211.RData")

Collate Complete Files

This section loads the four distinct result sets (one per model) and binds them together.

# Load the compiled results of each of the four runs; the objects used below
# (Complete.Data.0409, .0613, .0806 and .1211) are expected to come from these
# files.
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0409.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0613.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0806.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.1211.RData")

# Stack the four result sets, in run order, into one table.
Complete.Data.01.2026 <- list(
  Complete.Data.0409,
  Complete.Data.0613,
  Complete.Data.0806,
  Complete.Data.1211
) |>
  bind_rows()

Results

A first table of pure raw responses comes next. This provides the foundation for cleaning up similar labels with variation in spelling and capitalization.

# Pull the text of each reply out of the nested response and store it as
# `Fresh.Outcome`. Rows are indexed with seq_len(nrow(.)) rather than a literal
# row count, so the extraction keeps working when the number of collated
# results changes.
Complete.Data.01.2026$Fresh.Outcome <- map_chr(
  seq_len(nrow(Complete.Data.01.2026)),
  \(i) Complete.Data.01.2026$response$body$choices[[i]]$message$content
)

# Copy the model identifier out of the nested `body` column.
Complete.Data.01.2026$model <- Complete.Data.01.2026$body$model

# Raw label counts: one row per distinct response text, one column per model.
Complete.Data.01.2026 |>
  group_by(Fresh.Outcome, model) |>
  summarise(Count = n()) |>
  ungroup() |>
  pivot_wider(names_from = model, values_from = Count) |>
  gt()
`summarise()` has grouped output by 'Fresh.Outcome'. You can override using the
`.groups` argument.
Fresh.Outcome gpt-5.2-2025-12-11 gpt-4-0613 gpt-4-turbo-2024-04-09 gpt-4o-2024-08-06
15 NA NA NA
Arcsine 36 NA NA NA
Bernoulli 744 1071 690 835
Beta 4165 14 8885 2141
Betabinomial 33 NA NA NA
Bimodal NA 49 NA NA
Binomial 15525 84 1969 1028
Categorical 86 NA NA NA
Cauchy 35 NA 6 NA
Chisquare 9 NA NA NA
Degenerate 71 71 71 71
ExGaussian 3 NA NA NA
Exponential 196 156 930 5266
Gamma 2714 NA 517 17
Geometric 583 NA NA NA
Gumbel 76 NA NA NA
Laplace 3 NA 454 NA
Log-Normal NA NA NA 3
Log-normal NA NA NA 938
Logistic NA NA 7 NA
Lognormal 8127 3979 7974 468
Mixture 1 NA 1 NA
Multinomial 24 NA NA NA
NegBinomial 5 NA NA NA
Negative Binomial NA 5 NA NA
NegativeBinomial 224 4 NA NA
Negativebinomial 197 NA NA NA
Negbinomial 1 NA NA NA
Normal 5201 29210 3230 12090
Pareto 83 NA 126 142
Pascal 77 NA NA NA
Poisson 8553 12981 22764 17042
Skew-normal NA NA 10 NA
Skew-right NA 3 NA NA
SkewNormal NA NA 160 NA
Skewed NA NA NA 1
Skewness NA 245 5 NA
Skewnormal 15 NA 3 NA
Student 29 NA NA NA
Studentt 1 NA NA NA
Triangular 16 NA 1 NA
Uniform 1250 1128 1141 8557
Weibull 78 NA 56 400
ZIP 1 NA NA NA
Zero-inflated NA NA NA 1
ZeroInflatedPoisson 4 NA NA NA
ZeroinflatedPoisson 8 NA NA NA
Zipf 2 NA NA NA
arcsine 1 NA NA NA
beta 4 NA NA NA
binomial 1 NA NA NA
categorical 3 NA NA NA
exgaussian 1 NA NA NA
exponential 3 NA NA NA
gamma 33 NA NA NA
geometric 3 NA NA NA
lognormal 723 NA NA NA
negativebinomial 8 NA NA NA
normal 11 NA NA NA
skewnormal 3 NA NA NA
t 13 NA NA NA
uniform 2 NA NA NA

Cleaned a bit

This is the minimal table with cleaned responses. It is still far too large to be useful.

# Collapse spelling and capitalisation variants of the raw labels into a single
# canonical `Outcome`. Each arm lists the raw variants on the left and the
# canonical label on the right; anything not listed keeps its raw label.
Complete.Data.01.2026 <- Complete.Data.01.2026 |>
  mutate(
    Outcome = case_match(
      Fresh.Outcome,
      c("Arcsine", "arcsine") ~ "Arcsine",
      c("Beta", "beta") ~ "Beta",
      c("Binomial", "binomial") ~ "Binomial",
      c("Categorical", "categorical") ~ "Categorical",
      c("ExGaussian", "exgaussian") ~ "ExGaussian",
      c("Exponential", "exponential") ~ "Exponential",
      c("Gamma", "gamma") ~ "Gamma",
      c("Geometric", "geometric") ~ "Geometric",
      c("Lognormal", "lognormal", "Log-Normal", "Log-normal") ~ "Lognormal",
      c(
        "NegativeBinomial", "Negative Binomial", "NegBinomial",
        "Negbinomial", "negativebinomial", "Negativebinomial"
      ) ~ "Negative Binomial",
      c("Normal", "normal") ~ "Normal",
      c(
        "Skewed", "Skewnormal", "SkewNormal", "Skew-normal",
        "Skew-right", "Skewness", "skewnormal"
      ) ~ "Skewed",
      c("Student", "Studentt", "t") ~ "Student's t",
      c("Uniform", "uniform") ~ "Uniform",
      c(
        "ZeroInflatedPoisson", "ZeroinflatedPoisson", "Zero-inflated", "ZIP"
      ) ~ "ZIP",
      .default = Fresh.Outcome
    )
  )

# Cleaned label counts: one row per canonical outcome, one column per model.
Complete.Data.01.2026 |>
  group_by(Outcome, model) |>
  summarise(Count = n()) |>
  ungroup() |>
  pivot_wider(names_from = model, values_from = Count) |>
  gt()
`summarise()` has grouped output by 'Outcome'. You can override using the
`.groups` argument.
Outcome gpt-5.2-2025-12-11 gpt-4-0613 gpt-4-turbo-2024-04-09 gpt-4o-2024-08-06
15 NA NA NA
Arcsine 37 NA NA NA
Bernoulli 744 1071 690 835
Beta 4169 14 8885 2141
Betabinomial 33 NA NA NA
Bimodal NA 49 NA NA
Binomial 15526 84 1969 1028
Categorical 89 NA NA NA
Cauchy 35 NA 6 NA
Chisquare 9 NA NA NA
Degenerate 71 71 71 71
ExGaussian 4 NA NA NA
Exponential 199 156 930 5266
Gamma 2747 NA 517 17
Geometric 586 NA NA NA
Gumbel 76 NA NA NA
Laplace 3 NA 454 NA
Logistic NA NA 7 NA
Lognormal 8850 3979 7974 1409
Mixture 1 NA 1 NA
Multinomial 24 NA NA NA
Negative Binomial 435 9 NA NA
Normal 5212 29210 3230 12090
Pareto 83 NA 126 142
Pascal 77 NA NA NA
Poisson 8553 12981 22764 17042
Skewed 18 248 178 1
Student's t 43 NA NA NA
Triangular 16 NA 1 NA
Uniform 1252 1128 1141 8557
Weibull 78 NA 56 400
ZIP 13 NA NA 1
Zipf 2 NA NA NA