library(tidyverse)
library(gtExtras)
# Compile the per-distribution result files for model 1211 into one table.

# Helper: add a display name and a readable distribution label to one
# per-distribution results table.
#   results     data frame with a `key` column whose values start with
#               `key_prefix` (for example "rbeta-")
#   dist_name   text stored in the `name` column and used to start the label
#   key_prefix  pattern stripped from `key`, leaving only the parameters
#   two_params  when TRUE, the first "-" left in the parameters becomes ","
#   open, close brackets wrapped around the parameters in the label
# Returns `results` with the columns `name` and `Dist` added.
label_distribution <- function(results, dist_name, key_prefix,
                               two_params = TRUE, open = "(", close = ")") {
  params <- str_remove(results[["key"]], key_prefix)
  if (two_params) {
    # str_replace() only touches the first "-", as the original code did.
    params <- str_replace(params, "-", ",")
  }
  results$name <- dist_name
  results$Dist <- paste0(dist_name, open, params, close)
  results
}

# Beta
load("./beta-1211/CompleteBeta.1211.RData")
Complete.Beta.1211 <- label_distribution(
  Complete.Beta.1211, "Beta", "rbeta-"
)
# Binomial
load("./binomial-1211/Complete.Binomial.1211.RData")
Complete.Binomial.1211 <- label_distribution(
  Complete.Binomial.1211, "Binomial", "rbinom-"
)
# Chi-square
load("./chisquare-1211/CompleteChisq.1211.RData")
Complete.Chisq.1211 <- label_distribution(
  Complete.Chisq.1211, "Chi-square", "rchisq-", two_params = FALSE
)
# Gamma
load("./gamma-1211/CompleteGamma.1211.RData")
Complete.Gamma.1211 <- label_distribution(
  Complete.Gamma.1211, "Gamma", "rgamma-"
)
# Geometric
load("./geometric-1211/Complete.Geometric.1211.RData")
Complete.Geometric.1211 <- label_distribution(
  Complete.Geometric.1211, "Geometric", "rgeom-", two_params = FALSE
)
# Lognormal
load("./lognormal-1211/CompleteLognormal.1211.RData")
Complete.Lognormal.1211 <- label_distribution(
  Complete.Lognormal.1211, "Lognormal", "rlnorm-"
)
# Normal
load("./normal-1211/CompleteNormal.1211.RData")
Complete.Normal.1211 <- label_distribution(
  Complete.Normal.1211, "Normal", "rnorm-"
)
# Poisson
load("./poisson-1211/CompletePoisson.1211.RData")
Complete.Poisson.1211 <- label_distribution(
  Complete.Poisson.1211, "Poisson", "rpois-", two_params = FALSE
)
# Uniform (labelled with square brackets)
load("./uniform-1211/Complete.Uniform.1211.RData")
Complete.Uniform.1211 <- label_distribution(
  Complete.Uniform.1211, "Uniform", "runif-", open = "[", close = "]"
)
# Bind Together
Complete.Data.1211 <- dplyr::bind_rows(
  Complete.Beta.1211,
  Complete.Binomial.1211,
  Complete.Chisq.1211,
  Complete.Gamma.1211,
  Complete.Geometric.1211,
  Complete.Lognormal.1211,
  Complete.Normal.1211,
  Complete.Poisson.1211,
  Complete.Uniform.1211
)
# Create size and Full Distribution
# rowwise() makes length(data) run once per row; `data` is presumably a
# list-column holding one sample per row -- TODO confirm. The table is left
# row-wise, exactly as before, in case later scripts rely on that.
# Fix: `sep = ""` used to be passed to mutate() instead of paste0(), which
# silently added a stray column named `sep`; paste0() needs no separator.
Complete.Data.1211 <- Complete.Data.1211 |>
  rowwise() |>
  mutate(
    Size = length(data),
    Full.Dist = paste0(Dist, "-", Size)
  )
# Save Compiled file
save(Complete.Data.1211, file="Complete.Data.All.1211.RData")
Model: 1211
The following results repeat the experiment with gpt-5.2-2025-12-11.
The individual results can be found in the separate directories.
Aggregate Results
The first section compiles the individual results from different ground truth distributions into a single file for the results processed by this model – gpt-5.2-2025-12-11.
Collate Complete Files
This section collects the four distinct result sets and binds them together.
# Restore the compiled results of each model run into the workspace; every
# file is expected to supply the matching Complete.Data.<tag> object used below.
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0409.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0613.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.0806.RData")
load("~/Nextcloud/LLM_Distributions/Final-Prompt-Results/Complete.Data.All.1211.RData")
# Stack the four tables, in the same order as before, into one combined table.
Complete.Data.01.2026 <- list(
  Complete.Data.0409,
  Complete.Data.0613,
  Complete.Data.0806,
  Complete.Data.1211
) |>
  bind_rows()
Results
A first table of pure raw responses comes next. This provides the foundation for cleaning up similar labels with variation in spelling and capitalization.
# Pull the raw text answer out of every response. The index runs over the
# actual number of rows instead of a hard-coded 196000, so the extraction
# keeps working when the combined data has a different size.
Complete.Data.01.2026$Fresh.Outcome <- map_chr(
  seq_len(nrow(Complete.Data.01.2026)),
  \(i) Complete.Data.01.2026$response$body$choices[[i]]$message$content
)
# Model identifier, read from the top-level `body` column.
# NOTE(review): this is `body`, not `response$body` -- confirm that is intended.
Complete.Data.01.2026$model <- Complete.Data.01.2026$body$model
# Count each raw answer per model and spread the models into columns.
Complete.Data.01.2026 |>
  group_by(Fresh.Outcome, model) |>
  summarise(Count = n()) |>
  ungroup() |>
  pivot_wider(names_from = model, values_from = Count) |>
  gt()
`summarise()` has grouped output by 'Fresh.Outcome'. You can override using the
`.groups` argument.
| Fresh.Outcome | gpt-5.2-2025-12-11 | gpt-4-0613 | gpt-4-turbo-2024-04-09 | gpt-4o-2024-08-06 |
|---|---|---|---|---|
|  | 15 | NA | NA | NA |
| Arcsine | 36 | NA | NA | NA |
| Bernoulli | 744 | 1071 | 690 | 835 |
| Beta | 4165 | 14 | 8885 | 2141 |
| Betabinomial | 33 | NA | NA | NA |
| Bimodal | NA | 49 | NA | NA |
| Binomial | 15525 | 84 | 1969 | 1028 |
| Categorical | 86 | NA | NA | NA |
| Cauchy | 35 | NA | 6 | NA |
| Chisquare | 9 | NA | NA | NA |
| Degenerate | 71 | 71 | 71 | 71 |
| ExGaussian | 3 | NA | NA | NA |
| Exponential | 196 | 156 | 930 | 5266 |
| Gamma | 2714 | NA | 517 | 17 |
| Geometric | 583 | NA | NA | NA |
| Gumbel | 76 | NA | NA | NA |
| Laplace | 3 | NA | 454 | NA |
| Log-Normal | NA | NA | NA | 3 |
| Log-normal | NA | NA | NA | 938 |
| Logistic | NA | NA | 7 | NA |
| Lognormal | 8127 | 3979 | 7974 | 468 |
| Mixture | 1 | NA | 1 | NA |
| Multinomial | 24 | NA | NA | NA |
| NegBinomial | 5 | NA | NA | NA |
| Negative Binomial | NA | 5 | NA | NA |
| NegativeBinomial | 224 | 4 | NA | NA |
| Negativebinomial | 197 | NA | NA | NA |
| Negbinomial | 1 | NA | NA | NA |
| Normal | 5201 | 29210 | 3230 | 12090 |
| Pareto | 83 | NA | 126 | 142 |
| Pascal | 77 | NA | NA | NA |
| Poisson | 8553 | 12981 | 22764 | 17042 |
| Skew-normal | NA | NA | 10 | NA |
| Skew-right | NA | 3 | NA | NA |
| SkewNormal | NA | NA | 160 | NA |
| Skewed | NA | NA | NA | 1 |
| Skewness | NA | 245 | 5 | NA |
| Skewnormal | 15 | NA | 3 | NA |
| Student | 29 | NA | NA | NA |
| Studentt | 1 | NA | NA | NA |
| Triangular | 16 | NA | 1 | NA |
| Uniform | 1250 | 1128 | 1141 | 8557 |
| Weibull | 78 | NA | 56 | 400 |
| ZIP | 1 | NA | NA | NA |
| Zero-inflated | NA | NA | NA | 1 |
| ZeroInflatedPoisson | 4 | NA | NA | NA |
| ZeroinflatedPoisson | 8 | NA | NA | NA |
| Zipf | 2 | NA | NA | NA |
| arcsine | 1 | NA | NA | NA |
| beta | 4 | NA | NA | NA |
| binomial | 1 | NA | NA | NA |
| categorical | 3 | NA | NA | NA |
| exgaussian | 1 | NA | NA | NA |
| exponential | 3 | NA | NA | NA |
| gamma | 33 | NA | NA | NA |
| geometric | 3 | NA | NA | NA |
| lognormal | 723 | NA | NA | NA |
| negativebinomial | 8 | NA | NA | NA |
| normal | 11 | NA | NA | NA |
| skewnormal | 3 | NA | NA | NA |
| t | 13 | NA | NA | NA |
| uniform | 2 | NA | NA | NA |
Cleaned a bit
This is the minimal table with cleaned responses. It is still far too large to be useful.
# Collapse spelling and capitalisation variants of the raw answers into one
# label per distribution; answers that match no variant list stay unchanged.
Complete.Data.01.2026 <- Complete.Data.01.2026 |>
  mutate(
    Outcome = case_when(
      Fresh.Outcome %in% c("arcsine", "Arcsine") ~ "Arcsine",
      Fresh.Outcome %in% c("categorical", "Categorical") ~ "Categorical",
      Fresh.Outcome %in% c("exponential", "Exponential") ~ "Exponential",
      Fresh.Outcome %in% c("beta", "Beta") ~ "Beta",
      Fresh.Outcome %in% c("exgaussian", "ExGaussian") ~ "ExGaussian",
      Fresh.Outcome %in% c("Normal", "normal") ~ "Normal",
      Fresh.Outcome %in% c("binomial", "Binomial") ~ "Binomial",
      Fresh.Outcome %in% c("geometric", "Geometric") ~ "Geometric",
      Fresh.Outcome %in% c("gamma", "Gamma") ~ "Gamma",
      Fresh.Outcome %in% c(
        "Lognormal", "lognormal", "Log-Normal", "Log-normal"
      ) ~ "Lognormal",
      Fresh.Outcome %in% c(
        "NegativeBinomial", "Negative Binomial", "NegBinomial",
        "Negbinomial", "negativebinomial", "Negativebinomial"
      ) ~ "Negative Binomial",
      Fresh.Outcome %in% c(
        "Skewed", "Skewnormal", "SkewNormal", "Skew-normal",
        "Skew-right", "Skewness", "skewnormal"
      ) ~ "Skewed",
      Fresh.Outcome %in% c("Student", "Studentt", "t") ~ "Student's t",
      Fresh.Outcome %in% c("Uniform", "uniform") ~ "Uniform",
      Fresh.Outcome %in% c(
        "ZeroInflatedPoisson", "ZeroinflatedPoisson", "Zero-inflated", "ZIP"
      ) ~ "ZIP",
      .default = Fresh.Outcome
    )
  )
# Count each cleaned label per model and spread the models into columns.
Complete.Data.01.2026 |>
  group_by(Outcome, model) |>
  summarise(Count = n()) |>
  ungroup() |>
  pivot_wider(names_from = model, values_from = Count) |>
  gt()
`summarise()` has grouped output by 'Outcome'. You can override using the
`.groups` argument.
| Outcome | gpt-5.2-2025-12-11 | gpt-4-0613 | gpt-4-turbo-2024-04-09 | gpt-4o-2024-08-06 |
|---|---|---|---|---|
|  | 15 | NA | NA | NA |
| Arcsine | 37 | NA | NA | NA |
| Bernoulli | 744 | 1071 | 690 | 835 |
| Beta | 4169 | 14 | 8885 | 2141 |
| Betabinomial | 33 | NA | NA | NA |
| Bimodal | NA | 49 | NA | NA |
| Binomial | 15526 | 84 | 1969 | 1028 |
| Categorical | 89 | NA | NA | NA |
| Cauchy | 35 | NA | 6 | NA |
| Chisquare | 9 | NA | NA | NA |
| Degenerate | 71 | 71 | 71 | 71 |
| ExGaussian | 4 | NA | NA | NA |
| Exponential | 199 | 156 | 930 | 5266 |
| Gamma | 2747 | NA | 517 | 17 |
| Geometric | 586 | NA | NA | NA |
| Gumbel | 76 | NA | NA | NA |
| Laplace | 3 | NA | 454 | NA |
| Logistic | NA | NA | 7 | NA |
| Lognormal | 8850 | 3979 | 7974 | 1409 |
| Mixture | 1 | NA | 1 | NA |
| Multinomial | 24 | NA | NA | NA |
| Negative Binomial | 435 | 9 | NA | NA |
| Normal | 5212 | 29210 | 3230 | 12090 |
| Pareto | 83 | NA | 126 | 142 |
| Pascal | 77 | NA | NA | NA |
| Poisson | 8553 | 12981 | 22764 | 17042 |
| Skewed | 18 | 248 | 178 | 1 |
| Student's t | 43 | NA | NA | NA |
| Triangular | 16 | NA | 1 | NA |
| Uniform | 1252 | 1128 | 1141 | 8557 |
| Weibull | 78 | NA | 56 | 400 |
| ZIP | 13 | NA | NA | 1 |
| Zipf | 2 | NA | NA | NA |