function calculate_deviance(y_gen, x_gen, y_oos, x_oos; degree=2, reg=Inf)
# fit model
lb = [-5 .+ zeros(degree); 0.01]
ub = [5 .+ zeros(degree); 10.0]
p0 = [zeros(degree); 5.0]
function model_loglik(p, y, x, reg)
if mean(p[1:end-1]) <= reg
ll = 0
for i = 1:length(y)
μ = sum(x[i, 1:degree] .* p[1:end-1])
ll += logpdf(Normal(μ, p[end]), y[i])
end
else
ll = -Inf
end
return ll
end
result = optimize(p -> -model_loglik(p, y_gen, x_gen, reg), lb, ub, p0)
θ = result.minimizer
function deviance(p, y_pred, x_pred)
dev = 0
for i = 1:length(y_pred)
μ = sum(x_pred[i, 1:degree] .* p[1:end-1])
dev += -2 * logpdf(Normal(μ, p[end]), y_pred[i])
end
return dev
end
dev_is = deviance(θ, y_gen, x_gen)
dev_oos = deviance(θ, y_oos, x_oos)
return (dev_is, dev_oos)
end
N_sim = 10_000
function simulate_deviance(N_sim, N_data; reg=Inf)
d_is = zeros(5, 4)
for d = 1:5
dev_out = zeros(N_sim, 2)
for n = 1:N_sim
x_gen = rand(Uniform(-2, 2), (N_data, 5))
μ_gen = [0.1 * x_gen[i, 1] - 0.3 * x_gen[i, 2] for i in 1:N_data]
y_gen = rand.(Normal.(μ_gen, 1))
x_oos = rand(Uniform(-2, 2), (N_data, 5))
μ_oos = [0.1 * x_oos[i, 1] - 0.3 * x_oos[i, 2] for i in 1:N_data]
y_oos = rand.(Normal.(μ_oos, 1))
dev_out[n, :] .= calculate_deviance(y_gen, x_gen, y_oos, x_oos, degree=d, reg=reg)
end
d_is[d, 1] = mean(dev_out[:, 1])
d_is[d, 2] = std(dev_out[:, 1])
d_is[d, 3] = mean(dev_out[:, 2])
d_is[d, 4] = std(dev_out[:, 2])
end
return d_is
end
d_20 = simulate_deviance(N_sim, 20)
p1 = scatter(collect(1:5) .- 0.1, d_20[:, 1], yerr=d_20[:, 2], color=:blue, linecolor=:blue, lw=2, markersize=5, label="In Sample", xlabel="Number of Predictors", ylabel="Deviance", title=L"$N = 20$", grid=true)
scatter!(p1, collect(1:5) .+ 0.1, d_20[:, 3], yerr=d_20[:, 4], lw=2, markersize=5, color=:black, linecolor=:black, label="Out of Sample")
plot!(p1, size=(600, 550))
d_100 = simulate_deviance(N_sim, 100)
p2 = scatter(collect(1:5) .- 0.1, d_100[:, 1], yerr=d_100[:, 2], color=:blue, linecolor=:blue, lw=2, markersize=5, label="In Sample", xlabel="Number of Predictors", ylabel="Deviance", title=L"$N = 100$", grid=true)
scatter!(p2, collect(1:5) .+ 0.1, d_100[:, 3], yerr=d_100[:, 4], lw=2, markersize=5, color=:black, linecolor=:black, label="Out of Sample")
plot!(p2, size=(600, 550))
display(p1)
display(p2)