# Read in the data; each row is a year and each column is a day of that year.
dat = CSV.read(
    "data/weather/snoqualmie_falls.txt",
    DataFrame;
    delim=" ",
    ignorerepeated=true,
    silencewarnings=true,
    skipto=2,
)
years = 1948:1983
# The last "day" (column 366) of each non-leap year is NA, so those entries must be
# skipped. Derive the number of days from the Gregorian leap-year rule so there is exactly
# one entry per year, regardless of which year the range starts on or how long it is
# (a repeated [366, 365, 365, 365] pattern over-allocates when length(years) % 4 != 0).
days_per_year = [
    ((y % 4 == 0 && y % 100 != 0) || y % 400 == 0) ? 366 : 365 for y in years
]
# Cumulative day counts: year i occupies the index range ending at year_ends[i].
# Computing these once avoids re-summing a slice of days_per_year on every iteration.
year_ends = cumsum(days_per_year)
snoq = zeros(sum(days_per_year))
for i in eachindex(days_per_year)
    first_day = year_ends[i] - days_per_year[i] + 1
    # need to use values() to get the elements in the row; this is a quirk of
    # DataFrames.jl
    snoq[first_day:year_ends[i]] .= values(dat[i, 1:days_per_year[i]])
end
# Arrange a dataframe with today's precipitation (predictor) and tomorrow's (prediction).
snoq_dat = DataFrame(today=snoq[1:end-1], tomorrow=snoq[2:end])
p1 = histogram(
    snoq_dat.today;
    xlabel="Precipitation (1/100 inch)",
    ylabel="Count",
    label=false,
)
p2 = scatter(
    snoq_dat.today,
    snoq_dat.tomorrow;
    xlabel="Precip Today (1/100 inch)",
    ylabel="Precip Tomorrow (1/100 inch)",
    markersize=2,
    alpha=0.2,
    label=false,
)
plot(p1, p2, layout=(1, 2), size=(1200, 500))