Prediction and residual values analysis

Below we demonstrate how to extract predicted values and residuals from the different types of regression analyses. The use of histogram and hexbin is demonstrated for analysing the results. The histogram command is especially useful for visually assessing the extent to which the residuals are normally distributed. In principle, however, any available and relevant command may be used for further analysis.

require no.ssb.fdb:13 as db

// Build the cross-sectional dataset used by all non-panel regressions below.
create-dataset regressiondata
// Wage and wealth are measured at 2019-12-31; birth date and gender are
// imported without a measurement date.
import db/INNTEKT_WLONN 2019-12-31 as wage
import db/INNTEKT_BER_BRFORM 2019-12-31 as wealth
import db/BEFOLKNING_FOEDSELS_AAR_MND as birthdate
import db/BEFOLKNING_KJOENN as gender
import db/BEFOLKNING_STATUSKODE 2020-01-01 as residentstatus

// Restrict the population to units with status code '1' as of 2020-01-01
// (presumably "resident" - confirm against the variable's code list).
keep if residentstatus == '1'

// Age in 2019. Assumes birthdate is encoded as YYYYMM, so that
// int(birthdate/100) yields the birth year - TODO confirm.
generate age = 2019 - int(birthdate/100)

// Dummy that is 1 when gender == '1' and 0 otherwise
// (presumably '1' is the code for male - confirm against the code list).
generate male = 0
replace male = 1 if gender == '1'

// Ordinary linear regression of wage on age, male and wealth.
regress wage age male wealth
// regress-predict without options: the following commands rely on it
// creating a variable named "predicted" (default name - confirm in the
// command reference).
regress-predict wage age male wealth
histogram predicted
// Predicted versus observed wage.
hexbin predicted wage
// Same model, now storing residuals, predicted values and the cooksd()
// output (presumably Cook's distance) under explicit names.
regress-predict wage age male wealth, residuals(res) predicted(pred) cooksd(cook)
// Reduced model without wealth; the "2" suffix keeps its output variables
// separate from those of the full model.
regress-predict wage age male, residuals(res2) predicted(pred2) cooksd(cook2)
histogram pred
histogram res
histogram cook
histogram res2

// Instrumental-variable regression of wage on male and wealth, where the
// parenthesised term (wealth = age) specifies age as the instrument for wealth.
ivregress wage male (wealth = age)
// Store residuals and predicted values from the same IV specification.
ivregress-predict wage male (wealth = age), residuals(res3) predicted(pred3)
histogram pred3
histogram res3

// Inspect the distributions of wage and wealth before dichotomising them.
summarize wage wealth
histogram wage
histogram wealth
// Binary indicators: wage above 800000 and wealth above 4000000.
generate highwage = 0
replace highwage = 1 if wage > 800000
generate highwealth = 0
replace highwealth = 1 if wealth > 4000000

// Logistic regression of the high-wage indicator on age, male and highwealth.
logit highwage age male highwealth
// Store residuals, predicted values and probabilities from the same model.
logit-predict highwage age male highwealth, residuals(res4) predicted(pred4) probabilities(prob4)
histogram pred4
histogram res4
histogram prob4

// Probit regression with the same specification as the logit model.
probit highwage age male highwealth
// Store predicted values and probabilities (no residuals are requested here).
probit-predict highwage age male highwealth, predicted(pred5) probabilities(prob5)
histogram pred5
histogram prob5

// Three-level wage category: 0 = default (not wage > 0), 1 = wage > 0,
// 2 = wage > 800000. The order of the replace statements matters: the last
// matching condition wins.
generate wagecat = 0
replace wagecat = 1 if wage > 0
replace wagecat = 2 if wage > 800000

// Multinomial logistic regression of wage category on age, male and highwealth.
mlogit wagecat age male highwealth
// The commands below rely on the output being stored per category with a
// numeric suffix (pred6_1, pred6_2, prob6_1, prob6_2) - presumably one
// variable per non-base category; confirm in the command reference.
mlogit-predict wagecat age male highwealth, predicted(pred6) probabilities(prob6)
summarize pred6_1
histogram pred6_2
histogram prob6_1
histogram prob6_2

// Reduce the active dataset with "sample 0.05 54321" before cloning its units
// (presumably a 5 % random sample with seed 54321 - confirm in the command
// reference), so that the panel dataset is built on the reduced population.
sample 0.05 54321

// Create a new dataset containing the same units and make it the active one.
clone-units regressiondata paneldata
use paneldata
// Import the four variables at three measurement dates (2017, 2018, 2019).
import-panel db/INNTEKT_WLONN db/BEFOLKNING_FOEDSELS_AAR_MND db/BEFOLKNING_KJOENN db/INNTEKT_BER_BRFORM 2017-12-31 2018-12-31 2019-12-31
// Age at each measurement date. The year is taken from the panel date rather
// than hard-coded to 2019; a fixed 2019 would give the wrong age for the 2017
// and 2018 observations and make age constant within each unit.
// Assumes the birth variable is encoded as YYYYMM - TODO confirm.
generate age = year(date@panel) - int(BEFOLKNING_FOEDSELS_AAR_MND/100)
// Dummy that is 1 when BEFOLKNING_KJOENN == '1' and 0 otherwise
// (presumably '1' is the code for male - confirm against the code list).
generate male = 0
replace male = 1 if BEFOLKNING_KJOENN == '1'
rename INNTEKT_WLONN wage
rename INNTEKT_BER_BRFORM wealth

// Panel regression without options (presumably the default fixed-effects
// estimator - confirm), followed by the same model with the "re" option.
regress-panel wage male age wealth
regress-panel wage male age wealth, re
// Store predicted values, residuals and effects for each of the two models.
regress-panel-predict wage male age wealth, predicted(ppred1) residuals(pres1) effects(peff1)
regress-panel-predict wage male age wealth, re predicted(ppred2) residuals(pres2) effects(peff2)
histogram ppred1
histogram pres1
histogram peff1
histogram ppred2
histogram pres2
histogram peff2
// Hausman test for the same specification.
hausman wage male age wealth