Course set solutions – beginners course
The script below shows how to solve the exercises in the course set used in the standard beginners course held by Sikt and Statistics Norway on a regular basis.
Click here for more about our courses.
//Connecting to database
//Bind the data store no.ssb.fdb (version 12) to the alias ds used by every import below
require no.ssb.fdb:12 as ds
//Creating dataset and population
//Start a new dataset named totalpop; later sections merge parent and event data into it
create-dataset totalpop
import ds/BEFOLKNING_FOEDSELS_AAR_MND as birthdate
//Age reached during 2019; the division by 100 assumes birthdate is coded as YYYYMM - TODO confirm
generate age = 2019 - int(birthdate/100)
//Age distribution before the population is restricted
histogram age, discrete
import ds/BEFOLKNING_STATUSKODE 2019-01-01 as regstat
//Keep only status code '1' as of 2019-01-01 (presumably "resident" - verify against the variable metadata)
keep if regstat == '1'
//Same histogram again, for comparison with the unrestricted one above
histogram age, discrete
summarize age
//Restrict the population to ages 31-49 (both bounds are strict)
keep if age > 30 & age < 50
//Background variables used in the later sections
import ds/BEFOLKNING_KJOENN as gender
import ds/BEFOLKNING_FODELAND as country
import ds/NUDB_BU 2019-07-31 as edu
import ds/BEFOLKNING_KOMMNR_FAKTISK 2019-01-01 as residence
//Salary for each year 2015-2019, one variable per year, each imported with a 31 December date
import ds/INNTEKT_WLONN 2015-12-31 as salary15
import ds/INNTEKT_WLONN 2016-12-31 as salary16
import ds/INNTEKT_WLONN 2017-12-31 as salary17
import ds/INNTEKT_WLONN 2018-12-31 as salary18
import ds/INNTEKT_WLONN 2019-12-31 as salary19
//Producing descriptive statistics
//Salary 2015-2019: summary statistics, then mean, count and median as bar charts
summarize salary15 salary16 salary17 salary18 salary19
barchart (mean) salary15 salary16 salary17 salary18 salary19
barchart (count) salary15 salary16 salary17 salary18 salary19
barchart (median) salary15 salary16 salary17 salary18 salary19
//Distribution of 2019 salary as frequencies, without and with a normal curve
histogram salary19, freq
histogram salary19, freq normal
//Dummy: 1 when country of birth has code '000', otherwise 0
generate norwegian = 0
replace norwegian = 1 if country == '000'
tabulate norwegian
tabulate norwegian, cellpct
piechart norwegian
//Education level = first character of the education code
generate edulevel = substr(edu,1,1)
tabulate edulevel, cellpct
//Convert to numeric so that the level can be compared with < and >
destring edulevel
//Lowest education levels (0-1)
summarize salary15 salary16 salary17 salary18 salary19 if edulevel < 2
//Highest education levels (7-8). Level 9 means "unspecified" in NUS2000 and is
//excluded so that unknown education is not reported as the highest level
summarize salary15 salary16 salary17 salary18 salary19 if edulevel > 6 & edulevel < 9
//Mean 2019 salary by gender, by the norwegian dummy and by education level
tabulate gender, summarize(salary19)
tabulate norwegian, summarize(salary19)
tabulate edulevel, summarize(salary19)
//Mean salary per year, split by the same three variables
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(gender)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(norwegian)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(edulevel)
//Producing detailed statistics on professions
import ds/REGSYS_ARB_YRKE_STYRK08 2019-11-16 as profession19
//Build a grouped profession variable. Everyone starts in group 9 ("Other");
//the replace lines below move selected profession codes into groups 1-7
generate prof_gr = 9
//Group 1: every code whose first character is '1'
replace prof_gr = 1 if substr(profession19,1,1) == '1'
//Groups 2-4: exact four-character codes
replace prof_gr = 2 if profession19 == '2211'
replace prof_gr = 3 if profession19 == '2212'
replace prof_gr = 4 if profession19 == '2223'
//Groups 5-7: every code starting with the given two characters
replace prof_gr = 5 if substr(profession19,1,2) == '23'
replace prof_gr = 6 if substr(profession19,1,2) == '25'
replace prof_gr = 7 if substr(profession19,1,2) == '61'
//Missing profession gets its own group instead of staying in the default group 9
replace prof_gr = 999 if sysmiss(profession19)
//Value labels for the groups; the same label set is reused for the father's profession group later in the script
define-labels proflabel 1 Leaders 2 'General practitioners' 3 'Medical specialists' 4 Nurses 5 Teachers 6 'IT developers' 7 'Agricultural professions' 9 Other 999 'Not working'
assign-labels prof_gr proflabel
//Frequencies, percentages and mean 2019 salary per profession group
tabulate prof_gr
tabulate prof_gr, cellpct
tabulate prof_gr, summarize(salary19)
//Mean salary per profession group: 2019 only, then all five years
barchart (mean) salary19, over(prof_gr)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(prof_gr)
//Data on parents
//Father and mother identifiers are imported into totalpop; they are the merge keys used below
import ds/BEFOLKNING_FAR_FNR as father_fnr
import ds/BEFOLKNING_MOR_FNR as mother_fnr
//Separate dataset holding the characteristics that will be attached to the parents
create-dataset parents
import ds/INNTEKT_WLONN 2019-12-31 as salary19_father
import ds/REGSYS_ARB_YRKE_STYRK08 2019-11-16 as profession19_father
import ds/NUDB_BU 2019-07-31 as edu_father
//Numeric education level from the first character of the code, as done for edulevel in totalpop
generate edulevel_father = substr(edu_father,1,1)
destring edulevel_father
//Copy each variable under a _mother name so the same values can be linked through both keys
clone-variables salary19_father -> salary19_mother
clone-variables profession19_father -> profession19_mother
clone-variables edulevel_father -> edulevel_mother
//Attach the _father variables by matching on father_fnr and the _mother variables by matching on mother_fnr
merge salary19_father profession19_father edulevel_father into totalpop on father_fnr
merge salary19_mother profession19_mother edulevel_mother into totalpop on mother_fnr
//Switch back to totalpop for the analysis
use totalpop
summarize salary19 salary19_father salary19_mother
//Pairwise correlations of salary: person vs father, person vs mother, father vs mother
correlate salary19 salary19_father
correlate salary19 salary19_mother
correlate salary19_father salary19_mother
//Same three pairs for education level
correlate edulevel edulevel_father
correlate edulevel edulevel_mother
correlate edulevel_father edulevel_mother
//Father correlations restricted to gender '1' and mother correlations to gender '2'
//(gender '1' is coded as male in the regression section; '2' is presumably female - confirm)
correlate salary19 salary19_father if gender == '1'
correlate salary19 salary19_mother if gender == '2'
correlate edulevel edulevel_father if gender == '1'
correlate edulevel edulevel_mother if gender == '2'
//Grouped profession for the father, built with the same rules as prof_gr:
//default group 9, selected codes moved to groups 1-7, missing profession set to 999
generate prof_gr_father = 9
replace prof_gr_father = 1 if substr(profession19_father,1,1) == '1'
replace prof_gr_father = 2 if profession19_father == '2211'
replace prof_gr_father = 3 if profession19_father == '2212'
replace prof_gr_father = 4 if profession19_father == '2223'
replace prof_gr_father = 5 if substr(profession19_father,1,2) == '23'
replace prof_gr_father = 6 if substr(profession19_father,1,2) == '25'
replace prof_gr_father = 7 if substr(profession19_father,1,2) == '61'
replace prof_gr_father = 999 if sysmiss(profession19_father)
//Reuse the label set proflabel defined in the professions section
assign-labels prof_gr_father proflabel
//Father's group against the person's own group, row percentages;
//prof_gr_father < 9 keeps only fathers in groups 1-7 (drops "Other" and "Not working")
tabulate prof_gr_father prof_gr if prof_gr_father < 9, rowpct
//Same table restricted to gender '1'
tabulate prof_gr_father prof_gr if prof_gr_father < 9 & gender == '1', rowpct
//Regression analysis
//Dummy for men (gender code '1')
generate male = 0
replace male = 1 if gender == '1'
//Dummy for municipality code '0301': 1 when residence equals it, 0 when it differs
generate oslo = 1 if residence == '0301'
replace oslo = 0 if residence != '0301'
tabulate oslo, cellpct
//Dummy for higher education: 1 for levels 7-8, 0 for levels 0-6.
//Level 9 means "unspecified" in NUS2000, so it is left missing instead of being
//classified as high education; missing education stays missing as before
generate high_edu = 1 if edulevel >= 7 & edulevel <= 8
replace high_edu = 0 if edulevel >= 0 & edulevel < 7
//Regression of 2019 salary on age, the dummies above and the father's 2019 salary
regress salary19 age norwegian oslo male high_edu salary19_father
//Same model specification again, storing the residuals in the variable res
regress-predict salary19 age norwegian oslo male high_edu salary19_father, residuals(res)
//Distribution of the residuals, without and with a normal curve
histogram res
histogram res, normal
//Using longitudinal information
//Separate dataset with marital-status events for the period 2018-01-01 to 2018-12-31
create-dataset events
import-event ds/SIVSTANDFDT_SIVSTAND 2018-01-01 to 2018-12-31 as siv_events
//Keep only events with status code '4' (treated as divorce below)
//NOTE(review): confirm whether import-event also returns a status already in effect at the
//start of the period; if so, persons divorced before 2018 would be counted here as well
keep if siv_events == '4'
//One row per person, holding the number of remaining events
collapse (count) siv_events, by(PERSONID_1 )
rename siv_events divorces
//Attach the count to the main dataset and switch back to it
merge divorces into totalpop
use totalpop
//Dummy: 1 when at least one such event was counted for the person, otherwise 0
generate divorced2018 = 0
replace divorced2018 = 1 if divorces >= 1
tabulate divorced2018