Course set solutions – beginners course

The script below shows how to solve the lessons of the course set used in the standard beginners course, which is held by Sikt and Statistics Norway on a regular basis.

Click here for more about our courses.

// Connecting to database
// Makes the databank no.ssb.fdb available under the alias "ds"; the ":12"
// suffix presumably pins databank version 12 (confirm against the manual).
// Every import in this script uses the "ds/" prefix defined here.
require no.ssb.fdb:12 as ds

// Creating dataset and population
// Builds the dataset "totalpop", narrows it to the study population and
// imports the background variables used by the later sections.
create-dataset totalpop

// birthdate is divided by 100 and truncated to obtain the birth year, so it is
// assumed to be in YYYYMM form (TODO confirm). Age is computed relative to 2019.
import ds/BEFOLKNING_FOEDSELS_AAR_MND as birthdate
generate age = 2019 - int(birthdate/100)

// Age distribution before any restriction of the population.
histogram age, discrete

// Keep only persons whose status code dated 2019-01-01 equals '1'
// (presumably "resident"; confirm against the variable's code list),
// then repeat the age histogram and summarize age for the reduced population.
import ds/BEFOLKNING_STATUSKODE 2019-01-01 as regstat
keep if regstat == '1'
histogram age, discrete
summarize age

// Both bounds are strict, so the population kept is ages 31 to 49 inclusive.
keep if age > 30 & age < 50

// Background variables: gender, country, education and residence.
import ds/BEFOLKNING_KJOENN as gender
import ds/BEFOLKNING_FODELAND as country
import ds/NUDB_BU 2019-07-31 as edu
import ds/BEFOLKNING_KOMMNR_FAKTISK 2019-01-01 as residence
// The same salary variable imported once per year, dated 31 December 2015-2019.
import ds/INNTEKT_WLONN 2015-12-31 as salary15
import ds/INNTEKT_WLONN 2016-12-31 as salary16
import ds/INNTEKT_WLONN 2017-12-31 as salary17
import ds/INNTEKT_WLONN 2018-12-31 as salary18
import ds/INNTEKT_WLONN 2019-12-31 as salary19


// Producing descriptive statistics
// Summary statistics for the five yearly salary variables.
summarize salary15 salary16 salary17 salary18 salary19

// The same five variables charted three times: by mean, by count and by median.
barchart (mean) salary15 salary16 salary17 salary18 salary19
barchart (count) salary15 salary16 salary17 salary18 salary19
barchart (median) salary15 salary16 salary17 salary18 salary19

// Frequency histogram of 2019 salary, then the same with the "normal" option
// (presumably overlays a normal curve; confirm in the command reference).
histogram salary19, freq
histogram salary19, freq normal

// Dummy variable: starts at 0 for everyone and is set to 1 where the country
// code is '000' (presumably Norway; confirm against the code list).
generate norwegian = 0
replace norwegian = 1 if country == '000'
tabulate norwegian
tabulate norwegian, cellpct
piechart norwegian

// edulevel is the first character of the education code. It is tabulated
// first and then passed to destring; the numeric comparisons below
// (edulevel < 2, edulevel > 6) depend on that conversion having happened.
generate edulevel = substr(edu,1,1)
tabulate edulevel, cellpct
destring edulevel

// Salary summaries for the two ends of the education scale:
// levels below 2 and levels above 6.
summarize salary15 salary16 salary17 salary18 salary19 if edulevel < 2
summarize salary15 salary16 salary17 salary18 salary19 if edulevel > 6

// 2019 salary summarized within each category of gender, norwegian and edulevel.
tabulate gender, summarize(salary19)
tabulate norwegian, summarize(salary19)
tabulate edulevel, summarize(salary19)

// Mean salary per year, split by the same three grouping variables.
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(gender)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(norwegian)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(edulevel)


// Producing detailed statistics on professions
// Imports the profession code dated 2019-11-16 and recodes it into a small
// set of numbered groups (prof_gr) that are labelled and analysed below.
import ds/REGSYS_ARB_YRKE_STYRK08 2019-11-16 as profession19

// Everyone starts in group 9; the replace lines then move persons into a
// specific group based on the first one or two characters, or the full code,
// of profession19. Persons with a missing profession code end up in group 999.
generate prof_gr = 9
replace prof_gr = 1 if substr(profession19,1,1) == '1'
replace prof_gr = 2 if profession19 == '2211'
replace prof_gr = 3 if profession19 == '2212'
replace prof_gr = 4 if profession19 == '2223'
replace prof_gr = 5 if substr(profession19,1,2) == '23'
replace prof_gr = 6 if substr(profession19,1,2) == '25'
replace prof_gr = 7 if substr(profession19,1,2) == '61'
replace prof_gr = 999 if sysmiss(profession19)

// Label set mapping each group number to a readable name; it is attached to
// prof_gr here and reused for prof_gr_father later in the script.
define-labels proflabel 1 Leaders 2 'General practitioners' 3 'Medical specialists' 4 Nurses 5 Teachers 6 'IT developers' 7 'Agricultural professions' 9 Other 999 'Not working'
assign-labels prof_gr proflabel 

// Group sizes, group shares, and salary by profession group.
tabulate prof_gr
tabulate prof_gr, cellpct
tabulate prof_gr, summarize(salary19)
barchart (mean) salary19, over(prof_gr)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(prof_gr)


// Data on parents
// Link variables: these are the keys the two merge commands below use to
// attach parent data to totalpop (see "on father_fnr" / "on mother_fnr").
import ds/BEFOLKNING_FAR_FNR as father_fnr
import ds/BEFOLKNING_MOR_FNR as mother_fnr

// Separate dataset holding salary, profession and education level. The
// variables are named "_father" first and cloned to "_mother" afterwards.
create-dataset parents
import ds/INNTEKT_WLONN 2019-12-31 as salary19_father
import ds/REGSYS_ARB_YRKE_STYRK08 2019-11-16 as profession19_father
import ds/NUDB_BU 2019-07-31 as edu_father
// Same derivation as edulevel: first character of the education code,
// then destring.
generate edulevel_father = substr(edu_father,1,1)
destring edulevel_father

// Copies of the father variables under mother names, so the same dataset
// can be merged into totalpop twice with different keys.
clone-variables salary19_father -> salary19_mother
clone-variables profession19_father -> profession19_mother
clone-variables edulevel_father -> edulevel_mother

// The first merge matches on father_fnr, the second on mother_fnr.
merge salary19_father profession19_father edulevel_father into totalpop on father_fnr
merge salary19_mother profession19_mother edulevel_mother into totalpop on mother_fnr

// Switch back to totalpop, which now carries the parent variables.
use totalpop
summarize salary19 salary19_father salary19_mother
// Pairwise correlations: own vs father, own vs mother, and father vs mother,
// first for 2019 salary and then for education level.
correlate salary19 salary19_father
correlate salary19 salary19_mother
correlate salary19_father salary19_mother
correlate edulevel edulevel_father
correlate edulevel edulevel_mother
correlate edulevel_father edulevel_mother

// Same-gender comparisons: gender '1' against the father and gender '2'
// against the mother (the script's "male" dummy treats '1' as male;
// '2' is presumably female, confirm against the code list).
correlate salary19 salary19_father if gender == '1'
correlate salary19 salary19_mother if gender == '2'
correlate edulevel edulevel_father if gender == '1'
correlate edulevel edulevel_mother if gender == '2'

// Father's profession group, built with the same rules as prof_gr:
// default 9, specific groups 1-7, and 999 when the code is missing.
generate prof_gr_father = 9
replace prof_gr_father = 1 if substr(profession19_father,1,1) == '1'
replace prof_gr_father = 2 if profession19_father == '2211'
replace prof_gr_father = 3 if profession19_father == '2212'
replace prof_gr_father = 4 if profession19_father == '2223'
replace prof_gr_father = 5 if substr(profession19_father,1,2) == '23'
replace prof_gr_father = 6 if substr(profession19_father,1,2) == '25'
replace prof_gr_father = 7 if substr(profession19_father,1,2) == '61'
replace prof_gr_father = 999 if sysmiss(profession19_father)

// Reuses the proflabel label set defined in the professions section.
assign-labels prof_gr_father proflabel

// Cross-tabulation of father's group against own group with row percentages.
// The "< 9" filter leaves out fathers in group 9 and group 999; the second
// table additionally restricts to gender '1'.
tabulate prof_gr_father prof_gr if prof_gr_father < 9, rowpct
tabulate prof_gr_father prof_gr if prof_gr_father < 9 & gender == '1', rowpct


// Regression analysis
// Dummy for gender code '1': 0 by default, 1 where gender == '1'.
generate male = 0
replace male = 1 if gender == '1'

// Dummy for residence code '0301' (named "oslo" by the script). It is
// created only where the condition holds and set to 0 where residence differs.
// NOTE(review): persons with a missing residence presumably stay missing -
// confirm how "!=" treats missing values.
generate oslo = 1 if residence == '0301'
replace oslo = 0 if residence != '0301'
tabulate oslo, cellpct

// Dummy for education level 7 or higher; set to 0 only for levels 0 to 6,
// so any value outside those ranges is left unset by these two lines.
generate high_edu = 1 if edulevel >= 7
replace high_edu = 0 if edulevel >= 0 & edulevel < 7

// Regression with salary19 listed first, followed by the variables age,
// norwegian, oslo, male, high_edu and salary19_father. The regress-predict
// line repeats the same variable list and requests residuals under the name
// "res" (presumably stored as a new variable, since it is plotted below).
regress salary19 age norwegian oslo male high_edu salary19_father
regress-predict salary19 age norwegian oslo male high_edu salary19_father, residuals(res)

// Residual distribution, without and with the "normal" option.
histogram res
histogram res, normal


// Using longitudinal information
// Separate dataset of events dated 2018-01-01 to 2018-12-31, imported
// with import-event rather than at a single date.
create-dataset events
import-event ds/SIVSTANDFDT_SIVSTAND 2018-01-01 to 2018-12-31 as siv_events
// Keep only events with code '4' (presumably "divorced", given the variable
// is renamed to "divorces" below; confirm against the code list).
keep if siv_events == '4'
// Count the remaining events per PERSONID_1 and name the count "divorces".
collapse (count) siv_events, by(PERSONID_1 )
rename siv_events divorces
// No "on" key is given here, unlike the parent merges earlier in the script;
// presumably the match is on the dataset's own unit identifier - TODO confirm.
merge divorces into totalpop

// Back in totalpop: flag set to 1 for persons with at least one counted
// event in 2018, and 0 for everyone else (including those with no count).
use totalpop
generate divorced2018 = 0
replace divorced2018 = 1 if divorces >= 1
tabulate divorced2018