朝が苦手な人間が綴るブログ (限界大学院生編)

基礎こそ物の上手なれ. 人間万事塞翁が馬. を大切にしている経済学徒.

社会科学のためのデータ分析入門 章末問題解答(1章-2) Rコード

章末問題解答(1章-1) Rコードの記事はこちらこちらから確認できます。
 

はじめに (Textbook Solution: Quantitative Social Science: An Introduction )

Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。

 
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate if you could point out mistakes.
 
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
 

1章-2 (Chapter1 - Section 2)

 
スクリプトをベタ張りしています。

## Chapter 1 Introduction
## Exercise Solution

## -----------------------------------------------------
## Taka(the author of this script) uses Japanese-Version QSS.
## -----------------------------------------------------
## Section 2
## Q1

kenya <- read.csv("Kenya.csv")
sweden <- read.csv("Sweden.csv")
world <- read.csv("World.csv")

dim(kenya); dim(sweden); dim(world)

## Create sum of person-year.
kenya$py <- kenya$py.men + kenya$py.women
sweden$py <- sweden$py.men + sweden$py.women
world$py <- world$py.men + world$py.women

## We will create CBRs in 3 ways.
## Create Kenya's CBR. 
kc1 <- sum(kenya[1:15, "births"]) / sum(kenya[1:15, "py"])
kc2 <- sum(kenya[16:30, "births"]) / sum(kenya[16:30, "py"])
kCBR <- c(kc1, kc2)

## Create Sweden's CBR. 
sc1 <- sum(sweden[c(1:15), "births"]) / sum(sweden[c(1:15), "py"])
sc2 <- sum(sweden[c(16:30), "births"]) / sum(sweden[c(16:30), "py"])
sCBR <- c(sc1, sc2)

## Create World's CBR.
## First, we create & add CBR in the data.frame.
world$crb <- world$births / world$py

## Divide data into two periods.  
wp1 <- world[world$period=="1950-1955", ] # first half
wp2 <- world[world$period=="2005-2010", ] # second half

## Calculate CBRs
wc1 <- sum(wp1$births) / sum(wp1$py)
wc2 <- sum(wp2$births) / sum(wp2$py)

## Save CBR as a new vector which has length=2
wCBR <- c(wc1, wc2)

## Print 3 countries' CBRs
kCBR; sCBR; wCBR


## Q2

## We will create ASFR in the different ways again.
## childbearing age: CBA
## Kenya
## 1950-55
Kcba1 <- kenya[4:10, ]
K.ASFR1 <- Kcba1$births / Kcba1$py.women

## 2005-10
Kcba2 <- kenya[19:25, ]
K.ASFR2 <- Kcba2$births / Kcba2$py.women

## total
## K.ASFR <- c(K.ASFR1, K.ASFR2) # easiest way
Kcba3 <- rbind(kenya[4:10, ], kenya[19:25, ])
K.ASFR <- Kcba3$births / Kcba3$py.women
 
## Sweden
## 1950-55
Scbab1 <- sweden[4:10, "births"]
Scbaw1 <- sweden[4:10, "py.women"]
S.ASFR1 <- Scbab1 / Scbaw1

## 2005-10
Scbaw2 <- sweden[19:25, "py.women"]
Scbab2 <- sweden[19:25, "births"]
S.ASFR2 <- Scbab2 / Scbaw2

## total
Scbab <- c(Scbab1, Scbab2)
Scbaw <- c(Scbaw1, Scbaw2)
S.ASFR <- Scbab / Scbaw

## World
## 1950-55
W.ASFR <- world$births / world$py.women
W.ASFR1 <- W.ASFR[4:10]

## 2005-10
W.ASFR2 <- W.ASFR[c(19:25)]

## total
W.ASFR <- W.ASFR[c(4,5,6,7,8,9,10,19,20,21,22,23,24,25)]

## kenya[kenya$age == "15-19", ]

K.ASFR1; S.ASFR1; W.ASFR1
mean(K.ASFR1); mean(S.ASFR1); mean(W.ASFR1)

K.ASFR2; S.ASFR2; W.ASFR2
mean(K.ASFR2); mean(S.ASFR2); mean(W.ASFR2)

K.ASFR; S.ASFR; W.ASFR
mean(K.ASFR); mean(S.ASFR); mean(W.ASFR)


## Q3

## Kenya's TFR
K.TFR <- c(sum(K.ASFR1 * 5), sum(K.ASFR2 * 5))
## the number of women
kw1 <- sum(kenya[kenya$period=="1950-1955", "py.women"]) # first half
kw2 <- sum(kenya[kenya$period=="2005-2010", "py.women"]) # second half

## Sweden's TFR
S.TFR <- c(sum(S.ASFR1 * 5), sum(S.ASFR2 * 5))
## the number of women
sw1 <- sum(sweden[sweden$period=="1950-1955", "py.women"]) # first half
sw2 <- sum(sweden[sweden$period=="2005-2010", "py.women"]) # second half

## World's TFR
W.TFR <- c(sum(W.ASFR1 * 5), sum(W.ASFR2 * 5))
## the number of women
ww1 <- sum(world[world$period=="1950-1955", "py.women"]) # first half
ww2 <- sum(world[world$period=="2005-2010", "py.women"]) # second half
## the number of births
wb1 <- sum(world[world$period=="1950-1955", "births"]) # first half
wb2 <- sum(world[world$period=="2005-2010", "births"]) # second half

## print TFR & #women
K.TFR; S.TFR; W.TFR
kw1; kw2; sw1; sw2; ww1; ww2
wb1; wb2


## Q4

## Kenya's CDR
K.CDR1 <-  sum(kenya[kenya$period=="1950-1955", "deaths"]) / 
  sum(kenya[kenya$period=="1950-1955", "py"]) 

K.CDR2 <-  sum(kenya[kenya$period=="2005-2010", "deaths"]) / 
  sum(kenya[kenya$period=="2005-2010", "py"]) 

K.CDR <- c(K.CDR1, K.CDR2)

## Sweden's CDR
S.CDR1 <- sum(sweden[1:15, "deaths"]) / sum(sweden[1:15, "py"])
S.CDR2 <- sum(sweden[16:30, "deaths"]) / sum(sweden[16:30, "py"])

S.CDR <- c(S.CDR1, S.CDR2)

## World's CDR
W.CDR1 <- sum(world[c(1:15), "deaths"]) / sum(world[c(1:15), "py"])
W.CDR2 <- sum(world[c(16:30), "deaths"]) / sum(world[c(16:30), "py"])

W.CDR <- c(W.CDR1, W.CDR2)

## Print CDRs. 
K.CDR; S.CDR; W.CDR


## Q5

K.ASDR <- kenya[16:30, "deaths"] / kenya[16:30, "py"]
S.ASDR <- sweden[16:30, "deaths"] / sweden[16:30, "py"]

options(scipen = 1) # avoid exponential notation
K.ASDR; S.ASDR
S.ASDR - K.ASDR

## In the every age class Kenya's ASDR is higher. 


## Q5

## rate of Sweden's each age class, P
sp <- sweden$py[16:30] / sum(sweden$py[16:30])

## Kenya's counterfactual CDR
cfK.CDR <- sum(K.ASDR * sp) # result

## Compare the two
K.CDR[2]; cfK.CDR # result: 0.01038914 0.02321646
K.CDR[2] - cfK.CDR

## This means if Kenya has Sweden's population distribution, 
## CDR will be 1 percent point higher than factual CDR. 
## Thus, the comparison of factual CDRs b/w Kenya & Sweden
## does not necessarily give a meaningful result. 

 
章末問題解答(1章-1) Rコード
www.econ-stat-grad.com