-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTitanic.R
51 lines (39 loc) · 1.18 KB
/
Titanic.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
##### Kaggle Titanic #####
## Load Data
## Directory that holds train.csv and test.csv. Set the TITANIC_DATA_DIR
## environment variable to point somewhere else; when it is unset or empty
## the original author's folder is used. Full paths are built with
## file.path() instead of calling setwd(), so the session's working
## directory is left untouched.
data_dir <- Sys.getenv("TITANIC_DATA_DIR", unset = NA)
if (is.na(data_dir) || !nzchar(data_dir)) {
  data_dir <- "/Users/yumeng.zou/Google Drive/Freshyear/Summer/R"
}
## na.strings = "" turns empty fields into NA. Because it replaces the
## default setting, the literal text "NA" is no longer treated as missing.
train <- read.csv(
  file = file.path(data_dir, "train.csv"),
  header = TRUE,
  na.strings = ""
)
test <- read.csv(
  file = file.path(data_dir, "test.csv"),
  header = TRUE,
  na.strings = ""
)
## Exploratory Data Analysis
## Every bare expression below is evaluated only for its printed output.
## class() reports "data.frame"; typeof() would only give the underlying
## storage type ("list"), which is not what we want to confirm here.
class(train)
head(train)
tail(train)
ncol(train) ## number of variables (columns)
str(train)
## Same first/last rows, column count and structure for the test set
head(test)
tail(test)
ncol(test) ## number of variables (columns)
str(test)
## Response variable: Survived
## Numerical:
##   Discrete:   SibSp, Parch
##   Continuous: Age, Fare
## Categorical:
##   Ordinal: Pclass
##   Nominal: Sex, Embarked
## Other: ID, Name, Ticket, Cabin
## Combine two data frames
## The test set has no outcome column, so add one filled with NA to give
## both frames the same set of columns. The length comes from nrow(test)
## rather than a hard-coded row count.
test$Survived <- rep(NA_integer_, nrow(test))
## rbind() on data frames matches columns by name, so it does not matter
## that Survived ends up as the last column of `test`.
total <- rbind(train, test)
## Browse the training data in a spreadsheet-style viewer. View() is
## read-only, whereas fix() writes any edits made in the editor back into
## `train`. The interactive() guard keeps batch runs from trying to open a
## window.
if (interactive()) {
  View(train)
}
## Number of missing values in each column
colSums(is.na(train))
colSums(is.na(test))
## NAs show up in Age and Embarked (the original note stopped here with a
## trailing comma -- check the counts printed above for further columns)
summary(train)
## Survival proportions within each level of a predictor.
## margin = 2 normalises each column of the contingency table, so the
## proportions in every column sum to 1.
## with(train, ...) resolves the column names inside `train` for a single
## expression; it replaces attach(train), which put a copy of the data on
## the search path and never detached it.
## Categorical Variables
with(train, prop.table(table(Survived, Pclass), margin = 2))
with(train, prop.table(table(Survived, Sex), margin = 2))
with(train, prop.table(table(Survived, Embarked), margin = 2))
## Numerical Variables
with(train, prop.table(table(Survived, SibSp), margin = 2))
with(train, prop.table(table(Survived, Parch), margin = 2))