::p_load(spdep, tmap, sf,
pacman ggpubr, tidyverse, GWmodel, SpatialML, olsrr, devtools,tidymodels)
Inclass Ex09 - GWR Method
Getting started
Today: how to calibrate SpatialML
Reading the input data sets.
<- read_rds("data/aspatial/mdata.rds") mdata
Data sampling
set.seed(1234)
<- initial_split(mdata,
resale_split prop=6.5/10,
)<- training(resale_split)
train_data <- testing(resale_split) test_data
write_rds(train_data, "data/model/train_data.rds")
write_rds(test_data, "data/model/test_data.rds")
summary(train_data)
resale_price floor_area_sqm storey_order remaining_lease_mths
Min. : 218000 Min. : 74.00 Min. : 1.000 Min. : 555.0
1st Qu.: 353000 1st Qu.: 91.00 1st Qu.: 2.000 1st Qu.: 798.0
Median : 405000 Median : 93.00 Median : 3.000 Median : 935.0
Mean : 434362 Mean : 95.12 Mean : 3.262 Mean : 940.2
3rd Qu.: 470000 3rd Qu.:102.00 3rd Qu.: 4.000 3rd Qu.:1111.0
Max. :1186888 Max. :133.00 Max. :17.000 Max. :1164.0
PROX_CBD PROX_ELDERLYCARE PROX_HAWKER PROX_MRT
Min. : 0.9994 Min. :0.0000 Min. :0.03334 Min. :0.02204
1st Qu.:10.1516 1st Qu.:0.2994 1st Qu.:0.38016 1st Qu.:0.30115
Median :13.4006 Median :0.6248 Median :0.65621 Median :0.53331
Mean :12.5028 Mean :0.8068 Mean :0.76461 Mean :0.60827
3rd Qu.:15.3988 3rd Qu.:1.1446 3rd Qu.:0.97880 3rd Qu.:0.82375
Max. :19.6501 Max. :3.3016 Max. :2.86763 Max. :2.13061
PROX_PARK PROX_GOOD_PRISCH PROX_MALL PROX_CHAS
Min. :0.04416 Min. : 0.06525 Min. :0.0000 Min. :0.0000
1st Qu.:0.51146 1st Qu.: 2.27719 1st Qu.:0.3725 1st Qu.:0.1149
Median :0.72996 Median : 3.99853 Median :0.5690 Median :0.1771
Mean :0.82665 Mean : 4.17364 Mean :0.6365 Mean :0.1918
3rd Qu.:1.03458 3rd Qu.: 5.75485 3rd Qu.:0.8309 3rd Qu.:0.2493
Max. :2.41314 Max. :10.62237 Max. :2.2710 Max. :0.8083
PROX_SUPERMARKET WITHIN_350M_KINDERGARTEN WITHIN_350M_CHILDCARE
Min. :0.0000001 Min. :0.000 Min. : 0.000
1st Qu.:0.1731267 1st Qu.:0.000 1st Qu.: 3.000
Median :0.2590770 Median :1.000 Median : 4.000
Mean :0.2826749 Mean :1.018 Mean : 3.888
3rd Qu.:0.3655440 3rd Qu.:1.000 3rd Qu.: 5.000
Max. :1.5713170 Max. :7.000 Max. :20.000
WITHIN_350M_BUS WITHIN_1KM_PRISCH geometry
Min. : 0.000 Min. :0.00 POINT :10335
1st Qu.: 6.000 1st Qu.:2.00 epsg:3414 : 0
Median : 8.000 Median :3.00 +proj=tmer...: 0
Mean : 7.995 Mean :3.29
3rd Qu.:10.000 3rd Qu.:4.00
Max. :18.000 Max. :9.00
Least Square model
<- lm(resale_price ~ floor_area_sqm +
price_MLR+ remaining_lease_mths +
storey_order +
PROX_CBD +
PROX_ELDERLYCARE +
PROX_HAWKER +
PROX_MRT +
PROX_PARK+
PROX_GOOD_PRISCH+
PROX_MALL+
PROX_CHAS+
PROX_SUPERMARKET +
WITHIN_350M_KINDERGARTEN +
WITHIN_350M_CHILDCARE data=train_data
WITHIN_1KM_PRISCH,
)summary(price_MLR)
Call:
lm(formula = resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths +
PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK +
PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET +
WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH,
data = train_data)
Residuals:
Min 1Q Median 3Q Max
-219245 -38971 -2267 36062 455160
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 110234.290 10474.885 10.524 < 2e-16 ***
floor_area_sqm 2858.469 91.025 31.403 < 2e-16 ***
storey_order 14186.600 338.903 41.860 < 2e-16 ***
remaining_lease_mths 341.761 4.599 74.317 < 2e-16 ***
PROX_CBD -17727.851 230.993 -76.746 < 2e-16 ***
PROX_ELDERLYCARE -14947.999 999.024 -14.963 < 2e-16 ***
PROX_HAWKER -18865.892 1290.067 -14.624 < 2e-16 ***
PROX_MRT -31843.412 1745.138 -18.247 < 2e-16 ***
PROX_PARK -7799.877 1512.972 -5.155 2.58e-07 ***
PROX_GOOD_PRISCH 2600.312 339.871 7.651 2.17e-14 ***
PROX_MALL -14345.932 1990.117 -7.209 6.05e-13 ***
PROX_CHAS -10493.329 6414.324 -1.636 0.102
PROX_SUPERMARKET -27127.584 4499.225 -6.029 1.70e-09 ***
WITHIN_350M_KINDERGARTEN 8645.043 630.215 13.718 < 2e-16 ***
WITHIN_350M_CHILDCARE -4228.986 350.605 -12.062 < 2e-16 ***
WITHIN_1KM_PRISCH -7846.524 491.021 -15.980 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 61510 on 10319 degrees of freedom
Multiple R-squared: 0.7385, Adjusted R-squared: 0.7381
F-statistic: 1943 on 15 and 10319 DF, p-value: < 2.2e-16
write_rds(price_MLR, "data/model/price_mlr.rds")
Change training data to spatial point
sf is a list object
ranger dont understand simple feature
<-as_Spatial(train_data)
train_data_sp train_data_sp
class : SpatialPointsDataFrame
features : 10335
extent : 11597.31, 42623.63, 28217.39, 48741.06 (xmin, xmax, ymin, ymax)
crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
variables : 17
names : resale_price, floor_area_sqm, storey_order, remaining_lease_mths, PROX_CBD, PROX_ELDERLYCARE, PROX_HAWKER, PROX_MRT, PROX_PARK, PROX_GOOD_PRISCH, PROX_MALL, PROX_CHAS, PROX_SUPERMARKET, WITHIN_350M_KINDERGARTEN, WITHIN_350M_CHILDCARE, ...
min values : 218000, 74, 1, 555, 0.999393538715878, 1.98943787433087e-08, 0.0333358643817954, 0.0220407324774434, 0.0441643212802781, 0.0652540365486641, 0, 6.20621206270077e-09, 1.21715176356525e-07, 0, 0, ...
max values : 1186888, 133, 17, 1164, 19.6500691667807, 3.30163731686804, 2.86763031236184, 2.13060636038504, 2.41313695915468, 10.6223726149914, 2.27100643784442, 0.808332738794272, 1.57131703651196, 7, 20, ...
Random Forest Method
<- st_coordinates(mdata)
coords <- st_coordinates(train_data)
coords_train <- st_coordinates(test_data) coords_test
export
write_rds(coords_train, "data/model/coords_train.rds")
write_rds(coords_test, "data/model/coords_test.rds")
drop the geometry
<- train_data %>%
train_data st_drop_geometry()
model
<- ranger(resale_price ~ floor_area_sqm +
price_rf+ remaining_lease_mths +
storey_order +
PROX_CBD +
PROX_ELDERLYCARE +
PROX_HAWKER +
PROX_MRT +
PROX_PARK+
PROX_GOOD_PRISCH+
PROX_MALL+
PROX_CHAS+
PROX_SUPERMARKET +
WITHIN_350M_KINDERGARTEN +
WITHIN_350M_CHILDCARE data=train_data
WITHIN_1KM_PRISCH,
)print(price_rf)
Ranger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH, data = train_data)
Type: Regression
Number of trees: 500
Sample size: 10335
Number of independent variables: 15
Mtry: 3
Target node size: 5
Variable importance mode: none
Splitrule: variance
OOB prediction error (MSE): 717626812
R squared (OOB): 0.9503324
For better comparision, we should look at the MSE
the code chunk below is to calibriate a geographic random forest model by using grf()
set.seed(1234)
<- grf(formula = resale_price ~ floor_area_sqm +
gwRF + remaining_lease_mths +
storey_order +
PROX_CBD +
PROX_ELDERLYCARE +
PROX_HAWKER +
PROX_MRT +
PROX_PARK+
PROX_GOOD_PRISCH+
PROX_MALL+
PROX_CHAS+
PROX_SUPERMARKET +
WITHIN_350M_KINDERGARTEN +
WITHIN_350M_CHILDCARE dframe=train_data,
WITHIN_1KM_PRISCH, bw=55, # need to calculate by ourself
kernel = "adaptive",
coords = coords_train)
Ranger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH, data = train_data, num.trees = 500, mtry = 5, importance = "impurity", num.threads = NULL)
Type: Regression
Number of trees: 500
Sample size: 10335
Number of independent variables: 15
Mtry: 5
Target node size: 5
Variable importance mode: impurity
Splitrule: variance
OOB prediction error (MSE): 658980191
R squared (OOB): 0.9543914
floor_area_sqm storey_order remaining_lease_mths
6.556904e+12 1.318161e+13 3.131699e+13
PROX_CBD PROX_ELDERLYCARE PROX_HAWKER
5.116133e+13 5.061929e+12 4.223677e+12
PROX_MRT PROX_PARK PROX_GOOD_PRISCH
5.837746e+12 3.613496e+12 1.346975e+13
PROX_MALL PROX_CHAS PROX_SUPERMARKET
3.366486e+12 1.205397e+12 1.899672e+12
WITHIN_350M_KINDERGARTEN WITHIN_350M_CHILDCARE WITHIN_1KM_PRISCH
6.150282e+11 1.150095e+12 5.503917e+12
Min. 1st Qu. Median Mean 3rd Qu. Max.
-251496.7 -12951.1 349.5 754.7 14999.9 317500.0
Min. 1st Qu. Median Mean 3rd Qu. Max.
-76845.82 -3150.39 40.43 42.07 3492.12 78767.02
Min Max Mean StD
floor_area_sqm 0 437249351201 18152539307 43342331389
storey_order 292515764 270218242191 16704202864 23955594317
remaining_lease_mths 560601547 557672690386 35835828777 76053328776
PROX_CBD 46360552 397107257375 10633564793 28457854471
PROX_ELDERLYCARE 41600101 316697712372 9210553346 22724653416
PROX_HAWKER 44860060 365932899594 9122563636 21994719920
PROX_MRT 45464484 294071582930 8318278088 18905814256
PROX_PARK 45600643 274601736084 8158079408 18975453439
PROX_GOOD_PRISCH 33822890 296167885165 9489326577 21372268982
PROX_MALL 47119225 448978483169 9849607710 26944595486
PROX_CHAS 39895296 303874994634 5935973932 13575504260
PROX_SUPERMARKET 39466033 372572090806 9479790126 25191325255
WITHIN_350M_KINDERGARTEN 0 186107002291 2402078603 12357813084
WITHIN_350M_CHILDCARE 0 245431165553 4687634633 16814147573
WITHIN_1KM_PRISCH 0 181156126989 1352868378 6151303431
write_rds(gwRF, "data/model/gwRF.rds")
Preparing the test data
<- cbind(test_data, coords_test) %>%
test_data st_drop_geometry()
<- predict.grf(gwRF, test_data, x.var.name = "X",
gwRF_pred y.var.name = "Y",
local.w = 1,
global.w = 0)