Inclass Ex09 - GWR Method

Published

March 6, 2023

Modified

March 13, 2023

Getting started

pacman::p_load(spdep, tmap, sf,
               ggpubr, tidyverse, GWmodel, SpatialML, olsrr, devtools,tidymodels)

Today: how to calibrate SpatialML

Reading the input data sets.

mdata <- read_rds("data/aspatial/mdata.rds")

Data sampling

set.seed(1234)
resale_split <- initial_split(mdata,
                              prop=6.5/10,
                              )
train_data <- training(resale_split)
test_data <- testing(resale_split)
write_rds(train_data, "data/model/train_data.rds")
write_rds(test_data, "data/model/test_data.rds")
summary(train_data)
  resale_price     floor_area_sqm    storey_order    remaining_lease_mths
 Min.   : 218000   Min.   : 74.00   Min.   : 1.000   Min.   : 555.0      
 1st Qu.: 353000   1st Qu.: 91.00   1st Qu.: 2.000   1st Qu.: 798.0      
 Median : 405000   Median : 93.00   Median : 3.000   Median : 935.0      
 Mean   : 434362   Mean   : 95.12   Mean   : 3.262   Mean   : 940.2      
 3rd Qu.: 470000   3rd Qu.:102.00   3rd Qu.: 4.000   3rd Qu.:1111.0      
 Max.   :1186888   Max.   :133.00   Max.   :17.000   Max.   :1164.0      
    PROX_CBD       PROX_ELDERLYCARE  PROX_HAWKER         PROX_MRT      
 Min.   : 0.9994   Min.   :0.0000   Min.   :0.03334   Min.   :0.02204  
 1st Qu.:10.1516   1st Qu.:0.2994   1st Qu.:0.38016   1st Qu.:0.30115  
 Median :13.4006   Median :0.6248   Median :0.65621   Median :0.53331  
 Mean   :12.5028   Mean   :0.8068   Mean   :0.76461   Mean   :0.60827  
 3rd Qu.:15.3988   3rd Qu.:1.1446   3rd Qu.:0.97880   3rd Qu.:0.82375  
 Max.   :19.6501   Max.   :3.3016   Max.   :2.86763   Max.   :2.13061  
   PROX_PARK       PROX_GOOD_PRISCH     PROX_MALL        PROX_CHAS     
 Min.   :0.04416   Min.   : 0.06525   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.51146   1st Qu.: 2.27719   1st Qu.:0.3725   1st Qu.:0.1149  
 Median :0.72996   Median : 3.99853   Median :0.5690   Median :0.1771  
 Mean   :0.82665   Mean   : 4.17364   Mean   :0.6365   Mean   :0.1918  
 3rd Qu.:1.03458   3rd Qu.: 5.75485   3rd Qu.:0.8309   3rd Qu.:0.2493  
 Max.   :2.41314   Max.   :10.62237   Max.   :2.2710   Max.   :0.8083  
 PROX_SUPERMARKET    WITHIN_350M_KINDERGARTEN WITHIN_350M_CHILDCARE
 Min.   :0.0000001   Min.   :0.000            Min.   : 0.000       
 1st Qu.:0.1731267   1st Qu.:0.000            1st Qu.: 3.000       
 Median :0.2590770   Median :1.000            Median : 4.000       
 Mean   :0.2826749   Mean   :1.018            Mean   : 3.888       
 3rd Qu.:0.3655440   3rd Qu.:1.000            3rd Qu.: 5.000       
 Max.   :1.5713170   Max.   :7.000            Max.   :20.000       
 WITHIN_350M_BUS  WITHIN_1KM_PRISCH          geometry    
 Min.   : 0.000   Min.   :0.00      POINT        :10335  
 1st Qu.: 6.000   1st Qu.:2.00      epsg:3414    :    0  
 Median : 8.000   Median :3.00      +proj=tmer...:    0  
 Mean   : 7.995   Mean   :3.29                           
 3rd Qu.:10.000   3rd Qu.:4.00                           
 Max.   :18.000   Max.   :9.00                           

Least Square model

price_MLR<- lm(resale_price ~ floor_area_sqm + 
                 storey_order + remaining_lease_mths +
                 PROX_CBD +
                 PROX_ELDERLYCARE + 
                 PROX_HAWKER +
                 PROX_MRT +
                 PROX_PARK+
                 PROX_GOOD_PRISCH+
                 PROX_MALL+
                 PROX_CHAS+
                 PROX_SUPERMARKET + 
                 WITHIN_350M_KINDERGARTEN +
                 WITHIN_350M_CHILDCARE +
                 WITHIN_1KM_PRISCH, data=train_data
                 
                 )
summary(price_MLR)

Call:
lm(formula = resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + 
    PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + 
    PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET + 
    WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH, 
    data = train_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-219245  -38971   -2267   36062  455160 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)              110234.290  10474.885  10.524  < 2e-16 ***
floor_area_sqm             2858.469     91.025  31.403  < 2e-16 ***
storey_order              14186.600    338.903  41.860  < 2e-16 ***
remaining_lease_mths        341.761      4.599  74.317  < 2e-16 ***
PROX_CBD                 -17727.851    230.993 -76.746  < 2e-16 ***
PROX_ELDERLYCARE         -14947.999    999.024 -14.963  < 2e-16 ***
PROX_HAWKER              -18865.892   1290.067 -14.624  < 2e-16 ***
PROX_MRT                 -31843.412   1745.138 -18.247  < 2e-16 ***
PROX_PARK                 -7799.877   1512.972  -5.155 2.58e-07 ***
PROX_GOOD_PRISCH           2600.312    339.871   7.651 2.17e-14 ***
PROX_MALL                -14345.932   1990.117  -7.209 6.05e-13 ***
PROX_CHAS                -10493.329   6414.324  -1.636    0.102    
PROX_SUPERMARKET         -27127.584   4499.225  -6.029 1.70e-09 ***
WITHIN_350M_KINDERGARTEN   8645.043    630.215  13.718  < 2e-16 ***
WITHIN_350M_CHILDCARE     -4228.986    350.605 -12.062  < 2e-16 ***
WITHIN_1KM_PRISCH         -7846.524    491.021 -15.980  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 61510 on 10319 degrees of freedom
Multiple R-squared:  0.7385,    Adjusted R-squared:  0.7381 
F-statistic:  1943 on 15 and 10319 DF,  p-value: < 2.2e-16
write_rds(price_MLR, "data/model/price_mlr.rds")

Change training data to spatial point

sf is a list object

ranger dont understand simple feature

train_data_sp <-as_Spatial(train_data)
train_data_sp
class       : SpatialPointsDataFrame 
features    : 10335 
extent      : 11597.31, 42623.63, 28217.39, 48741.06  (xmin, xmax, ymin, ymax)
crs         : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
variables   : 17
names       : resale_price, floor_area_sqm, storey_order, remaining_lease_mths,          PROX_CBD,     PROX_ELDERLYCARE,        PROX_HAWKER,           PROX_MRT,          PROX_PARK,   PROX_GOOD_PRISCH,        PROX_MALL,            PROX_CHAS,     PROX_SUPERMARKET, WITHIN_350M_KINDERGARTEN, WITHIN_350M_CHILDCARE, ... 
min values  :       218000,             74,            1,                  555, 0.999393538715878, 1.98943787433087e-08, 0.0333358643817954, 0.0220407324774434, 0.0441643212802781, 0.0652540365486641,                0, 6.20621206270077e-09, 1.21715176356525e-07,                        0,                     0, ... 
max values  :      1186888,            133,           17,                 1164,  19.6500691667807,     3.30163731686804,   2.86763031236184,   2.13060636038504,   2.41313695915468,   10.6223726149914, 2.27100643784442,    0.808332738794272,     1.57131703651196,                        7,                    20, ... 

Random Forest Method

coords <- st_coordinates(mdata)
coords_train <- st_coordinates(train_data)
coords_test <- st_coordinates(test_data)

export

write_rds(coords_train, "data/model/coords_train.rds")
write_rds(coords_test, "data/model/coords_test.rds")

drop the geometry

train_data <- train_data %>% 
  st_drop_geometry()

model

price_rf<- ranger(resale_price ~ floor_area_sqm + 
                 storey_order + remaining_lease_mths +
                 PROX_CBD +
                 PROX_ELDERLYCARE + 
                 PROX_HAWKER +
                 PROX_MRT +
                 PROX_PARK+
                 PROX_GOOD_PRISCH+
                 PROX_MALL+
                 PROX_CHAS+
                 PROX_SUPERMARKET + 
                 WITHIN_350M_KINDERGARTEN +
                 WITHIN_350M_CHILDCARE +
                 WITHIN_1KM_PRISCH, data=train_data
                 
                 )
print(price_rf)
Ranger result

Call:
 ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths +      PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK +      PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET +      WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH,      data = train_data) 

Type:                             Regression 
Number of trees:                  500 
Sample size:                      10335 
Number of independent variables:  15 
Mtry:                             3 
Target node size:                 5 
Variable importance mode:         none 
Splitrule:                        variance 
OOB prediction error (MSE):       717626812 
R squared (OOB):                  0.9503324 

For better comparision, we should look at the MSE

the code chunk below is to calibriate a geographic random forest model by using grf()

set.seed(1234)
gwRF <- grf(formula = resale_price ~ floor_area_sqm + 
                 storey_order + remaining_lease_mths +
                 PROX_CBD +
                 PROX_ELDERLYCARE + 
                 PROX_HAWKER +
                 PROX_MRT +
                 PROX_PARK+
                 PROX_GOOD_PRISCH+
                 PROX_MALL+
                 PROX_CHAS+
                 PROX_SUPERMARKET + 
                 WITHIN_350M_KINDERGARTEN +
                 WITHIN_350M_CHILDCARE +
                 WITHIN_1KM_PRISCH, dframe=train_data,
            bw=55, # need to calculate by ourself
            kernel = "adaptive",
            coords = coords_train)
Ranger result

Call:
 ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths +      PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK +      PROX_GOOD_PRISCH + PROX_MALL + PROX_CHAS + PROX_SUPERMARKET +      WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_1KM_PRISCH,      data = train_data, num.trees = 500, mtry = 5, importance = "impurity",      num.threads = NULL) 

Type:                             Regression 
Number of trees:                  500 
Sample size:                      10335 
Number of independent variables:  15 
Mtry:                             5 
Target node size:                 5 
Variable importance mode:         impurity 
Splitrule:                        variance 
OOB prediction error (MSE):       658980191 
R squared (OOB):                  0.9543914 
          floor_area_sqm             storey_order     remaining_lease_mths 
            6.556904e+12             1.318161e+13             3.131699e+13 
                PROX_CBD         PROX_ELDERLYCARE              PROX_HAWKER 
            5.116133e+13             5.061929e+12             4.223677e+12 
                PROX_MRT                PROX_PARK         PROX_GOOD_PRISCH 
            5.837746e+12             3.613496e+12             1.346975e+13 
               PROX_MALL                PROX_CHAS         PROX_SUPERMARKET 
            3.366486e+12             1.205397e+12             1.899672e+12 
WITHIN_350M_KINDERGARTEN    WITHIN_350M_CHILDCARE        WITHIN_1KM_PRISCH 
            6.150282e+11             1.150095e+12             5.503917e+12 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-251496.7  -12951.1     349.5     754.7   14999.9  317500.0 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-76845.82  -3150.39     40.43     42.07   3492.12  78767.02 
                               Min          Max        Mean         StD
floor_area_sqm                   0 437249351201 18152539307 43342331389
storey_order             292515764 270218242191 16704202864 23955594317
remaining_lease_mths     560601547 557672690386 35835828777 76053328776
PROX_CBD                  46360552 397107257375 10633564793 28457854471
PROX_ELDERLYCARE          41600101 316697712372  9210553346 22724653416
PROX_HAWKER               44860060 365932899594  9122563636 21994719920
PROX_MRT                  45464484 294071582930  8318278088 18905814256
PROX_PARK                 45600643 274601736084  8158079408 18975453439
PROX_GOOD_PRISCH          33822890 296167885165  9489326577 21372268982
PROX_MALL                 47119225 448978483169  9849607710 26944595486
PROX_CHAS                 39895296 303874994634  5935973932 13575504260
PROX_SUPERMARKET          39466033 372572090806  9479790126 25191325255
WITHIN_350M_KINDERGARTEN         0 186107002291  2402078603 12357813084
WITHIN_350M_CHILDCARE            0 245431165553  4687634633 16814147573
WITHIN_1KM_PRISCH                0 181156126989  1352868378  6151303431
write_rds(gwRF, "data/model/gwRF.rds")

Preparing the test data

test_data <- cbind(test_data, coords_test) %>%
  st_drop_geometry()
gwRF_pred <- predict.grf(gwRF, test_data, x.var.name = "X", 
                         y.var.name = "Y",
                         local.w = 1,
                         global.w = 0)