Neighborhood map
plateau<-mtl_p%>%filter(nom_arr=='Le Plateau-Mont-Royal')%>%st_buffer(dist=0)
# near the border between two
neighbourhood<-mtl_p%>%filter(nom_qr %in% c("Saint-Louis", "Milton-Parc"))%>%summarize()
if(file.exists("osm_neighbourhood.RDS")){
osm_neighbourhood<-readRDS(file="osm_neighbourhood.RDS")
} else {
# osm understands information in 4326 projection
osm_neighbourhood<-read_osm(st_bbox(neighbourhood%>%st_transform(4326)), type="esri") # ext=1.5,
# save to file to speedup runs
saveRDS(osm_neighbourhood, file = "osm_neighbourhood.RDS")
}
# my region of interest
ROI_p<-mtl_p %>% filter( nom_arr %in%
c('Le Plateau-Mont-Royal',
'Villeray–Saint-Michel–Parc-Extension',
'Rosemont–La Petite-Patrie',
"Ahuntsic-Cartierville",
"Outremont",
"Ville-Marie",
"Le Sud-Ouest",
"Côte-des-Neiges–Notre-Dame-de-Grâce","Verdun","Westmount","Hampstead","Côte-Saint-Luc","Saint-Laurent","Mont-Royal"
) ) %>% st_intersection(st_geometry(mtl_land)%>%st_buffer(0)) %>%
mutate( arr=factor(nom_arr,
levels=c('Le Plateau-Mont-Royal',
'Villeray–Saint-Michel–Parc-Extension',
'Rosemont–La Petite-Patrie',
"Ahuntsic-Cartierville","Outremont", "Ville-Marie","Le Sud-Ouest",
"Côte-des-Neiges–Notre-Dame-de-Grâce","Verdun","Westmount","Hampstead","Côte-Saint-Luc","Saint-Laurent","Mont-Royal"),
labels=c('Plateau','Villeray','Rosemont','Ahuntsic',"Outremont",
"Ville-Marie","Sud-Ouest","CdN-NdG","Verdun","Westmount","Hampstead","Côte-Saint-Luc","Saint-Laurent","TMR"
)),
qr=as.factor(nom_qr))
#
if(file.exists("osm_roi.RDS")){
osm_roi<-readRDS(file="osm_roi.RDS")
} else {
osm_roi<-read_osm(st_bbox(ROI_p%>%st_transform(4326)), type="esri") # ext=1.5,
# save to file to speedup runs
saveRDS(osm_roi, file = "osm_roi.RDS")
}
# create subset of data
neighbors<-prop_geo_p %>%
filter(type=='Apartment', mprice>0, bedrooms %in% c(1,2,3,4), area_interior<10000, area_interior>0) %>%
st_join(neighbourhood, left=F) %>%
mutate(bedrooms=droplevels(bedrooms), parking=as.factor(parking>0))
tm_shape(osm_neighbourhood) + tm_rgb(alpha=0.7)+
tm_shape(neighbourhood) + tm_borders(col='red',alpha=0.8) +
tm_shape(neighbors) + tm_symbols(shape=3,size=0.2,alpha=0.8) +
tm_shape(ref_home) + tm_symbols(col='red',shape=4,size=0.5,alpha=0.8)+
tm_compass(position=c("right", "bottom"))+
tm_scale_bar(position=c("left", "bottom"))+
tm_layout(scale=1.5)
neighbourhood prices,
simple linear model
# convert spatial object into a simple data frame with x and y
neighbors_<-bind_cols(as.data.frame(neighbors),as.data.frame(st_coordinates(neighbors)))%>%rename(x=X,y=Y)
ggplot(neighbors_, aes(y=price, x=area_interior, col=parking))+
geom_point(alpha=0.7,size=2)+
scale_y_continuous( labels = scales::dollar)+
geom_smooth(method='lm')+
geom_vline(data=ref_home, aes(xintercept=area_interior),col='black',lty=2)+
coord_cartesian(xlim=c(400,2000),ylim=c(2e5,1e6))+
ylab('Price')+xlab('Surface sqft')
# simple regression model
model_price_lm <- lm(mprice ~ parking:area_interior , data=neighbors_)
print(summary(model_price_lm))
##
## Call:
## lm(formula = mprice ~ parking:area_interior, data = neighbors_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -369729 -64800 -9132 55880 690669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40564.16 22427.94 1.809 0.0715 .
## parkingFALSE:area_interior 438.79 23.57 18.616 <2e-16 ***
## parkingTRUE:area_interior 513.52 19.41 26.459 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 129400 on 311 degrees of freedom
## Multiple R-squared: 0.6989, Adjusted R-squared: 0.6969
## F-statistic: 360.9 on 2 and 311 DF, p-value: < 2.2e-16
# predict price
print(predict(model_price_lm, ref_home, se.fit=T))
## $fit
## 1
## 443160.1
##
## $se.fit
## [1] 10814.63
##
## $df
## [1] 311
##
## $residual.scale
## [1] 129405.7
neighbors_$pred<- predict(model_price_lm)
neighbors_$res <- neighbors_$mprice-neighbors_$pred
ggplot(neighbors_, aes(x=pred, y=res) ) +
geom_point(alpha=0.7,size=2)+
scale_y_continuous(labels = scales::dollar )+
scale_x_continuous(labels = scales::dollar )+
xlab('predicted')+ylab('residuals')
glm model
ggplot(neighbors_, aes(y=price, x=area_interior, col=parking))+
geom_point(alpha=0.7,size=2)+
scale_y_continuous( labels = scales::dollar)+
geom_smooth(method='glm',method.args=list(family = inverse.gaussian(link="log")))+
geom_vline(data=ref_home, aes(xintercept=area_interior),col='black',lty=2)+
coord_cartesian(xlim=c(400,2000),ylim=c(2e5,1e6))+
ylab('Price')+xlab('Surface sqft')
fam=inverse.gaussian(link="log")
ilink <- fam$linkinv
model_price_glm <- glm(mprice ~ parking:area_interior , data=neighbors_, family=fam)
print(summary(model_price_glm))
##
## Call:
## glm(formula = mprice ~ parking:area_interior, family = fam, data = neighbors_)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.158e-03 -2.304e-04 -3.213e-05 1.643e-04 7.957e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.230e+01 3.984e-02 308.73 <2e-16 ***
## parkingFALSE:area_interior 7.482e-04 4.486e-05 16.68 <2e-16 ***
## parkingTRUE:area_interior 8.749e-04 3.836e-05 22.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for inverse.gaussian family taken to be 8.466078e-08)
##
## Null deviance: 8.4965e-05 on 313 degrees of freedom
## Residual deviance: 2.6885e-05 on 311 degrees of freedom
## AIC: 8169
##
## Number of Fisher Scoring iterations: 4
# predict price
predict_home<-predict(model_price_glm, ref_home, se.fit=T)
print("prediction and 95% confidence intervals:")
## [1] "prediction and 95% confidence intervals:"
print(ilink(predict_home$fit))
## 1
## 436702.2
print(c(ilink(predict_home$fit-2*predict_home$se.fit), ilink(predict_home$fit+2*predict_home$se.fit)))
## 1 1
## 421749.4 452185.2
neighbors_$pred<- ilink(predict(model_price_glm))
neighbors_$res <- neighbors_$mprice-neighbors_$pred
ggplot(neighbors_, aes(x=pred, y=res) ) +
geom_point(alpha=0.7,size=2)+
scale_y_continuous(labels = scales::dollar )+
scale_x_continuous(labels = scales::dollar )+
ggtitle('GLM Residuals')
Longitudinal condo price model: Loess smoothihg
Longitudinal condo price model:GAM model
##
## Family: inverse.gaussian
## Link function: log
##
## Formula:
## price_sqft ~ bedrooms + parking + s(start_date, k = 24)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.22248 0.02732 227.797 < 2e-16 ***
## bedrooms2 -0.03438 0.02879 -1.194 0.23335
## bedrooms3 -0.03203 0.03797 -0.844 0.39953
## bedrooms4 -0.18025 0.06643 -2.713 0.00704 **
## parkingTRUE 0.12210 0.02360 5.173 4.17e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(start_date) 1.971 2.454 5.154 0.00532 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.109 Deviance explained = 13%
## -REML = 1920.4 Scale est. = 8.3199e-05 n = 314
## Analysis of Deviance Table
##
## Model 1: price_sqft ~ parking + bedrooms
## Model 2: price_sqft ~ bedrooms + parking + s(start_date, k = 24)
## Resid. Df Resid. Dev Df Deviance
## 1 309.00 0.026257
## 2 307.03 0.025128 1.9712 0.0011288
##
## Family: poisson
## Link function: log
##
## Formula:
## n ~ s(start_date, k = 24)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.37645 0.05988 6.287 3.23e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(start_date) 8.955 23 160.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.349 Deviance explained = 47.8%
## -REML = 304.87 Scale est. = 1 n = 197
Longitudinal condo price model: prediction
## [1] "prediction and 95% confidence intervals:"
## 1 1 1
## 527.7101 549.3195 571.8138
Longitudinal condo price model: relative price change
Condo price (sqft) relative to start
survival analysis
survdiff(Surv(time_on_market, !active) ~ parking, neighbors)
## Call:
## survdiff(formula = Surv(time_on_market, !active) ~ parking, data = neighbors)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## parking=FALSE 146 117 105 1.491 2.57
## parking=TRUE 168 144 156 0.996 2.57
##
## Chisq= 2.6 on 1 degrees of freedom, p= 0.1
surv_type<-survfit(Surv(time_on_market, !active) ~ 1, neighbors_)
p<-ggsurvplot( surv_type, data=neighbors_, conf.int = TRUE,
conf.int.style ='step', censor=F, surv.scale='percent',
break.time.by=30,surv.median.line='hv',
xlim=c(0.0,365.0))
p$plot <- p$plot + theme(legend.text = element_text(size = 5, color = "black", face = "bold"))
p
Plex price estimate
selected_mls=17758383
max_distance=2000 # 2km
plex_pe<-prop_geo_p %>% filter(type!='Apartment', type!='House') %>%
mutate(parking=as.factor(parking>0), stories=as.factor(stories),type=as.factor(type))
selected<-plex_pe%>%filter(mls==selected_mls) %>% rename(mls_ref=mls)
# selected
print(selected%>%as.data.frame()%>%dplyr::select(mls_ref,area_interior,area_land,type,bedrooms,parking,first_ts))
## mls_ref area_interior area_land type bedrooms parking first_ts
## 1 17758383 1765.28 1750 Triplex 3 TRUE 2020-02-05
# create a cicrcle around reference
search_roi <- st_buffer(selected, max_distance)
# remove some noise entries
result <- st_intersection(plex_pe %>% filter(mls!=selected_mls), search_roi) %>%
filter(area_interior<10000, area_interior>100,area_land>0,price<1e7,price>100 )
if(file.exists("osm_result.RDS")){
osm_result<-readRDS(file="osm_result.RDS")
} else {
# osm understands information in 4326 projection
osm_result <- read_osm(st_bbox(st_transform(search_roi, 4326 )), ext=1.5, type="esri")
# save to file to speedup runs
saveRDS(osm_result, file = "osm_result.RDS")
}
#
ggplot(result%>%as.data.frame(), aes(x=area_interior, y=price, col=type))+
geom_point(alpha=0.7)+
geom_point(data=selected%>%as.data.frame(), aes(x=area_interior, y=price) , size=10, shape='+', alpha=0.9, col='black') +
scale_y_continuous(labels = scales::dollar )+xlim(c(0,5000))+
geom_smooth(method='glm',method.args=list(family = inverse.gaussian(link="log")))
ggplot(result%>%as.data.frame(), aes(y=price_sqft, x=parking, col=parking))+
geom_point(alpha=0.7,size=2,pos='jitter')+
geom_boxplot(alpha=0.5,size=0.5)+
facet_grid(type~bedrooms,labeller = label_both)+
scale_y_continuous(labels = scales::dollar)+
ylab('Price/sqft')+xlab('')
per sqft price regression model
m_plex <- glm(price_sqft ~ type+bedrooms+parking, data=result%>%as.data.frame(), family=fam)
print(summary(m_plex))
##
## Call:
## glm(formula = price_sqft ~ type + bedrooms + parking, family = fam,
## data = result %>% as.data.frame())
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.098291 -0.012601 -0.005437 0.003621 0.093939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7965411 0.0905453 64.018 <2e-16 ***
## typeTriplex -0.1066840 0.0465888 -2.290 0.0226 *
## typeFourplex 0.0343763 0.0734954 0.468 0.6402
## bedrooms2 -0.0005084 0.0913386 -0.006 0.9956
## bedrooms3 -0.0442496 0.0871485 -0.508 0.6119
## bedrooms4 -0.0801371 0.1065306 -0.752 0.4524
## bedrooms5 -0.1019773 0.1764383 -0.578 0.5636
## parkingTRUE 0.1319670 0.0443394 2.976 0.0031 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for inverse.gaussian family taken to be 0.00053711)
##
## Null deviance: 0.14560 on 402 degrees of freedom
## Residual deviance: 0.13659 on 395 degrees of freedom
## AIC: 4904.7
##
## Number of Fisher Scoring iterations: 7
# # predict price
predict_selected<-predict(m_plex, selected%>%as.data.frame(), se.fit=T)
print("prediction and 95% confidence intervals:")
## [1] "prediction and 95% confidence intervals:"
print(ilink(predict_selected$fit))
## 1
## 322.9749
print(ilink(predict_selected$se.fit))
## [1] 1.043278
print(c(ilink(predict_selected$fit-2*predict_selected$se.fit),
ilink(predict_selected$fit),
ilink(predict_selected$fit+2*predict_selected$se.fit))*selected$area_interior)
## 1 1 1
## 523820.3 570140.9 620557.6