-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
2 changed files
with
233 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# This script analyses housing data for Luxembourg | ||
library(dplyr) | ||
library(ggplot2) | ||
library(tidyr) | ||
|
||
#Let’s load the datasets: | ||
|
||
# Commune-level and country-level price tables, read from the `datasets` folder
commune_level_data <- read.csv(file = "datasets/commune_level_data.csv")
country_level_data <- read.csv(file = "datasets/country_level_data.csv")
|
||
#Let’s compute the Laspeyres index for each commune: | ||
|
||
# Per locality: tag the 2010 prices as base values (p0, p0_m2), carry them
# down to the following rows, then express every price as a percentage of
# its base value (pl, pl_m2).
commune_level_data <- commune_level_data |>
  group_by(locality) |>
  mutate(
    p0 = ifelse(year == "2010", average_price_nominal_euros, NA),
    p0_m2 = ifelse(year == "2010", average_price_m2_nominal_euros, NA)
  ) |>
  fill(p0, p0_m2, .direction = "down") |>
  ungroup() |>
  mutate(
    pl = average_price_nominal_euros / p0 * 100,
    pl_m2 = average_price_m2_nominal_euros / p0_m2 * 100
  )
|
||
|
||
#Let’s also compute it for the whole country: | ||
|
||
# Same index as for the communes, without grouping: the 2010 prices are the
# base values, carried down to the following rows.
country_level_data <- country_level_data |>
  mutate(
    p0 = ifelse(year == "2010", average_price_nominal_euros, NA),
    p0_m2 = ifelse(year == "2010", average_price_m2_nominal_euros, NA)
  ) |>
  fill(p0, p0_m2, .direction = "down") |>
  mutate(
    pl = average_price_nominal_euros / p0 * 100,
    pl_m2 = average_price_m2_nominal_euros / p0_m2 * 100
  )
|
||
|
||
#We are going to create a plot for 5 communes and compare the price evolution in the communes | ||
#to the national price evolution. Let’s first list the communes: | ||
|
||
# Communes to plot, in the order they are indexed further down
communes <- c(
  "Luxembourg",
  "Esch-sur-Alzette",
  "Mamer",
  "Schengen",
  "Wincrange"
)
|
||
#' Plot the per-m2 price index of one commune next to the national one
#'
#' @param country_level_data Data frame of national figures; the columns
#'   `year`, `locality` and `pl_m2` are used.
#' @param commune_level_data Data frame of commune figures with the same
#'   columns; its rows are selected on `locality`.
#' @param commune Value compared against `locality` to pick the commune.
#' @return A ggplot object drawing one line per `locality`.
make_plot <- function(country_level_data,
                      commune_level_data,
                      commune) {
  # Native pipe, consistent with the rest of the script
  filtered_data <- commune_level_data |>
    filter(locality == commune)

  # Stack national and commune rows so both series share one plot
  data_to_plot <- bind_rows(
    country_level_data,
    filtered_data
  )

  ggplot(data_to_plot) +
    geom_line(aes(
      y = pl_m2,
      x = year,
      group = locality,
      colour = locality
    ))
}
|
||
# One plot per commune, each compared with the national series

# Luxembourg
lux_plot <- make_plot(
  country_level_data,
  commune_level_data,
  commune = communes[[1]]
)

# Esch sur Alzette
esch_plot <- make_plot(
  country_level_data,
  commune_level_data,
  commune = communes[[2]]
)

# Mamer
mamer_plot <- make_plot(
  country_level_data,
  commune_level_data,
  commune = communes[[3]]
)

# Schengen
schengen_plot <- make_plot(
  country_level_data,
  commune_level_data,
  commune = communes[[4]]
)

# Wincrange
wincrange_plot <- make_plot(
  country_level_data,
  commune_level_data,
  commune = communes[[5]]
)
|
||
# Let's save the plots as PDF files under `plots/`.
# Create the folder first: ggsave() may fail when the target directory is
# missing. showWarnings = FALSE keeps this quiet when it already exists.
dir.create("plots", showWarnings = FALSE)
ggsave("plots/lux_plot.pdf", lux_plot)
ggsave("plots/esch_plot.pdf", esch_plot)
ggsave("plots/mamer_plot.pdf", mamer_plot)
# Fixed typo: the object is `schengen_plot` (`schenger_plot` does not exist)
ggsave("plots/schengen_plot.pdf", schengen_plot)
ggsave("plots/wincrange_plot.pdf", wincrange_plot)
136 changes: 136 additions & 0 deletions
136
Reproducible_pipelines_book_club/Meeting_05/save_data.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
# Packages whose functions are called unqualified in this script
# (excel_sheets/read_excel, map, clean_names, str_trim, dplyr verbs).
library(dplyr)
library(janitor)
library(purrr)
library(readxl)
library(stringr)

# Download raw Excel
url <- "https://github.com/b-rodrigues/rap4all/raw/master/datasets/vente-maison-2010-2021.xlsx"

# Shortened url
#url <- "https://is.gd/1vvBAc"

# Temporary local path for the downloaded workbook
raw_data <- tempfile(fileext = ".xlsx")

# mode = "wb" writes the file in binary mode so the xlsx is not corrupted
download.file(url, raw_data, method = "auto", mode = "wb")

# Sheet names of the workbook; each one is read in turn below
sheets <- excel_sheets(raw_data)
|
||
#' Read one sheet and record which sheet it came from
#'
#' @param ... Arguments forwarded to `read_excel()` (path, `skip`, ...).
#' @param sheet Sheet to read; also stored in the new `year` column.
#' @return The sheet's data with an added `year` column.
read_clean <- function(..., sheet) {
  sheet_data <- read_excel(..., sheet = sheet)
  mutate(sheet_data, year = sheet)
}
|
||
# Read every sheet (skipping the first 10 rows of each), stack the results
# and normalise the column names. `raw_data` still holds the file path while
# the sheets are being read; it is replaced by the combined data afterwards.
raw_data <- sheets |>
  map(\(sheet) read_clean(raw_data, skip = 10, sheet = sheet)) |>
  bind_rows() |>
  clean_names()
|
||
# Give the columns English names, trim whitespace around the locality names
# and keep only the columns used downstream.
# (The duplicated `average_price_m2_nominal_euros` rename pair was removed.)
raw_data <- raw_data |>
  rename(
    locality = commune,
    n_offers = nombre_doffres,
    average_price_nominal_euros = prix_moyen_annonce_en_courant,
    average_price_m2_nominal_euros = prix_moyen_annonce_au_m2_en_courant
  ) |>
  mutate(locality = str_trim(locality)) |>
  select(year, locality, n_offers, starts_with("average"))
|
||
|
||
# Structure of the combined data
str(raw_data)

# Let's take a look at the spelling: which variants of "Luxembourg" occur?
raw_data |>
  dplyr::count(locality) |>
  dplyr::filter(grepl("Luxembourg", locality))

# Same check for Pétange ("." matches whatever character stands for the "é")
raw_data |>
  dplyr::count(locality) |>
  dplyr::filter(grepl("P.tange", locality))
|
||
|
||
# Deal with spelling: collapse the variants found above into one name each,
# then convert the price columns to numeric.
raw_data <- raw_data |>
  mutate(
    locality = ifelse(grepl("Luxembourg-Ville", locality), "Luxembourg", locality),
    locality = ifelse(grepl("P.tange", locality), "Pétange", locality),
    across(starts_with("average"), as.numeric)
  )
|
||
# Check if missing data: show the rows without an average price
filter(raw_data, is.na(average_price_nominal_euros))

# Remove rows citing the source
raw_data <- filter(raw_data, !grepl("Source", locality))
|
||
# Keep commune level data: drop the national-average and total-offers rows
# as well as rows without a locality
commune_level_data <- raw_data |>
  filter(!(grepl("nationale|offres", locality) | is.na(locality)))
|
||
# Keep country level data
# Rows whose locality mentions "nationale" carry the national prices
country_level <- raw_data |>
  filter(grepl("nationale", locality)) |>
  select(-n_offers)

# Rows matching "Total d.offres" carry the national number of offers
offers_country <- raw_data |>
  filter(grepl("Total d.offres", locality)) |>
  select(year, n_offers)

# Join explicitly on `year`, the only column the two tables share, instead
# of relying on the implicit natural join (and its message).
country_level_data <- full_join(country_level, offers_country, by = "year") |>
  select(year, locality, n_offers, everything()) |>
  mutate(locality = "Grand-Duchy of Luxembourg")
|
||
|
||
# We need to check if communes are all in our data.
# Scrape the list of current communes: take the 2nd table of the page, drop
# the repeated header row and strip a trailing " <char>" from the names.
current_communes <- rvest::read_html("https://is.gd/lux_communes") |>
  rvest::html_table() |>
  purrr::pluck(2) |>
  janitor::clean_names() |>
  dplyr::rename(commune = name_2) |>
  dplyr::filter(commune != "Name") |>
  dplyr::mutate(commune = stringr::str_remove(commune, " .$"))

# Test if all communes are there: localities in our data not in the list
commune_level_data$locality |>
  unique() |>
  setdiff(current_communes$commune)
|
||
# We need former communes: take the 3rd table of the page and keep the
# communes dissolved after 2009
former_communes <- rvest::read_html("https://is.gd/lux_former_communes") |>
  rvest::html_table() |>
  purrr::pluck(3) |>
  janitor::clean_names() |>
  dplyr::filter(year_dissolved > 2009)

former_communes
|
||
# Put former and current communes together
communes <- unique(c(former_communes$name, current_communes$commune))

# Some communes are spelled differently on wikipedia than in the data;
# rename them to the spelling used in the data
communes <- replace(communes, communes == "Clemency", "Clémency")
communes <- replace(communes, communes == "Redange", "Redange-sur-Attert")
communes <- replace(communes, communes == "Erpeldange-sur-Sûre", "Erpeldange")
communes <- replace(communes, communes == "Luxembourg City", "Luxembourg")
communes <- replace(communes, communes == "Käerjeng", "Kaerjeng")
communes <- replace(communes, communes == "Petange", "Pétange")

# Test if this set is empty, if yes, we're good
commune_level_data$locality |>
  unique() |>
  setdiff(communes)
|
||
# Save the data (comment these lines out if you do not want to overwrite
# the existing files).
# Create the `datasets` folder first so write.csv() does not fail when it is
# missing; showWarnings = FALSE keeps this quiet when it already exists.
dir.create("datasets", showWarnings = FALSE)
write.csv(commune_level_data, "datasets/commune_level_data.csv", row.names = TRUE)
write.csv(country_level_data, "datasets/country_level_data.csv", row.names = TRUE)