--- title: "DBnomics R client" author: "Sébastien Galais^[Banque de France, [https://github.com/s915](https://github.com/s915)], Thomas Brand^[CEPREMAP]" output: html_document: highlight: default theme: simplex smart: false toc: true toc_float: true number_sections: true rmarkdown::html_vignette: highlight: default theme: simplex smart: false toc: true toc_float: true number_sections: true vignette: > %\VignetteIndexEntry{DBnomics R client} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # DBnomics: the world's economic database Explore all the economic data from different providers (national and international statistical institutes, central banks, etc.), for free, following the link [db.nomics.world](https://db.nomics.world) (*N.B.: in the examples, data have already been retrieved on april 6th 2020*). [![](dbnomics001.png)](https://db.nomics.world) # Fetch time series by `ids` First, let's assume that we know which series we want to download. A series identifier (`ids`) is defined by three values, formatted like this: `provider_code`/`dataset_code`/`series_code`. ## Fetch one series from dataset 'Unemployment rate' (ZUTN) of AMECO provider ```{r, echo = FALSE} library <- function(...) { suppressWarnings( suppressPackageStartupMessages(base::library(..., quietly = TRUE)) ) } ``` ```{r} library(data.table) library(rdbnomics) ``` ```{r, echo = FALSE} reorder_cols <- function(x) { data.table::setDT(x) cols <- c( "provider_code", "dataset_code", "dataset_name", "series_code", "series_name", "original_period", "period", "original_value", "value", "@frequency" ) if ("unit" %in% colnames(x)) { cols <- c(cols, "unit", "Unit") } if ("geo" %in% colnames(x)) { cols <- c(cols, "geo", "Country") } if ("freq" %in% colnames(x)) { cols <- c(cols, "freq", "Frequency") } cols_add <- setdiff(colnames(x), cols) cols <- c(cols, cols_add) cols <- cols[cols %in% colnames(x)] cols <- match(cols, colnames(x)) x[, .SD, .SDcols = cols] } knitr::opts_chunk$set(dev.args = list(bg = "transparent")) display_table <- function(DT) { DT <- head(DT) DT <- as.data.table( lapply(DT, function(x) { if (is.character(x)) { ifelse( nchar(x) > 16, paste0(substr(x, 1, 16), "..."), x ) } else { x } }) ) DT[] } ``` ```{r, eval = FALSE} df <- rdb(ids = "AMECO/ZUTN/EA19.1.0.0.0.ZUTN") df <- df[!is.na(value)] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df001 data.table::setDT(df) ``` In such `data.table`, you will always find at least ten columns: - `provider_code` - `dataset_code` - `dataset_name` - `series_code` - `series_name` - `original_period` (character string) - `period` (date of the first day of `original_period`) - `original_value` (character string) - `value` - `@frequency` (harmonized frequency generated by DBnomics) The other columns depend on the provider and on the dataset. They always come in pairs (for the code and the name). In the data.frame `df`, you have: - `unit` (code) and `Unit` (name) - `geo` (code) and `Country` (name) - `freq` (code) and `Frequency` (name) ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 0.5, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) legend( "bottomleft", inset = 0.005, legend = sort(unique(df$series_name)), col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` In the event that you only use the argument `ids`, you can drop it and run: ```{r, eval = FALSE} df <- rdb("AMECO/ZUTN/EA19.1.0.0.0.ZUTN") ``` ## Fetch two series from dataset 'Unemployment rate' (ZUTN) of AMECO provider ```{r, eval = FALSE} df <- rdb(ids = c("AMECO/ZUTN/EA19.1.0.0.0.ZUTN", "AMECO/ZUTN/DNK.1.0.0.0.ZUTN")) df <- df[!is.na(value)] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df002 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(series_code, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 1.7, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) legend( "bottomleft", inset = 0.005, legend = sort(unique(df$series_name)), col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` ## Fetch two series from different datasets of different providers ```{r, eval = FALSE} df <- rdb(ids = c("AMECO/ZUTN/EA19.1.0.0.0.ZUTN", "Eurostat/une_rt_q/Q.SA.Y15-24.PC_ACT.T.EA19")) df <- df[!is.na(value)] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df003 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(series_code, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics(legend.text = element_text(size = 7)) i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 legend_text <- sort(unique(df$series_name)) legend_text[2] <- sapply( legend_text[2], function(y) { paste0( paste0( strsplit(y, "active ")[[1]], collapse = "active\n" ), "\n" ) } ) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 1.5, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` # Fetch time series by `mask` The code mask notation is a very concise way to select one or many time series at once. ## Fetch one series from dataset 'Balance of Payments' (BOP) of IMF ```{r, eval = FALSE} df <- rdb("IMF", "BOP", mask = "A.FR.BCA_BP6_EUR") df <- df[!is.na(value)] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df004 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_step(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "s", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value), max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` In the event that you only use the arguments `provider_code`, `dataset_code` and `mask`, you can drop the name `mask` and run: ```{r, eval = FALSE} df <- rdb("IMF", "BOP", "A.FR.BCA_BP6_EUR") ``` ## Fetch two series from dataset 'Balance of Payments' (BOP) of IMF You just have to add a `+` between two different values of a dimension. ```{r, eval = FALSE} df <- rdb("IMF", "BOP", mask = "A.FR+ES.BCA_BP6_EUR") df <- df[!is.na(value)] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df005 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(series_code, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_step(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "s", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 2*10^4, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "s") points(x2, y2, col = cols[2], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` ## Fetch all series along one dimension from dataset 'Balance of Payments' (BOP) of IMF ```{r, eval = FALSE} df <- rdb("IMF", "BOP", mask = "A..BCA_BP6_EUR") df <- df[!is.na(value)] df <- df[order(-period, REF_AREA)] df <- head(df, 100) ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df006 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ## Fetch series along multiple dimensions from dataset 'Balance of Payments' (BOP) of IMF ```{r, eval = FALSE} df <- rdb("IMF", "BOP", mask = "A.FR.BCA_BP6_EUR+IA_BP6_EUR") df <- df[!is.na(value)] df <- df[order(period), head(.SD, 50), by = INDICATOR] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df007 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` # Fetch time series by `dimensions` Searching by `dimensions` is a less concise way to select time series than using the code `mask`, but it works with all the different providers. You have a "*Description of series code*" at the bottom of each dataset page on the [DBnomics website](https://db.nomics.world). ## Fetch one value of one dimension from dataset 'Unemployment rate' (ZUTN) of AMECO provider ```{r, eval = FALSE} df <- rdb("AMECO", "ZUTN", dimensions = list(geo = "ea19")) df <- df[!is.na(value))] # or # df <- rdb("AMECO", "ZUTN", dimensions = '{"geo": ["ea19"]}') # df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df008 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 0.2, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` ## Fetch two values of one dimension from dataset 'Unemployment rate' (ZUTN) of AMECO provider ```{r, eval = FALSE} df <- rdb("AMECO", "ZUTN", dimensions = list(geo = c("ea19", "dnk"))) df <- df[!is.na(value))] # or # df <- rdb("AMECO", "ZUTN", dimensions = '{"geo": ["ea19", "dnk"]}') # df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df009 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(series_code, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 1.2, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` ## Fetch several values of several dimensions from dataset 'Doing business' (DB) of World Bank ```{r, eval = FALSE} df <- rdb("WB", "DB", dimensions = list(country = c("DZ", "PE"), indicator = c("ENF.CONT.COEN.COST.ZS", "IC.REG.COST.PC.FE.ZS"))) df <- df[!is.na(value))] # or # df <- rdb("WB", "DB", dimensions = '{"country": ["DZ", "PE"], "indicator": ["ENF.CONT.COEN.COST.ZS", "IC.REG.COST.PC.FE.ZS"]}') # df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df010 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(series_name, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value i <- 3 x3 <- df[series_name == sort(unique(series_name))[i]]$period y3 <- df[series_name == sort(unique(series_name))[i]]$value i <- 4 x4 <- df[series_name == sort(unique(series_name))[i]]$period y4 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen", "purple") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 7, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) lines(x3, y3, col = cols[3], type = "l") points(x3, y3, col = cols[3], pch = PCH) lines(x4, y4, col = cols[4], type = "l") points(x4, y4, col = cols[4], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` # Fetch time series with a `query` The query is a Google-like search that will filter/select time series from a provider's dataset. ## Fetch one series from dataset 'WEO by countries (2019-10 release)' (WEO:2019-10) of IMF ```{r, eval = FALSE} df <- rdb("IMF", "WEO:2019-10", query = "France current account balance percent") df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df014 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen", "purple") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 0.5, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` ## Fetch series from dataset 'WEO by countries (2019-10 release)' (WEO:2019-10) of IMF ```{r, eval = FALSE} df <- rdb("IMF", "WEO:2019-10", query = "current account balance percent") df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df015 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = `WEO Country`)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # ggtitle("Current account balance (% GDP)") + # dbnomics(legend.direction = "horizontal") i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value i <- 3 x3 <- df[series_name == sort(unique(series_name))[i]]$period y3 <- df[series_name == sort(unique(series_name))[i]]$value i <- 4 x4 <- df[series_name == sort(unique(series_name))[i]]$period y4 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen", "purple") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value), max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) lines(x3, y3, col = cols[3], type = "l") points(x3, y3, col = cols[3], pch = PCH) lines(x4, y4, col = cols[4], type = "l") points(x4, y4, col = cols[4], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` # Fetch time series found on the web site When you don't know the codes of the dimensions, provider, dataset or series, you can: - go to the page of a dataset on [DBnomics website](https://db.nomics.world), for example [Doing Business](https://db.nomics.world/WB/DB), - select some dimensions by using the input widgets of the left column, ![](dbnomics002.png) - click on "*Copy API link*" in the menu of the "*Download*" button, ![](dbnomics003.png) - use the `rdb(api_link = ...)` function such as below. ```{r, eval = FALSE} df <- rdb(api_link = "https://api.db.nomics.world/v22/series/WB/DB?dimensions=%7B%22country%22%3A%5B%22FR%22%2C%22IT%22%2C%22ES%22%5D%7D&q=IC.REG.PROC.FE.NO&observations=1&format=json&align_periods=1&offset=0&facets=0") df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df011 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(period, series_name)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_step(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value i <- 3 x3 <- df[series_name == sort(unique(series_name))[i]]$period y3 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 plot( x1, y1, col = cols[1], type = "s", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 1.2, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "s") points(x2, y2, col = cols[2], pch = PCH) lines(x3, y3, col = cols[3], type = "s") points(x3, y3, col = cols[3], pch = PCH) legend( "bottomleft", inset = 0.005, legend = sort(unique(df$series_name)), col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` In the event that you only use the argument `api_link`, you can drop the name and run: ```{r, eval = FALSE} df <- rdb("https://api.db.nomics.world/v22/series/WB/DB?dimensions=%7B%22country%22%3A%5B%22FR%22%2C%22IT%22%2C%22ES%22%5D%7D&q=IC.REG.PROC.FE.NO&observations=1&format=json&align_periods=1&offset=0&facets=0") ``` # Fetch time series from the cart On the cart page of the [DBnomics website](https://db.nomics.world), click on "*Copy API link*" and copy-paste it as an argument of the `rdb(api_link = ...)` function. Please note that when you update your cart, you have to copy this link again, because the link itself contains the ids of the series in the cart.
![](dbnomics005.png)
```{r, eval = FALSE} df <- rdb(api_link = "https://api.db.nomics.world/v22/series?observations=1&series_ids=BOE/6008/RPMTDDC,BOE/6231/RPMTBVE") df <- df[!is.na(value))] ``` ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df012 data.table::setDT(df) ``` ```{r, echo = FALSE} df[ , series_name := sapply( series_name, function(y) { paste0( paste0( strsplit(y, "institutions' ")[[1]], collapse = "institutions'\n" ), "\n" ) } ) ] ``` ```{r, echo = FALSE} df <- df[order(period, series_name)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df, aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen") PCH <- 18 plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 4*10^3, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) legend( "bottomleft", inset = 0.005, legend = sort(unique(df$series_name)), col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ``` # Fetch the available datasets of a provider When fetching series from [DBnomics](https://db.nomics.world), you need to give a provider and a dataset before specifying correct dimensions. With the function `rdb_datasets`, you can download the list of the available datasets for a provider. For example, to fetch the **IMF** datasets, you have to use: ```{r, eval = FALSE} rdb_datasets(provider_code = "IMF") ``` The result is a named list (its name is **IMF**) with one element which is a `data.table`: ```{r, eval = TRUE, echo = FALSE} str(rdbnomics:::rdbnomics_df016) ``` With the same function, if you want to fetch the available datasets for multiple providers, you can give a vector of providers and get a named list. ```{r, eval = FALSE} rdb_datasets(provider_code = c("IMF", "BDF")) ``` ```{r, eval = TRUE, echo = FALSE} str(rdbnomics:::rdbnomics_df018) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df018 DT <- sapply(DT, function(y) { paste0(": ", nrow(y)) }) DT <- paste0("Number of datasets for ", names(DT), " ", unname(DT)) cat(DT, sep = "\n") ``` In the event that you only request the datasets for one provider, if you define `simplify = TRUE`, then the result will be a `data.table` not a named list. ```{r, eval = FALSE} rdb_datasets(provider_code = "IMF", simplify = TRUE) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df017 data.table::setDT(DT) display_table(DT) ``` The extent of datasets gathered by [DBnomics](https://db.nomics.world) can be appreciate by using the function with the argument `provider_code` set to `NULL`: ```{r, eval = FALSE} options(rdbnomics.progress_bar_datasets = TRUE) rdb_datasets() options(rdbnomics.progress_bar_datasets = FALSE) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df019 DT <- data.table(Provider = names(DT), `Number of datasets` = sapply(DT, nrow)) DT <- DT[order(Provider)] display_table(DT) ``` # Fetch the possible dimensions of available datasets of a provider When fetching series from [DBnomics](https://db.nomics.world), it can be interesting and especially useful to specify dimensions for a particular dataset to download only the series you want to analyse. With the function `rdb_dimensions`, you can download these dimensions and their meanings. For example, for the dataset **WEO:2019-10** of the **IMF**, you may use: ```{r, eval = FALSE} rdb_dimensions(provider_code = "IMF", dataset_code = "WEO:2019-10") ``` The result is a nested named list (its names are **IMF**, **WEO:2019-10** and the dimensions names) with a `data.table` at the end of each branch: ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df020 DT <- DT$IMF$`WEO:2019-10` DT <- paste0("Number of dimensions for IMF/WEO:2019-10 : ", length(DT)) cat(DT, sep = "\n") ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df020 DT <- DT$IMF$`WEO:2019-10`[[1]] display_table(DT) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df020 DT <- DT$IMF$`WEO:2019-10`[[2]] display_table(DT) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df020 DT <- DT$IMF$`WEO:2019-10`[[3]] display_table(DT) ``` In the event that you only request the dimensions for one dataset for one provider, if you define `simplify = TRUE`, then the result will be a named list `data.table` not a nested named list. ```{r, eval = FALSE} rdb_dimensions(provider_code = "IMF", dataset_code = "WEO:2019-10", simplify = TRUE) ``` ```{r, eval = TRUE, echo = FALSE} str(rdbnomics:::rdbnomics_df021) ``` You can measure the vast extent of datasets gathered by [DBnomics](https://db.nomics.world) by downloading all the possible dimensions. To do this, you have to set the arguments `provider_code` and `dataset_code` to `NULL`. It's relatively long to run and heavy to show so we display the first 100. ```{r, eval = FALSE} options(rdbnomics.progress_bar_datasets = TRUE) rdb_dimensions() options(rdbnomics.progress_bar_datasets = FALSE) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df022 DT <- DT[order(Provider, Dataset)] DT <- head(DT, 100) display_table(DT) # rdbnomics_df022 %>% # sapply(function(u) { # sapply( # u, # function(x) { # sapply( # x, # function(y) { # nrow(y) # }, # simplify = FALSE # ) %>% # { # data.table(Dimension = names(.), `Number of codes` = unname(.)) # } # }, # simplify = FALSE # ) %>% # rbindlist(idcol = "Dataset") # }, # simplify = FALSE # ) %>% # rbindlist(idcol = "Provider") ``` # Fetch the series codes and names of available datasets of a provider You can download the list of series, and especially their codes, of a dataset's provider by using the function `rdb_series`. The result is a nested named list with a `data.table` at the end of each branch. If you define `simplify = TRUE`, then the result will be a `data.table` not a nested named list. For example, for the **IMF** provider and the dataset **WEO:2019-10**, the command is (only first 100): ```{r, eval = FALSE} rdb_series(provider_code = "IMF", dataset_code = "WEO:2019-10", simplify = TRUE) ``` ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df023 DT <- head(DT, 100) display_table(DT) ``` Like the function `rdb()`, you can add features to `rdb_series()`. You can ask for the series with specific `dimensions`: ```{r, eval = FALSE} rdb_series(provider_code = "IMF", dataset_code = "WEO:2019-10", dimensions = list(`weo-subject` = "NGDP_RPCH"), simplify = TRUE) ``` or with a `query`: ```{r, eval = FALSE} rdb_series(provider_code = "IMF", dataset_code = c("WEO:2019-10", "WEOAGG:2019-10"), query = "NGDP_RPCH") ``` We ask the user to use this function parsimoniously because there are a huge amount of series per dataset. Please only fetch for one dataset if you need it or visit the website [https://db.nomics.world](https://db.nomics.world). For example, for the **IMF** provider, the number of series is (only first 5): ```{r, eval = TRUE, echo = FALSE} DT <- rdbnomics:::rdbnomics_df024 DT <- DT[order(-`Number of series`)] DT <- head(DT, 5) display_table(DT) ``` # Proxy configuration or connection error `Could not resolve host` When using the function `rdb`, you may come across the following error: ```{r, eval = FALSE} Error in open.connection(con, "rb") : Could not resolve host: api.db.nomics.world ``` To get round this situation, you have two options: 1. configure **curl** to use a specific and authorized proxy. 2. use the default R internet connection i.e. the Internet Explorer proxy defined in *internet2.dll*. ## Configure **curl** to use a specific and authorized proxy In **rdbnomics**, by default the function `curl_fetch_memory` (of the package **curl**) is used to fetch the data. If a specific proxy must be used, it is possible to define it permanently with the package option `rdbnomics.curl_config` or on the fly through the argument `curl_config`. Because the object is a named list, its elements are passed to the connection (the `curl_handle` object created internally with `new_handle()`) with `handle_setopt()` before using `curl_fetch_memory`. To see the available parameters, run `names(curl_options())` in *R* or visit the website https://curl.haxx.se/libcurl/c/curl_easy_setopt.html. Once they are chosen, you define the curl object as follows: ```{r, eval = FALSE} h <- list( proxy = "", proxyport = , proxyusername = "", proxypassword = "" ) ``` ### Set the connection up for a session The curl connection can be set up for a session by modifying the following package option: ```{r, eval = FALSE} options(rdbnomics.curl_config = h) ``` When fetching the data, the following command is executed: ```{r, eval = FALSE} hndl <- curl::new_handle() curl::handle_setopt(hndl, .list = getOption("rdbnomics.curl_config")) curl::curl_fetch_memory(url = <...>, handle = hndl) ``` After configuration, just use the standard functions of **rdbnomics** e.g.: ```{r, eval = FALSE} df1 <- rdb(ids = "AMECO/ZUTN/EA19.1.0.0.0.ZUTN") ``` This option of the package can be disabled with: ```{r, eval = FALSE} options(rdbnomics.curl = NULL) ``` ### Use the connection only for a function call If a complete configuration is not needed but just an "on the fly" execution, then use the argument `curl_config` of the function `rdb`: ```{r, eval = FALSE} df1 <- rdb(ids = "AMECO/ZUTN/EA19.1.0.0.0.ZUTN", curl_config = h) ``` ## Use the default R internet connection To retrieve the data with the default R internet connection, **rdbnomics** will use the base function `readLines`. ### Set the connection up for a session To activate this feature for a session, you need to enable an option of the package: ```{r, eval = FALSE} options(rdbnomics.use_readLines = TRUE) ``` And then use the standard function as follows: ```{r, eval = FALSE} df1 <- rdb(ids = "AMECO/ZUTN/EA19.1.0.0.0.ZUTN") ``` This configuration can be disabled with: ```{r, eval = FALSE} options(rdbnomics.use_readLines = FALSE) ``` ### Use the connection only for a function call If you just want to do it once, you may use the argument `use_readLines` of the function `rdb`: ```{r, eval = FALSE} df1 <- rdb(ids = "AMECO/ZUTN/EA19.1.0.0.0.ZUTN", use_readLines = TRUE) ``` # Transform time series with filters The **rdbnomics** package can interact with the *Time Series Editor* of DBnomics to transform time series by applying filters to them. Available filters are listed on the filters page [https://editor.nomics.world/filters](https://editor.nomics.world/filters). Here is an example of how to proceed to interpolate two annual time series with a monthly frequency, using a spline interpolation: ```{r, eval = FALSE} filters <- list( code = "interpolate", parameters = list(frequency = "monthly", method = "spline") ) ``` The request is then: ```{r, eval = FALSE} df <- rdb( ids = c("AMECO/ZUTN/EA19.1.0.0.0.ZUTN", "AMECO/ZUTN/DNK.1.0.0.0.ZUTN"), filters = filters ) ``` If you want to apply more than one filter, the `filters` argument will be a list of valid filters: ```{r, eval = FALSE} filters <- list( list( code = "interpolate", parameters = list(frequency = "monthly", method = "spline") ), list( code = "aggregate", parameters = list(frequency = "bi-annual", method = "end_of_period") ) ) df <- rdb( ids = c("AMECO/ZUTN/EA19.1.0.0.0.ZUTN", "AMECO/ZUTN/DNK.1.0.0.0.ZUTN"), filters = filters ) ``` The `data.table` columns change a little bit when filters are used. There are two new columns: - `period_middle_day`: the middle day of `original_period` (can be useful when you compare graphically interpolated series and original ones). - `filtered` (boolean): `TRUE` if the series is filtered, `FALSE` otherwise. The content of two columns are modified: - `series_code`: same as before for original series, but the suffix `_filtered` is added for filtered series. - `series_name`: same as before for original series, but the suffix ` (filtered)` is added for filtered series. ```{r, eval = TRUE, echo = FALSE} df <- rdbnomics:::rdbnomics_df013 data.table::setDT(df) ``` ```{r, echo = FALSE} df <- df[order(filtered, series_name, period)] df <- reorder_cols(df) display_table(df) ``` ```{r, echo = FALSE, fig.align = 'center'} # ggplot(df[!is.na(value)], aes(x = period, y = value, color = series_name)) + # geom_line(size = 1.2) + # geom_point(size = 2) + # dbnomics() df <- df[!is.na(value)] i <- 1 x1 <- df[series_name == sort(unique(series_name))[i]]$period y1 <- df[series_name == sort(unique(series_name))[i]]$value i <- 2 x2 <- df[series_name == sort(unique(series_name))[i]]$period y2 <- df[series_name == sort(unique(series_name))[i]]$value i <- 3 x3 <- df[series_name == sort(unique(series_name))[i]]$period y3 <- df[series_name == sort(unique(series_name))[i]]$value i <- 4 x4 <- df[series_name == sort(unique(series_name))[i]]$period y4 <- df[series_name == sort(unique(series_name))[i]]$value cols <- c("red", "blue", "darkgreen", "purple") PCH <- 18 legend_text <- sort(unique(df$series_name)) plot( x1, y1, col = cols[1], type = "l", xlab = "", ylab = "", xlim = c(min(df$period), max(df$period)), ylim = c(min(df$value) - 4, max(df$value)), panel.first = grid(lty = 1) ) points(x1, y1, col = cols[1], pch = PCH) lines(x2, y2, col = cols[2], type = "l") points(x2, y2, col = cols[2], pch = PCH) lines(x3, y3, col = cols[3], type = "l") points(x3, y3, col = cols[3], pch = PCH) lines(x4, y4, col = cols[4], type = "l") points(x4, y4, col = cols[4], pch = PCH) legend( "bottomleft", inset = 0.005, legend = legend_text, col = cols, lty = 1, pch = PCH, box.lty = 0, cex = 0.7 ) mtext( text = "DBnomics ", side = 3, col = "grey", font = 3, adj = 1 ) ```