---
title: "clidatajp"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{clidatajp}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
# Chunk defaults applied to every following chunk of this vignette.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```
# Japan Meteorological Agency ('JMA') web page
The 'JMA' web page consists of several layers.
You can use a different function to download each component.
- Area select: download_area_links()
- Country select: download_links()
- Station select: download_links()
- Climate data of a station: download_climate()
# Links for climate data and station information
I recommend using the existing data,
which have already been downloaded and cleaned up.
When you want to download links for climate data,
use download_area_links() and download_links().
download_area_links() returns links for 6 areas.
download_links() returns links for countries and stations.
For polite scraping, a 5-second interval is set in download_links(),
so it takes about 15 minutes to get all station links.
Please use the existing links via "data(station_links)"
if you do not need to update the links.
```{r setup}
library(clidatajp)
library(magrittr)
library(dplyr)
library(tibble)
library(ggplot2)
library(stringi)
```
```{r station_links, eval = FALSE}
# Existing data: unescape the station names, print the intermediate table,
# clean the station column and re-attach the matching URL column.
data(station_links)
station_links %>%
  dplyr::mutate("station" := stringi::stri_unescape_unicode(station)) %>%
  print() %>%
  `$`("station") %>%
  clean_station() %>%
  dplyr::bind_cols(station_links["url"])

# Download new data
# If you want links for all countries and all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
res <- gracefully_fail(url)
# Only start scraping when the site could be reached.
if (!is.null(res)) {
  area_links <- download_area_links()
  area_links <- head(area_links) # for test
  # Collect the links in preallocated lists and flatten them once, instead of
  # re-copying an ever-growing vector with c() on every iteration.
  links_by_area <- vector("list", length(area_links))
  for (i in seq_along(area_links)) {
    print(stringr::str_c("area: ", i, " / ", length(area_links)))
    country_links <- download_links(area_links[i])
    country_links <- head(country_links) # for test
    links_by_country <- vector("list", length(country_links))
    for (j in seq_along(country_links)) {
      print(stringr::str_c(" country: ", j, " / ", length(country_links)))
      # `[j] <- list(...)` keeps the slot even if the download returns NULL.
      links_by_country[j] <- list(download_links(country_links[j]))
    }
    links_by_area[i] <- list(unlist(links_by_country))
  }
  # NOTE(review): assumes download_links() returns an atomic vector of URLs,
  # so unlist() is equivalent to concatenating with c() -- confirm.
  station_links <- tibble::tibble(url = unlist(links_by_area))
  station_links
}
```
# Climate data
I recommend using the existing data,
which have already been downloaded and cleaned up.
When you want to know how "data(climate_jp)" was prepared,
please check the URL shown below.
https://github.com/matutosi/clidatajp/blob/main/data-raw/climate_jp.R
For polite scraping, a 5-second interval is set in download_climate(),
so it takes over 5 hours to get the world climate data of all stations
because of the huge amount of data (3444 stations).
Please use the existing data via "data(climate_world)"
if you do not need to update the climate data.
```{r climate_data, eval = FALSE}
# Existing data: unescape the Unicode-escaped character columns for display.
# across(where(...)) replaces the superseded mutate_if().
data(climate_jp)
climate_jp %>%
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  )
data(climate_world)
climate_world %>%
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  )

# Download new data
# If you want climate data for all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
res <- gracefully_fail(url)
# Only start scraping when the site could be reached.
if (!is.null(res)) {
  station_links <-
    station_links %>%
    head() %>%
    `$`("url")
  # One preallocated slot per station; the pieces are row-bound at the end.
  climate <- vector("list", length(station_links))
  for (i in seq_along(station_links)) {
    print(stringr::str_c(i, " / ", length(station_links)))
    # `[i] <- list(...)` keeps the slot even if the download returns NULL.
    climate[i] <- list(download_climate(station_links[i]))
  }
  world_climate <- dplyr::bind_rows(climate)
  world_climate
}
```
## Plot
Clean up the data before drawing plots.
```{r clean_data}
data(climate_world)
data(climate_jp)

# Combine both data sets, unescape the character columns and keep only
# stations whose temperature and precipitation records are complete.
climate <-
  dplyr::bind_rows(climate_world, climate_jp) %>%
  # across(where(...)) replaces the superseded mutate_if().
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  ) %>%
  dplyr::group_by(country, station) %>%
  # Group-wise filter: drops every (country, station) group containing an NA.
  dplyr::filter(sum(is.na(temperature), is.na(precipitation)) == 0) %>%
  dplyr::filter(period != "1991-2020" | is.na(period))

# One row per station: mean temperature, total precipitation and signed
# coordinates (south / west become negative).
climate <-
  climate %>%
  dplyr::summarise(
    temp = mean(as.numeric(temperature)),
    prec = sum(as.numeric(precipitation)),
    # Drop the grouping explicitly; nothing below needs grouped data.
    .groups = "drop"
  ) %>%
  # Natural joins: each one matches on the columns the two tables share.
  dplyr::left_join(
    dplyr::distinct(dplyr::select(climate, station:altitude))
  ) %>%
  dplyr::left_join(tibble::tibble(NS = c("S", "N"), ns = c(-1, 1))) %>%
  dplyr::left_join(tibble::tibble(WE = c("W", "E"), we = c(-1, 1))) %>%
  # Elementwise arithmetic, so no group_by() is required here.
  dplyr::mutate(lat = latitude * ns, lon = longitude * we)
```
Draw a world map with temperature.
```{r temperature}
# Station map coloured by `temp`, using a diverging blue-gray-red palette
# whose midpoint is 15.
ggplot2::ggplot(climate, aes(x = lon, y = lat, colour = temp)) +
  scale_colour_gradient2(
    low = "blue",
    mid = "gray",
    high = "red",
    midpoint = 15
  ) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, "cm"))
# ggsave("temperature.png")
```
Draw a world map of precipitation, excluding stations with over 5000 mm/yr (to avoid an extended legend).
```{r precipitation}
# Station map coloured by `prec`; rows with prec >= 5000 are removed first.
# The diverging yellow-gray-blue palette has its midpoint at 1500.
ggplot2::ggplot(
  dplyr::filter(climate, prec < 5000),
  aes(x = lon, y = lat, colour = prec)
) +
  scale_colour_gradient2(
    low = "yellow",
    mid = "gray",
    high = "blue",
    midpoint = 1500
  ) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, "cm"))
# ggsave("precipitation.png")
```
Show the relationship between temperature and precipitation, excluding Japan.
```{r except_japan}
# Country name to exclude, stored as escaped code points and decoded here.
japan <- stringi::stri_unescape_unicode("\\u65e5\\u672c")

# Scatter plot of temp against prec for every row whose country differs
# from `japan`.
ggplot2::ggplot(
  dplyr::filter(climate, country != japan),
  aes(x = temp, y = prec)
) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none")
# ggsave("climate_nojp.png")
```
Show relationships between temperature and precipitation including Japan.
```{r all_data}
# Scatter plot of temp against prec for all rows of `climate`.
ggplot2::ggplot(climate, aes(x = temp, y = prec)) +
  geom_point() +
  theme_bw()
# ggsave("climate_all.png")
```
Show relationships between temperature and precipitation.
Blue: Japan, red: others.
```{r compare_japan}
# Scatter plot of temp against prec, coloured by the logical flag `jp`
# (TRUE where the country equals `japan`, which an earlier chunk defines).
ggplot2::ggplot(
  dplyr::mutate(climate, jp = (country == japan)),
  aes(x = temp, y = prec, colour = jp)
) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none")
# ggsave("climate_compare_jp.png")
```