---
title: "clidatajp"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{clidatajp}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

# Japan Meteorological Agency ('JMA') web page

The 'JMA' web page consists of several layers.
You can use a different function to download each component.

  - Area select: download_area_links()
  - Country select: download_links()
  - Station select: download_links()
  - Climate data of a station: download_climate()

# Links for climate data and station information

I recommend using the existing data, which are already downloaded and cleaned up.
When you want to download links for climate data,
use download_area_links() and download_links().
download_area_links() returns links for 6 areas.
download_links() returns links for countries and stations.

For polite scraping, a 5 sec interval is set in download_links(),
so it takes about 15 minutes to get all station links.
Please use the existing links via "data(station_links)"
if you do not need to renew the links.

```{r setup}
library(clidatajp)
library(magrittr)
library(dplyr)
library(tibble)
library(ggplot2)
library(stringi)
```

```{r station_links, eval = FALSE}
# Existing data: decode the escaped station names, print the intermediate
# table, pass the names to clean_station(), and re-attach the url column.
data(station_links)
station_links %>%
  dplyr::mutate("station" := stringi::stri_unescape_unicode(station)) %>%
  print() %>%
  `$`("station") %>%
  clean_station() %>%
  dplyr::bind_cols(station_links["url"])

# Download new data
# If you want links for all countries and all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
res <- gracefully_fail(url)
# Only start downloading when gracefully_fail() returned something for the url.
if (!is.null(res)) {
  area_links <- download_area_links()
  station_links <- NULL
  area_links <- head(area_links)  # for test
  for (i in seq_along(area_links)) {
    # Progress goes to message() so it can be suppressed and is not mixed
    # with printed results.
    message("area: ", i, " / ", length(area_links))
    country_links <- download_links(area_links[i])
    country_links <- head(country_links)  # for test
    for (j in seq_along(country_links)) {
      message(" country: ", j, " / ", length(country_links))
      station_links <- c(station_links, download_links(country_links[j]))
    }
  }
  station_links <- tibble::tibble(url = station_links)
  station_links
}
```

# Climate data

I recommend using the existing data, which are already downloaded and cleaned up.
When you want to know how to prepare "data(climate_jp)",
please check the URL shown below.

https://github.com/matutosi/clidatajp/blob/main/data-raw/climate_jp.R

For polite scraping, a 5 sec interval is set in download_climate(),
so it takes over 5 hours to get world climate data of all stations
because of the huge amount of data (3444 stations).
Please use the existing data via "data(climate_world)"
if you do not need to renew the climate data.

```{r climate_data, eval = FALSE}
# Existing data: decode escaped Unicode text in every character column.
data(climate_jp)
climate_jp %>%
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  )

data(climate_world)
climate_world %>%
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  )

# Download new data
# If you want climate data for all stations, remove head().
url <- "https://www.data.jma.go.jp/gmd/cpd/monitor/nrmlist/"
res <- gracefully_fail(url)
# Only start downloading when gracefully_fail() returned something for the url.
if (!is.null(res)) {
  # Reduce the table of links to a plain vector of the first few urls.
  station_links <-
    station_links %>%
    head() %>%
    `$`("url")
  climate <- list()
  for (i in seq_along(station_links)) {
    message(i, " / ", length(station_links))
    climate[[i]] <- download_climate(station_links[i])
  }
  # Stack the per-station results into one table.
  world_climate <- dplyr::bind_rows(climate)
  world_climate
}
```

## Plot

Clean up the data before drawing plots.
```{r clean_data}
data(climate_world)
data(climate_jp)

# Combine world and Japanese records and decode escaped Unicode text.
# Per (country, station), keep the station only when it has no missing
# temperature or precipitation value. Rows whose period is "1991-2020" are
# dropped; rows with a missing period are kept.
climate <-
  dplyr::bind_rows(climate_world, climate_jp) %>%
  dplyr::mutate(
    dplyr::across(where(is.character), stringi::stri_unescape_unicode)
  ) %>%
  dplyr::group_by(country, station) %>%
  dplyr::filter(sum(is.na(temperature), is.na(precipitation)) == 0) %>%
  dplyr::filter(period != "1991-2020" | is.na(period))

# One row of station attributes per (country, station); country is selected
# explicitly instead of relying on it being re-added as a grouping variable.
station_info <-
  climate %>%
  dplyr::ungroup() %>%
  dplyr::select(country, station:altitude) %>%
  dplyr::distinct()

# Per station: mean temperature and total precipitation, joined back to the
# station attributes. Join keys are spelled out so that no "Joining with"
# messages end up in the rendered vignette.
climate <-
  climate %>%
  dplyr::summarise(
    temp = mean(as.numeric(temperature)),
    prec = sum(as.numeric(precipitation)),
    .groups = "drop"
  ) %>%
  dplyr::left_join(station_info, by = c("country", "station")) %>%
  # Hemisphere letters become signs: S and W are negative, N and E positive.
  dplyr::left_join(
    tibble::tibble(NS = c("S", "N"), ns = c(-1, 1)),
    by = "NS"
  ) %>%
  dplyr::left_join(
    tibble::tibble(WE = c("W", "E"), we = c(-1, 1)),
    by = "WE"
  ) %>%
  # Element-wise computation, so no grouping is needed here.
  dplyr::mutate(lat = latitude * ns, lon = longitude * we)
```

Draw a world map with temperature.

```{r temperature}
climate %>%
  ggplot2::ggplot(aes(lon, lat, colour = temp)) +
  scale_colour_gradient2(
    low = "blue", mid = "gray", high = "red", midpoint = 15
  ) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, "cm"))
  # ggsave("temperature.png")
```

Draw a world map with precipitation, excluding stations over 5000 mm/yr
(to avoid an extended legend).

```{r precipitation}
climate %>%
  dplyr::filter(prec < 5000) %>%
  ggplot2::ggplot(aes(lon, lat, colour = prec)) +
  scale_colour_gradient2(
    low = "yellow", mid = "gray", high = "blue", midpoint = 1500
  ) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.key.size = unit(0.3, "cm"))
  # ggsave("precipitation.png")
```

Show relationships between temperature and precipitation, excluding Japan.

```{r except_japan}
# Decoded country name used to identify Japanese stations.
japan <- stringi::stri_unescape_unicode("\\u65e5\\u672c")
climate %>%
  dplyr::filter(country != japan) %>%
  ggplot2::ggplot(aes(temp, prec)) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none")
  # ggsave("climate_nojp.png")
```

Show relationships between temperature and precipitation including Japan.
```{r all_data}
# Scatter plot of temp against prec for every station in `climate`.
ggplot2::ggplot(data = climate, mapping = aes(temp, prec)) +
  geom_point() +
  theme_bw()
# ggsave("climate_all.png")
```

Show relationships between temperature and precipitation. Blue: Japan, red: others.

```{r compare_japan}
# Flag the rows whose country equals `japan` and colour the points by that
# flag; the legend is hidden.
ggplot2::ggplot(
  data = dplyr::mutate(climate, jp = country == japan),
  mapping = aes(temp, prec, colour = jp)
) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none")
# ggsave("climate_compare_jp.png")
```