CiNii(国立情報学研究所の論文DB)にAPIでアクセスする

fujimoto@sakushin-u.ac.jp

2018/10/21

参考リンク

library

library(rvest)
library(tidyverse)
library(patchwork)

functions

トークン(APIリクエスト用URL)の生成

#' Build the CiNii OpenSearch request URL ("token") for one query.
#'
#' All pieces are concatenated as-is; no URL-encoding is applied here.
#'
#' @param URL Base endpoint of the OpenSearch API.
#' @param query_w Search keyword, sent as the `q` parameter.
#' @param appID Application ID, sent as the `appID` parameter.
#' @param start_y Value for the `year_from` parameter.
#' @param end_y Value for the `year_to` parameter.
#' @param count Value for the `count` parameter (200, as before, by default).
#' @param format Value for the `format` parameter ("rss", as before, by
#'   default).
#' @return A character vector holding the assembled request URL.
make_token <- function(URL, query_w, appID, start_y, end_y,
                       count = 200, format = "rss") {
  paste0(
    URL, "?", "q=", query_w, "&appID=", appID,
    "&format=", format, "&count=", count,
    "&year_from=", start_y, "&year_to=", end_y
  )
}

取得結果をggplotでグラフ表示

#' Plot yearly hit counts and their running total for a search keyword.
#'
#' Draws two stacked panels with patchwork: the cumulative count on top and
#' the per-year count (as a lollipop chart) below, in a 3:1 height ratio.
#'
#' @param query_w Search keyword; only used in the panel titles.
#' @param hit_year Data frame with columns `year` and `hits`.
#' @return The value of `print()` on the combined plot; called for its
#'   side effect of drawing the figure.
plot_hit2 <- function(query_w, hit_year) {
  # as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
  plot_data <- as_tibble(hit_year) %>%
    rename(Year = year, Count = hits) %>%
    mutate(Cumsum = cumsum(Count))

  # Per-year hits: a point for each year with a grey stem down to zero.
  p1 <- ggplot(plot_data) +
    geom_point(aes(x = Year, y = Count), colour = "blue") +
    geom_segment(
      aes(x = Year, y = Count, xend = Year, yend = 0),
      colour = "grey"
    ) +
    ggtitle(paste0("CiNiiで検索(hit数):", query_w)) +
    theme_grey(base_family = "sans")

  # Running total of hits over the years.
  p2 <- ggplot(plot_data) +
    geom_line(aes(x = Year, y = Cumsum)) +
    geom_point(aes(x = Year, y = Cumsum)) +
    ggtitle(paste0("CiNiiで検索(累積):", query_w)) +
    theme_grey(base_family = "sans")

  # Cumulative panel on top, yearly panel below.
  print(p2 + p1 + plot_layout(ncol = 1, heights = c(3, 1)))
}

Token の形成,resultの取得

基本設定

# Application ID sent with every API request (placeholder value).
appID <- "XXXXXXXXXXXXXXXXXXX"
# Endpoint of the CiNii OpenSearch API.
URL <- "http://ci.nii.ac.jp/opensearch/search"

年ごとにループしてhit数を取得

# Search keyword (original author's note: URL-encoding is not needed).
query_w <- "対応分析"
start <- 1980
end <- 2018

# Ask the API once per year (year_from == year_to) and keep the total hit
# count it reports. The result vector is preallocated instead of growing a
# data frame with rbind() on every iteration.
years <- start:end
hits <- numeric(length(years))
for (k in seq_along(years)) {
  i <- years[k]
  # xml2 functions are namespace-qualified: rvest >= 1.0.0 no longer
  # attaches xml2, so the bare names are not found after library(rvest).
  res.xml <- xml2::read_xml(make_token(URL, query_w, appID, i, i))
  # The first child of the response root is converted to a list and read
  # for its `totalResults` entry.
  res.header <- xml2::as_list(xml2::xml_children(res.xml)[[1]])
  nn <- as.numeric(res.header$totalResults[[1]])
  if (length(nn) != 1) {
    stop("No totalResults value in the response for year ", i, call. = FALSE)
  }
  hits[k] <- nn
}
year_nn <- data.frame(year = as.numeric(years), hits = hits)

# Show the last few years together with the running total.
year_nn %>%
  mutate(hitcum = cumsum(hits)) %>%
  tail()
##    year hits hitcum
## 34 2013   26    262
## 35 2014   28    290
## 36 2015   27    317
## 37 2016   19    336
## 38 2017   19    355
## 39 2018   15    370

グラフで表示

「対応分析」でhitした論文数と累計

# Draw the yearly and cumulative hit counts collected in year_nn.
plot_hit2(query_w = "対応分析", hit_year = year_nn)