xml_parsing_via_api
This is an old revision of the document!
# Query the KOBIS open API (kobis.or.kr) in three steps:
#   1. search a person by name and split their "filmoNames" field into rows,
#   2. search movies by director name and pair each movie code with its title,
#   3. fetch the detail record of the first movie and list its actors,
# then write the director / actor / movie table to "fstdy.csv".

library(httr)   # GET(), content(), stop_for_status()
library(XML)    # xmlParse(), getNodeSet(), xmlToDataFrame()
library(dplyr)  # mutate(), %>%
library(tidyr)  # unnest()

# API key. Can be overridden with the KOBIS_API_KEY environment variable; the
# literal fallback keeps the script runnable exactly as before.
# NOTE(review): a key committed in source should be treated as public.
KEY <- Sys.getenv("KOBIS_API_KEY", unset = "e95ca8d1202a4ffe248c09f1e1268cae")

# Send a GET request to `endpoint` with `query` (a named list) and fail loudly
# on an HTTP error status instead of handing an error page to the XML parser.
kobis_get <- function(endpoint, query) {
  res <- GET(endpoint, query = query)
  stop_for_status(res)
  res
}

# Parse the body of an httr response as an XML document.
# The body is extracted as UTF-8 text explicitly rather than passing the
# response object itself to xmlParse().
kobis_parse <- function(res) {
  xmlParse(content(res, as = "text", encoding = "UTF-8"), asText = TRUE)
}

# Convert a node set to a data frame, stopping with a readable message when
# the query matched nothing (xmlToDataFrame() cannot handle an empty set).
nodes_to_df <- function(nodes, what) {
  if (length(nodes) == 0) {
    stop("No <", what, "> nodes found in the API response.", call. = FALSE)
  }
  xmlToDataFrame(nodes, stringsAsFactors = FALSE)
}

# ---- 1. People search: filmography of one person ----

searchPeople <- "https://kobis.or.kr/kobisopenapi/webservice/rest/people/searchPeopleList.xml"
name <- "크리스토퍼놀란"
name <- iconv(name, to = "UTF-8")
url <- searchPeople  # plain ASCII URL; httr encodes the query parameters
url

spRes <- kobis_get(url, query = list("key" = KEY, "peopleNm" = name))
spRes
spParsed <- kobis_parse(spRes)  # original parsed the undefined object `sres`
spParsed

xmlPeople <- getNodeSet(spParsed, "//people")
xmlPeople
xmlFilm <- getNodeSet(spParsed, "//people/filmoNames")
xmlFilm

# Each <filmoNames> node holds only text, so the data frame has one column
# named "text" containing "|"-separated film titles.
filmCh <- nodes_to_df(xmlFilm, "filmoNames")
filmCh

films <- filmCh
# One row per film title: split on the literal "|" and unnest the list column.
fs <- films %>%
  mutate(text = strsplit(as.character(text), "|", fixed = TRUE)) %>%
  unnest(text)
fs
str(fs)
data.frame(fs)
fs[1, ]

# ---- 2. Movie search: films by director ----

sFilms <- "https://kobis.or.kr/kobisopenapi/webservice/rest/movie/searchMovieList.xml"
dir <- "크리스토퍼놀란"
dir <- iconv(dir, to = "UTF-8")
url <- sFilms
url

sfRes <- kobis_get(url, query = list("key" = KEY, "directorNm" = dir))
sfRes
sfParsed <- kobis_parse(sfRes)
sfParsed

xmlId <- getNodeSet(sfParsed, "//movie/movieCd")
xmlId
xmlF <- getNodeSet(sfParsed, "//movie/movieNm")
xmlF

filmId <- nodes_to_df(xmlId, "movieCd")
filmId
filmN <- nodes_to_df(xmlF, "movieNm")
filmN
str(filmN)

# Both inputs have a single column called "text"; data.frame() renames the
# second one, giving columns `text` (movie code) and `text.1` (movie title).
fg <- data.frame(filmId, filmN)
fg

# ---- 3. Movie detail: actors of the first film ----

sFd <- "http://www.kobis.or.kr/kobisopenapi/webservice/rest/movie/searchMovieInfo.xml"
filmId[1, ]
# `name` is reused here for the movie code of the first film; the variable
# name is kept because it becomes a column name in the exported CSV.
name <- fg$text[1]
name
url <- sFd
url

sfdr <- kobis_get(url, query = list("key" = KEY, "movieCd" = name))
sfdr
sfdrP <- kobis_parse(sfdr)
sfdrP

xmlA <- getNodeSet(sfdrP, "//actors/actor/peopleNm")
xmlA
actor <- nodes_to_df(xmlA, "peopleNm")
actor

dir
mv <- fg$text.1[1]  # title of the first film
# One row per actor; director, movie code and title are recycled across rows.
dtoa <- data.frame(dir, actor, name, mv)
dtoa

write.csv(dtoa, file = "fstdy.csv", fileEncoding = "euc-kr")
xml_parsing_via_api.1732757423.txt.gz · Last modified: 2024/11/28 10:30 by hkimscil