xml_parsing_via_api
Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
xml_parsing_via_api [2024/11/28 10:30] – created hkimscil | xml_parsing_via_api [2024/12/04 07:20] (current) – hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
< | < | ||
+ | library(XML) | ||
+ | library(httr) | ||
+ | library(tidyr) | ||
+ | library(tidyverse) | ||
+ | |||
searchPeople <- " | searchPeople <- " | ||
KEY <- ' | KEY <- ' | ||
Line 9: | Line 14: | ||
spRes <- GET(url, query= list(" | spRes <- GET(url, query= list(" | ||
spRes | spRes | ||
- | spParsed <- xmlParse(sres) | + | spParsed <- xmlParse(spRes) |
spParsed | spParsed | ||
xmlPeople <- getNodeSet(spParsed, | xmlPeople <- getNodeSet(spParsed, | ||
Line 17: | Line 22: | ||
filmCh <- xmlToDataFrame(xmlFilm, | filmCh <- xmlToDataFrame(xmlFilm, | ||
filmCh | filmCh | ||
+ | View(filmCh) | ||
+ | |||
- | library(tidyr) | ||
films <- filmCh | films <- filmCh | ||
films %>% | films %>% | ||
mutate(text = strsplit(as.character(text), | mutate(text = strsplit(as.character(text), | ||
unnest(text) | unnest(text) | ||
+ | |||
fs <- mutate(films, | fs <- mutate(films, | ||
fs <- unnest(fs, text) | fs <- unnest(fs, text) | ||
Line 30: | Line 37: | ||
fs[1,] | fs[1,] | ||
+ | ################################## | ||
sFilms <- " | sFilms <- " | ||
Line 47: | Line 55: | ||
xmlF | xmlF | ||
filmId <- xmlToDataFrame(xmlId, | filmId <- xmlToDataFrame(xmlId, | ||
+ | filmId | ||
+ | colnames(filmId) <- " | ||
filmId | filmId | ||
filmN <- xmlToDataFrame(xmlF, | filmN <- xmlToDataFrame(xmlF, | ||
+ | colnames(filmN) <- " | ||
filmN | filmN | ||
str(filmN) | str(filmN) | ||
- | fg <- data.frame(filmId, filmN) | + | fg <- cbind(filmId, filmN) |
fg | fg | ||
- | fg <- unnest(fg, | + | fg <- unnest(fg, |
fg | fg | ||
+ | |||
+ | ############### | ||
sFd <- " | sFd <- " | ||
KEY <- ' | KEY <- ' | ||
filmId[1,] | filmId[1,] | ||
- | name <- fg$text[1] | + | fg$filmId[1] |
- | name | + | fg$film[1] |
- | # name <- iconv(name, to=" | + | id <- fg$filmId[1] |
+ | id | ||
url < | url < | ||
url | url | ||
- | sfdr <- GET(url, query= list(" | + | sfdr <- GET(url, query= list(" |
sfdr | sfdr | ||
sfdrP <- xmlParse(sfdr) | sfdrP <- xmlParse(sfdr) | ||
Line 71: | Line 85: | ||
xmlA <- getNodeSet(sfdrP, | xmlA <- getNodeSet(sfdrP, | ||
xmlA | xmlA | ||
+ | |||
actor <- xmlToDataFrame(xmlA, | actor <- xmlToDataFrame(xmlA, | ||
actor | actor | ||
+ | colnames(actor) <- " | ||
+ | actor | ||
+ | |||
+ | |||
dir | dir | ||
- | mv <- fg$text.1[1] | + | mv <- fg$film[1] |
- | dtoa <- data.frame(dir, | + | mv |
+ | dtoa <- data.frame(dir, | ||
dtoa | dtoa | ||
write.csv(dtoa, | write.csv(dtoa, | ||
- | |||
</ | </ | ||
xml_parsing_via_api.1732757423.txt.gz · Last modified: 2024/11/28 10:30 by hkimscil