Importación
library(XML)      # provides readHTMLTable()
library(reshape2) # provides colsplit()

# Address of the IMDb chart page that is read below.
url <- "http://www.imdb.com/chart"

# Parse table number 1 of that page into a data frame, keeping text columns
# as character vectors instead of factors.
peliculas <- readHTMLTable(
  doc = url,
  which = 1,
  stringsAsFactors = FALSE
)
Manipulación
# Split column 2 on newlines into three pieces named rank, title and year,
# and append them to the data frame.
peliculas <- transform(
  peliculas,
  peliculas = colsplit(
    peliculas[[2]],
    pattern = "\n",
    names = c("rank", "title", "year")
  )
)

# Drop columns 1, 2, 6 and 7; the five remaining columns are renamed below.
peliculas[, c(1, 2, 6, 7)] <- list(NULL)
colnames(peliculas) <- c("weekend", "gross", "weeks", "title", "year")

# Strip leading and trailing spaces from the titles. The previous pattern,
# "^ +|+ $", had the quantifier in front of the space in its second
# alternative; " +$" is the form that matches a run of trailing spaces.
peliculas$title <- gsub("^ +| +$", "", peliculas$title)

# Remove punctuation and whitespace characters from the year text.
peliculas$year <- gsub("[[:punct:]]|[[:space:]]", "", peliculas$year)

# Remove the "$" and "M" characters so the amounts convert to numbers.
peliculas$gross <- as.numeric(gsub("\\$|M", "", peliculas$gross))
peliculas$weekend <- as.numeric(gsub("\\$|M", "", peliculas$weekend))

peliculas$weeks <- as.integer(peliculas$weeks)
peliculas$year <- as.integer(peliculas$year)

# Reorder the columns by name (same order as the former positional
# selection c(4, 5, 1, 2, 3)).
peliculas <- peliculas[c("title", "year", "weekend", "gross", "weeks")]

# Print the cleaned table.
peliculas
# 2014/09/22
title year weekend gross weeks
1 The Maze Runner 2014 32.5 32.5 1
2 A Walk Among the Tombstones 2014 13.1 13.1 1
3 This Is Where I Leave You 2014 11.9 11.9 1
4 No Good Deed 2014 10.2 40.1 2
5 Dolphin Tale 2 2014 9.0 27.1 2
6 Guardians of the Galaxy 2014 5.2 313.7 8
7 Let's Be Cops 2014 2.7 77.2 6
8 Teenage Mutant Ninja Turtles 2014 2.6 185.0 7
9 The Drop 2014 2.0 7.7 2
10 If I Stay 2014 1.8 47.7 5
Recaudación del fin de semana
# Set the margins of the plotting area (bottom, left, top, right).
par(mar = c(5, 15, 4, 2))

# Draw the horizontal bar chart of weekend takings. Both vectors are
# reversed together so bars and their names stay paired.
bp <- with(
  peliculas,
  barplot(
    rev(weekend),
    names.arg = rev(title),
    horiz = TRUE,
    col = "lightskyblue",
    border = "white",
    xlim = c(0, 35),
    xlab = "USD $ Millions",
    las = 1
  )
)

# Label each bar with its value; bp holds the bar positions returned by
# barplot(), and pos = 4 places the text to the right of x = 0.
text(
  x = 0,
  y = bp,
  labels = round(rev(peliculas$weekend), 1),
  pos = 4
)
Recaudación acumulada
# Sort the rows in descending order of cumulative takings (gross).
peliculas <- peliculas[order(-peliculas[["gross"]]), ]
# 2014/09/22
title year weekend gross weeks
6 Guardians of the Galaxy 2014 5.2 313.7 8
8 Teenage Mutant Ninja Turtles 2014 2.6 185.0 7
7 Let's Be Cops 2014 2.7 77.2 6
10 If I Stay 2014 1.8 47.7 5
4 No Good Deed 2014 10.2 40.1 2
1 The Maze Runner 2014 32.5 32.5 1
5 Dolphin Tale 2 2014 9.0 27.1 2
2 A Walk Among the Tombstones 2014 13.1 13.1 1
3 This Is Where I Leave You 2014 11.9 11.9 1
9 The Drop 2014 2.0 7.7 2
# Margins of the plotting area (bottom, left, top, right).
par(mar = c(5, 15, 4, 2))

# Horizontal bar chart of cumulative takings. The columns are reached
# through the data frame: bare `gross` and `title` are not variables created
# by this script, so they must be qualified with `peliculas$`.
bp <- barplot(rev(peliculas$gross),
              names.arg = rev(peliculas$title),
              horiz = TRUE,
              col = "lightsalmon", border = "white",
              xlim = c(0, 350), xlab = "USD $ Millions",
              las = 1)

# Bar labels: bp holds the bar positions returned by barplot(), and
# pos = 4 places each value to the right of x = 0.
text(y = bp, x = 0, round(rev(peliculas$gross), 1), pos = 4)
Entradas relacionadas
No hay comentarios:
Publicar un comentario