Problem
We want to create a population pyramid with ggplot2. Our example is the Spanish population by age and gender in 2020.
Example
# A tibble: 42 x 3
Age Gender Total
1 "0-4 " male 1018039
2 "0-4 " female 963653
3 "5-9" male 1196380
4 "5-9" female 1129063
5 "10-14" male 1297635
6 "10-14" female 1225863
7 "15-19 " male 1232566
8 "15-19 " female 1156455
9 "20-24 " male 1207902
10 "20-24 " female 1152765
# ... with 32 more rows
# Packages used throughout this example:
#   dplyr   - %>%, group_by(), mutate(), filter(); also re-exports as_tibble()
#   ggplot2 - plotting
#   scales  - comma() for axis labels
library(dplyr)
library(ggplot2)
library(scales)

# Population by five-year age group and gender, one row per
# (Age, Gender) pair: 21 age groups x 2 genders = 42 rows.
# Age labels are kept exactly as provided (some carry a trailing space).
df <- structure(
  list(
    Age = c(
      "0-4 ", "0-4 ", "5-9", "5-9", "10-14", "10-14",
      "15-19 ", "15-19 ", "20-24 ", "20-24 ", "25-29 ", "25-29 ",
      "30-34 ", "30-34 ", "35-39 ", "35-39 ", "40-44 ", "40-44 ",
      "45-49 ", "45-49 ", "50-54 ", "50-54 ", "55-59 ", "55-59 ",
      "60-64 ", "60-64 ", "65-69 ", "65-69 ", "70-74 ", "70-74 ",
      "75-79 ", "75-79 ", "80-84 ", "80-84 ", "85-89 ", "85-89 ",
      "90-94 ", "90-94 ", "95-99 ", "95-99 ", "100+", "100+"
    ),
    # Rows alternate male, female within every age group.
    Gender = rep(c("male", "female"), times = 21L),
    Total = c(
      1018039L, 963653L, 1196380L, 1129063L, 1297635L, 1225863L,
      1232566L, 1156455L, 1207902L, 1152765L, 1308197L, 1275776L,
      1421558L, 1417845L, 1702135L, 1688865L, 2024303L, 1971909L,
      1968659L, 1926866L, 1828015L, 1840434L, 1652558L, 1712299L,
      1410111L, 1502563L, 1153768L, 1270544L, 1020478L, 1191698L,
      773823L, 974046L, 513692L, 759379L, 361702L, 634714L,
      133032L, 302885L, 27305L, 84007L, 3732L, 13576L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -42L)
)

# Print a compact preview of the data as a tibble.
as_tibble(df)
Solution
First we need to create two columns: one that makes the male population negative, so that the male bars extend in the opposite direction in the plot, and another with the percentage that each age group represents within its gender's total population.
# Derive the two plotting variables (requires dplyr to be attached):
#   Population - Total, with male counts negated so that male bars point the
#                opposite way from female bars.
#   Percent    - each age group's share (in %) of its own gender's total,
#                carrying the same sign as Population.
df <- df %>%
  group_by(Gender) %>%
  mutate(
    Population = if_else(Gender == "female", Total, -Total),
    # sum(Total) is evaluated per gender because of group_by(Gender).
    Percent = 100 * (Population / sum(Total))
  ) %>%
  # Drop the grouping so later operations on df are not silently grouped.
  ungroup() %>%
  # Some age labels carry a trailing space ("0-4 " vs "5-9"); normalise them
  # so the factor levels and axis labels are consistent.
  mutate(Age = trimws(Age))

# Fix the level order to the order of first appearance, so the age groups are
# plotted youngest to oldest instead of being sorted alphabetically.
df$Age <- factor(df$Age, levels = unique(df$Age))
# Population pyramid in absolute numbers.
# Male values are stored as negatives, so a single geom_col() layer draws
# them on the opposite side of zero from the female values (positive and
# negative bars are stacked separately, and there is one bar per Age/Gender).
ggplot(df, aes(x = Age, y = Population, fill = Gender)) +
  geom_col() +
  scale_y_continuous(
    breaks = seq(-2000000, 2000000, by = 500000),
    # Show magnitudes on both sides of zero, with thousands separators.
    labels = function(x) scales::comma(abs(x))
  ) +
  # Put the age groups on the vertical axis.
  coord_flip()
# Population pyramid in percentages.
# Male percentages are stored as negatives, so a single geom_col() layer
# draws them on the opposite side of zero from the female percentages.
ggplot(df, aes(x = Age, y = Percent, fill = Gender)) +
  geom_col() +
  scale_y_continuous(
    breaks = seq(-10, 10, by = 2),
    # Show magnitudes on both sides of zero.
    labels = function(x) scales::comma(abs(x))
  ) +
  # Put the age groups on the vertical axis.
  coord_flip()
References
No hay comentarios:
Publicar un comentario