class: inverse, center, middle # 36-315: Statistical Graphics and Visualization ## Lecture 10 Meghan Hall <br> Department of Statistics & Data Science <br> Carnegie Mellon University <br> June 14, 2021 --- layout: true <div class="my-footer"><span>cmu-36315.netlify.app</span></div> --- # From last time <br> .large[More details] <br> .medium[colors, fonts, annotations] <br> .large[Putting it all together with themes] <br> .medium[built-in, external options, custom] --- # Schedule going forward .large[Lab 7 tomorrow] <br> .medium[HW 3 due too] <br> .large[Midterm review lecture on Wednesday] <br> .large[Midterm on Thursday] <br> .medium[no lab on Thursday *or* lecture on Friday] <br> .large[HW 4 due *next* Tuesday] --- # Tips on debugging code <br> .large[Always code iteratively] <br> .medium[build graphs piece by piece] -- <br> .large[Debug that way too] <br> .medium[get down to the minimum chunk that works] --- # Today <br> .large[Extensions to `ggplot2`] <br> .medium[in addition to the ones we've already covered] <br> .large[New plot types *and* new techniques for details] --- # Already covered <br> <br> .large[`ggridges`] <br> .medium[building ridgeline plots] <br> .medium[viewing/comparing distributions] <br> <br> -- <br> .large[`ggbeeswarm`] <br> .medium[beeswarm plots to visualize distributions] <br> .medium["smart" jittering] <br> <br> -- <br> .large[For details] <br> .medium[`ggtext`, `ggrepel`, `scico`] <br> .medium[`ggthemes`, `ggdark`, `hrbrthemes`] --- # `ggforce` <br> <br> .large["Accelerating `ggplot2`"] <br> .medium[[ggforce.data-imaginist.com](https://ggforce.data-imaginist.com/reference/index.html)] <br> <br> -- <br> .large[`geom_sina`] <br> .medium[keeps jittered points restricted within the violin] <br> <br> --- # `ggforce` <img src="figs/Lec10/force-1-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggforce`] .tiny[ ```r txhousing %>% filter(year >= 2010) %>% ggplot(aes(x = as.character(year), y = median)) + 
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + # replace this # geom_jitter(alpha = .25, width = .3, size = 0.5, color = "#bb0000") + scale_y_continuous(labels = dollar, breaks = seq(100000, 300000, 50000)) + labs(title = "The distribution of median home prices by city in Texas") + # with this * geom_sina(alpha = 0.25, width = .3, size = 0.5, color = "#bb0000") + theme(axis.title = element_blank(), panel.background = element_blank(), panel.grid.major.y = element_line(color = "grey90", size = 0.2), panel.border = element_rect(color = "black", fill = NA, size = 0.5), axis.ticks = element_blank(), axis.text = element_text(size = 10, face = 2), plot.title.position = "plot") ``` ] --- # `ggforce` <img src="figs/Lec10/force-2-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggforce` <br> <br> .large["Accelerating `ggplot2`"] <br> .medium[[ggforce.data-imaginist.com](https://ggforce.data-imaginist.com/reference/index.html)] <br> <br> <br> .large[`geom_sina`] <br> .medium[keeps jittered points restricted within the violin] <br> <br> <br> .large[`geom_mark_circle` (and related `geom`s)] <br> .medium[circle and label points within a group] <br> <br> --- # `ggforce` <img src="figs/Lec10/force-3-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggforce`] .tiny[ ```r penguins %>% filter(!is.na(body_mass_g) & !is.na(flipper_length_mm) & species != "Chinstrap") %>% ggplot(aes(x = body_mass_g, y = flipper_length_mm)) + * geom_mark_circle(aes(fill = species, label = species)) + geom_point() + scale_x_continuous(limits = c(2240, 6500)) + scale_y_continuous(limits = c(160, 250)) + labs(y = "Flipper Length (mm)", x = "Body Mass (g)", title = "Gentoo penguins tend to be heavier and have longer flippers", caption = "Data from the palmerpenguins package") + theme(legend.position = "none", panel.background = element_blank(), panel.grid.major = element_line(color = "grey90", size = 0.2), axis.ticks = element_line(color = "grey90", size = 0.2), 
legend.key = element_rect(fill = "transparent")) ``` ] --- # `ggforce` <img src="figs/Lec10/force-4-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggwaffle` <br> <br> .large[For waffle charts] <br> .medium[[https://liamgilbey.github.io/ggwaffle/](https://liamgilbey.github.io/ggwaffle/)] <br> <br> <br> .large[Another option for showing parts of a whole] <br> .medium[when overall *n* and # of categories are small] <br> <br> <br> .large[`waffle_iron` to get data prepped] <br> <br> <br> .large[`geom_waffle` and `theme_waffle` to make the plot] --- # `ggwaffle` ```r penguins$species <- as.character(penguins$species) penguin_waffle <- waffle_iron(penguins, aes_d(group = species)) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> y </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> x </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> group </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Adelie </td> </tr> <tr> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Adelie </td> </tr> <tr> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Adelie </td> </tr> <tr> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Adelie </td> </tr> <tr> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Adelie </td> </tr> </tbody> </table> --- .h1[# `ggwaffle`] .tiny[ ```r penguin_waffle %>% ggplot(aes(x, y, fill = group)) + geom_waffle() +
coord_equal() + theme_waffle() + scale_fill_brewer(type = "qual") + theme(legend.position = "bottom", axis.title.x = element_blank(), axis.title.y = element_blank(), legend.title = element_blank()) ``` <img src="figs/Lec10/waffle-3-1.png" width="6in" style="display: block; margin: auto;" /> ] --- # `ggwaffle` <img src="figs/Lec10/waffle-3-1.png" width="504" height="50%" style="display: block; margin: auto;" /> --- # `ggExtra` <br> <br> .large[For adding marginal distributions] <br> .medium[[https://cran.r-project.org/web/packages/ggExtra/vignettes/ggExtra.html](https://cran.r-project.org/web/packages/ggExtra/vignettes/ggExtra.html)] <br> .medium[helpful to add context to a scatter plot, especially if *n* is high] <br> <br> <br> .large[`ggMarginal`] <br> .medium[defaults to a very plain density curve] <br> .medium[add `groupFill = TRUE` to color by group to match plot] <br> .medium[add `type = "histogram"` to show a histogram instead] <br> .medium[same alpha/size/color aesthetics apply] --- # `ggExtra` <img src="figs/Lec10/extra-1-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggExtra`] .tiny[ ```r *plot <- penguins %>% ggplot(aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point(alpha = 0.5, size = 3) + scale_color_brewer(palette = "Set2", name = NULL) + labs(x = "Flipper Length (mm)", y = "Body Mass (g)", caption = "Data from the palmerpenguins package") + theme(legend.position = c(0.2, 0.8), panel.background = element_blank(), panel.grid.major = element_line(color = "grey90", size = 0.2), axis.ticks = element_line(color = "grey90", size = 0.2), legend.key = element_rect(fill = "transparent"), legend.background = element_rect(fill = "transparent"), plot.caption.position = "plot", plot.caption = element_text(hjust = 0, face = "italic")) *ggMarginal(plot, groupFill = TRUE) ``` ] --- # `ggExtra` <img src="figs/Lec10/extra-2-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggExtra`] .tiny[ ```r
*plot <- penguins %>% ggplot(aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point(alpha = 0.5, size = 3) + scale_color_brewer(palette = "Set2", name = NULL) + labs(x = "Flipper Length (mm)", y = "Body Mass (g)", caption = "Data from the palmerpenguins package") + theme(legend.position = c(0.2, 0.8), panel.background = element_blank(), panel.grid.major = element_line(color = "grey90", size = 0.2), axis.ticks = element_line(color = "grey90", size = 0.2), legend.key = element_rect(fill = "transparent"), legend.background = element_rect(fill = "transparent"), plot.caption.position = "plot", plot.caption = element_text(hjust = 0, face = "italic")) *ggMarginal(plot, type = "histogram", alpha = 0.5) ``` <img src="figs/Lec10/extra-3-1.png" width="6in" style="display: block; margin: auto;" /> ] --- # `ggExtra` <img src="figs/Lec10/extra-3-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggtext` <br> <br> .large[For any kind of text manipulation] <br> .medium[[https://github.com/wilkelab/ggtext](https://github.com/wilkelab/ggtext)] <br> .medium[using simple HTML] <br> <br> <br> -- .large[Already saw an example to color the title to match groups] <br> .medium[another option to get rid of legends] <br> .medium[just add `element_markdown()` in the `theme` layer for that element] --- # `ggtext` <img src="figs/Lec10/text-1-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggtext` <br> <br> .large[For any kind of text manipulation] <br> .medium[[https://github.com/wilkelab/ggtext](https://github.com/wilkelab/ggtext)] <br> .medium[using simple HTML] <br> <br> <br> .large[Already saw an example to color the title to match groups] <br> .medium[another option to get rid of legends] <br> .medium[just add `element_markdown()` in the `theme` layer for that element] <br> <br> <br> .large[Also useful for text boxes] --- .h1[# `ggtext`] .tiny[ ```r economics %>% ggplot(aes(x = date, y = unemploy, color = recession, group = 1)) + 
geom_line(size = 1) + scale_color_manual(values = c("dark grey","#bb0000")) + labs(y = "Unemployed (in thousands)", x = NULL, title = "The number of unemployed people tends to increase during a <span style = 'color:#bb0000;'>**recession**</span>") + theme(panel.background = element_blank(), panel.grid.major = element_line(color = "grey90", size = 0.3), panel.border = element_rect(color = "black", fill = NA, size = 0.5), axis.ticks = element_blank(), # plot.title = element_markdown(), #no longer need this line plot.title.position = "plot", legend.position = "none", * plot.title = element_textbox_simple(size = 13, lineheight = 1, * padding = margin(5.5, 5.5, 5.5, 5.5), * margin = margin(0, 0, 5.5, 0), * fill = "#d4d4d4", r = grid::unit(8, "pt"))) ``` ] --- # `ggtext` <img src="figs/Lec10/text-2-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggtext` <img src="figs/Lec10/text-3-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggtext`] .tiny[ ```r penguins %>% ggplot(aes(x = body_mass_g, y = ..count..)) + geom_density_line(data = select(penguins, -species), aes(fill = "all penguins"), color = "transparent") + geom_density_line(aes(fill = "species"), color = "transparent") + facet_wrap(~species, nrow = 1) + scale_fill_manual(values = c("grey","#0C8346"), name = NULL, guide = guide_legend(direction = "horizontal")) + labs(x = "Body Mass (g)", title = "Comparing the distribution of body mass by penguin species", subtitle = "Gentoo penguins tend to be the heaviest") + theme(legend.position = "bottom", panel.background = element_blank(), panel.grid.major = element_line(color = "grey90", size = 0.3), strip.background = element_blank(), * strip.text = element_textbox( * size = 11, * color = "white", fill = "#0C8346", box.color = "#4A618C", * halign = 0.5, linetype = 1, r = unit(5, "pt"), width = unit(1, "npc"), * padding = margin(2, 0, 1, 0), margin = margin(3, 3, 3, 3)), panel.border = element_rect(color = "black", fill = NA, size = 
0.5), axis.ticks = element_blank(), plot.title.position = "plot") ``` ] --- # `ggtext` <img src="figs/Lec10/text-4-1.png" width="504" style="display: block; margin: auto;" /> --- # `gghighlight` <br> <br> .large[Very useful for highlighting certain groups & applying aesthetics] <br> .medium[[https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)] <br> .medium[to avoid spaghetti graphs] <br> <br> -- <br> .large[Easy to highlight/label based on specific values] <br> .medium[or a specific number (e.g., top 5)] <br> .medium[can control aesthetics of unhighlighted categories] <br> --- # `gghighlight` <img src="figs/Lec10/highlight-1-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `gghighlight`] .tiny[ ```r constructor_pts %>% filter(year == 2020) %>% ggplot(aes(x = round, y = points, color = name, group = name)) + geom_line() + * gghighlight(max(points) > 180 & max(points) < 250, * label_params = list(size = 4.5)) + scale_x_continuous(breaks = seq(1, 17, 1)) + scale_color_manual(values = c("#E0610E","#F596C8", "#FFF500","dark grey")) + labs(title = "The race for third place during the 2020 F1 season", subtitle = "While Mercedes and Red Bull ran off with the first two placings, three teams battled all year long for third place", x = "Race Round", y = "Accumulated Points") + theme(legend.position = "none", panel.background = element_blank(), panel.grid.major.y = element_line(color = "grey90", size = 0.2), axis.ticks = element_blank(), panel.border = element_rect(color = "black", fill = NA, size = 0.5)) ``` ] --- .h1[# `gghighlight`] .tiny[ ```r constructor_pts %>% filter(year == 2020) %>% ggplot(aes(x = round, y = points, color = name, group = name)) + geom_line() + * gghighlight(max(points), max_highlight = 5L) + scale_x_continuous(breaks = seq(1, 17, 1)) + labs(title = "The race for third place during the 2020 F1 season", subtitle = "While 
Mercedes and Red Bull ran off with the first two placings, three teams battled all year long for third place", x = "Race Round", y = "Accumulated Points") + theme(legend.position = "none", panel.background = element_blank(), panel.grid.major.y = element_line(color = "grey90", size = 0.2), axis.ticks = element_blank(), panel.border = element_rect(color = "black", fill = NA, size = 0.5)) ``` ] --- # `gghighlight` <img src="figs/Lec10/highlight-2-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `gghighlight`] .tiny[ ```r constructor_pts %>% filter(year == 2020) %>% ggplot(aes(x = round, y = points, color = name, group = name)) + geom_line() + * gghighlight(max(points) > 180 & max(points) < 250, * unhighlighted_params = list(linetype = "dotted", alpha = 0.8)) + scale_x_continuous(breaks = seq(1, 17, 1)) + scale_color_manual(values = c("#E0610E","#F596C8", "#FFF500","dark grey")) + labs(title = "The race for third place during the 2020 F1 season", subtitle = "While Mercedes and Red Bull ran off with the first two placings, three teams battled all year long for third place", x = "Race Round", y = "Accumulated Points") + theme(legend.position = "none", panel.background = element_blank(), panel.grid.major.y = element_line(color = "grey90", size = 0.2), axis.ticks = element_blank(), panel.border = element_rect(color = "black", fill = NA, size = 0.5)) ``` ] --- # `gghighlight` <img src="figs/Lec10/highlight-3-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` <br> <br> .large[Useful functions for manipulating graphs] <br> .medium[[https://wilkelab.org/cowplot/articles/introduction.html](https://wilkelab.org/cowplot/articles/introduction.html)] <br> .medium[adding images to plots, arranging plots] <br> .medium[adding joint titles and joint legends] <br> <br> <br> .large[Also includes themes] --- # `cowplot` <img src="figs/Lec10/cowplot-1-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `cowplot`] 
.medium[**adding an image to a plot**] .tiny[ ```r *p <- penguins %>% filter(species != "Gentoo") %>% mutate(label = case_when(flipper_length_mm == 192 & body_mass_g == 2700 ~ "Chinstrap", flipper_length_mm == 184 & body_mass_g == 4650 ~ "Adelie")) %>% ggplot(aes(x = flipper_length_mm, y = body_mass_g, size = bill_length_mm, color = species)) + ... *ggdraw() + * draw_plot(p) + * draw_image("https://allisonhorst.github.io/palmerpenguins/reference/figures/palmerpenguins.png", * x = 1, y = 0.35, hjust = 1, vjust = 1, halign = 1, valign = 1, width = 0.15) ``` ] --- # `cowplot` <img src="figs/Lec10/cowplot-full-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` `scatter` <img src="figs/Lec10/cowplot-3-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` `bar` <img src="figs/Lec10/cowplot-4-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` **creating an inset plot** ```r ggdraw(scatter) + draw_plot(bar, x = .55, y = .55, width = .4, height = .4) + draw_plot_label(c("A.", "B."), x = c(0, 0.45), y = c(0.95, 0.95), size = 12) ``` --- # `cowplot` <img src="figs/Lec10/cowplot-5-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` ```r plot_grid(scatter, bar, labels = c("A.", "B."), label_size = 12) ``` <img src="figs/Lec10/cowplot-6-1.png" width="720" style="display: block; margin: auto;" /> --- # `cowplot` ```r plot_grid(scatter, bar, labels = c("A.", "B."), label_size = 12, align = "h") ``` <img src="figs/Lec10/cowplot-7-1.png" width="720" style="display: block; margin: auto;" /> --- # `cowplot` ```r plot_grid(scatter, bar, labels = c("A.", "B."), label_size = 12, align = "h", rel_widths = c(2, 1)) ``` <img src="figs/Lec10/cowplot-8-1.png" width="720" style="display: block; margin: auto;" /> --- # `cowplot` **sharing titles among plots** ```r plots <- plot_grid(scatter, bar, labels = c("A.", "B."), label_size = 12, align = "h") title <- ggdraw() + draw_label("Gentoo penguins 
tend to be heavier, with longer and shallower bills", fontface = 'bold', x = 0, hjust = 0) + theme(plot.margin = margin(0, 0, 0, 5)) plot_grid(title, plots, ncol = 1, # necessary so that the title is small rel_heights = c(0.1, 1)) ``` --- # `cowplot` <img src="figs/Lec10/cowplot-9-1.png" width="864" style="display: block; margin: auto;" /> --- # `cowplot` **sharing legends among plots** ```r plots_shared_legend <- plot_grid(scatter + theme(legend.position="none"), bar, labels = c("A.", "B."), label_size = 12, align = "h") legend <- get_legend(scatter + theme(legend.position = "top")) plot_grid(title, legend, plots_shared_legend, ncol = 1, rel_heights = c(.1, .05, 1)) ``` --- # `cowplot` <img src="figs/Lec10/cowplot-10-1.png" width="864" style="display: block; margin: auto;" /> --- # `cowplot` <img src="figs/Lec10/cowplot-11-1.png" width="504" style="display: block; margin: auto;" /> --- # `cowplot` <img src="figs/Lec10/cowplot-12-1.png" width="504" style="display: block; margin: auto;" /> --- # `corrplot` <br> <br> .large[Easy way to create a correlation matrix] <br> .medium[[https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html](https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html)] <br> .medium[aka a correlogram] <br> <br> <br> .large[Most useful when you represent correlations as color as well as size] <br> .medium[but always examine the raw data as well] --- # `corrplot` ```r corr <- cor(penguins %>% select(`Bill Length` = bill_length_mm, `Bill Depth` = bill_depth_mm, `Flipper Length` = flipper_length_mm, `Body Mass` = body_mass_g) %>% drop_na()) corrplot(corr, method = "circle") ``` --- # `corrplot` <img src="figs/Lec10/corr-1-1.png" width="504" style="display: block; margin: auto;" /> --- # `corrplot` ```r corrplot(corr, method = "number") ``` --- # `corrplot` <img src="figs/Lec10/corr-2-1.png" width="504" style="display: block; margin: auto;" /> --- # `corrplot` ```r corrplot(corr, method = "color") ``` --- 
# `corrplot` <img src="figs/Lec10/corr-3-1.png" width="504" style="display: block; margin: auto;" /> --- # `corrplot` ```r diag(corr) = NA # changes diagonal correlations to NA corrplot(corr, type = "lower", tl.srt = 45, # controls angle of text # controls color palette col = brewer.pal(n = 8, name = "RdGy"), # gets rid of background grid addgrid.col = NA, # controls text color tl.col = "black", # controls label of NA values na.label = "x") ``` --- # `corrplot` <img src="figs/Lec10/corr-4-1.png" width="504" style="display: block; margin: auto;" /> --- # `ggalt` <br> <br> .large[Very easy to make a dumbbell plot] <br> .medium[[https://github.com/hrbrmstr/ggalt](https://github.com/hrbrmstr/ggalt)] <br> .medium[various miscellaneous functions] <br> <br> -- <br> .large[Dumbbell plots] <br> .medium[great way to show specific change over two points in time] <br> .medium[encoded by position, so axis doesn't have to start at zero] <br> .medium[can handle many categories as bars are narrow (similar to lollipop)] <br> --- # `ggalt` **What's the increase in median listing price, by city, from 2006 to 2015?** <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> month </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> sales </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> volume </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> median </th> <th
style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> listings </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> inventory </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 72 </td> <td style="text-align:right;"> 5380000 </td> <td style="text-align:right;"> 71400 </td> <td style="text-align:right;"> 701 </td> <td style="text-align:right;"> 6.3 </td> <td style="text-align:right;"> 2000.000 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 6505000 </td> <td style="text-align:right;"> 58700 </td> <td style="text-align:right;"> 746 </td> <td style="text-align:right;"> 6.6 </td> <td style="text-align:right;"> 2000.083 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 130 </td> <td style="text-align:right;"> 9285000 </td> <td style="text-align:right;"> 58100 </td> <td style="text-align:right;"> 784 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.167 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 9730000 </td> <td style="text-align:right;"> 68600 </td> <td style="text-align:right;"> 785 </td> <td style="text-align:right;"> 6.9 </td> <td style="text-align:right;"> 2000.250 </td> </tr> <tr> <td style="text-align:left;"> 
Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 141 </td> <td style="text-align:right;"> 10590000 </td> <td style="text-align:right;"> 67300 </td> <td style="text-align:right;"> 794 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.333 </td> </tr> </tbody> </table> --- # `ggalt` **What's the increase in median listing price, by city, from 2006 to 2015?** ```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> median </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2006 </td> <td style="text-align:right;"> 100291.7 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2015 </td> <td style="text-align:right;"> 137414.3 </td> </tr> <tr> <td style="text-align:left;"> Amarillo </td> <td style="text-align:right;"> 2006 </td> <td style="text-align:right;"> 114216.7 </td> </tr> <tr> <td style="text-align:left;"> Amarillo </td> <td style="text-align:right;"> 2015 </td> <td style="text-align:right;"> 147957.1 </td> </tr> <tr> <td style="text-align:left;"> Arlington </td> <td style="text-align:right;"> 2006 </td> <td style="text-align:right;"> 130308.3 </td> </tr> </tbody> </table> --- # `ggalt` **What's the increase in median listing price, by city, from 2006 to 2015?** 
```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) %>% pivot_wider(names_from = year, values_from = median) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2006 </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2015 </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 100291.7 </td> <td style="text-align:right;"> 137414.3 </td> </tr> <tr> <td style="text-align:left;"> Amarillo </td> <td style="text-align:right;"> 114216.7 </td> <td style="text-align:right;"> 147957.1 </td> </tr> <tr> <td style="text-align:left;"> Arlington </td> <td style="text-align:right;"> 130308.3 </td> <td style="text-align:right;"> 170142.9 </td> </tr> <tr> <td style="text-align:left;"> Austin </td> <td style="text-align:right;"> 171766.7 </td> <td style="text-align:right;"> 259000.0 </td> </tr> <tr> <td style="text-align:left;"> Bay Area </td> <td style="text-align:right;"> 144766.7 </td> <td style="text-align:right;"> 190300.0 </td> </tr> </tbody> </table> --- # `ggalt` <img src="figs/Lec10/dumbbell-1-1.png" width="504" style="display: block; margin: auto;" /> --- .h1[# `ggalt`] .tiny[ ```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) %>% pivot_wider(names_from = year, values_from = median) %>% * ggplot(aes(x = `2006`, xend = `2015`, y = reorder(city, `2015`), group = city)) + * geom_dumbbell(size = 
1.5, color = "#d4d4d4", * colour_x = "#BA95DB", colour_xend = "#693498", * dot_guide = TRUE, dot_guide_size = 0.25) + scale_x_continuous(labels = dollar) + labs(title = "Median listing price increases from 2006 to 2015") + theme_linedraw() + theme(axis.ticks = element_blank(), panel.grid.minor = element_blank(), panel.grid.major.y = element_blank(), axis.title = element_blank()) ``` ] --- # `ggalt` **which cities saw the greatest increase in this time?** ```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) %>% pivot_wider(names_from = year, values_from = median) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2006 </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2015 </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 100291.7 </td> <td style="text-align:right;"> 137414.3 </td> </tr> <tr> <td style="text-align:left;"> Amarillo </td> <td style="text-align:right;"> 114216.7 </td> <td style="text-align:right;"> 147957.1 </td> </tr> <tr> <td style="text-align:left;"> Arlington </td> <td style="text-align:right;"> 130308.3 </td> <td style="text-align:right;"> 170142.9 </td> </tr> <tr> <td style="text-align:left;"> Austin </td> <td style="text-align:right;"> 171766.7 </td> <td style="text-align:right;"> 259000.0 </td> </tr> <tr> <td style="text-align:left;"> Bay Area </td> <td style="text-align:right;"> 144766.7 </td> <td style="text-align:right;"> 190300.0 </td> </tr> </tbody> </table> --- # `ggalt` **which cities 
saw the greatest increase in this time?** ```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) %>% pivot_wider(names_from = year, values_from = median) %>% * mutate(change = (`2015` - `2006`) / `2006`) %>% * arrange(desc(change)) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2006 </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> 2015 </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> change </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Odessa </td> <td style="text-align:right;"> 87750.0 </td> <td style="text-align:right;"> 176114.3 </td> <td style="text-align:right;"> 1.0070004 </td> </tr> <tr> <td style="text-align:left;"> Midland </td> <td style="text-align:right;"> 130790.0 </td> <td style="text-align:right;"> 239950.0 </td> <td style="text-align:right;"> 0.8346204 </td> </tr> <tr> <td style="text-align:left;"> Irving </td> <td style="text-align:right;"> 125741.7 </td> <td style="text-align:right;"> 202900.0 </td> <td style="text-align:right;"> 0.6136258 </td> </tr> <tr> <td style="text-align:left;"> Fort Bend </td> <td style="text-align:right;"> 170016.7 </td> <td style="text-align:right;"> 272400.0 </td> <td style="text-align:right;"> 0.6021959 </td> </tr> <tr> <td style="text-align:left;"> San Angelo </td> <td style="text-align:right;"> 98775.0 </td> <td style="text-align:right;"> 156728.6 </td> <td style="text-align:right;"> 0.5867231 </td> </tr> </tbody> </table> --- .h1[# 
`ggalt` + `ggtext`] .tiny[ ```r txhousing %>% filter((year == 2006 | year == 2015) & !(city %in% c("South Padre Island", "Kerrville"))) %>% group_by(city, year) %>% summarize(median = mean(median, na.rm = TRUE)) %>% pivot_wider(names_from = year, values_from = median) %>% * mutate(color = ifelse(city %in% c("Midland","Odessa"), "#bb0000", "black"), * name = ifelse(city %in% c("Midland","Odessa"), * glue("<b style='color:{color}'>{city}</b>"), city)) %>% ggplot(aes(x = `2006`, xend = `2015`, y = reorder(name, `2015`), group = city)) + geom_dumbbell(size = 1.5, color = "#d4d4d4", colour_x = "#BA95DB", colour_xend = "#693498", dot_guide = TRUE, dot_guide_size = 0.25) + scale_x_continuous(labels = dollar) + labs(title = "Median listing price increases from 2006 to 2015", * subtitle = "Midland & Odessa saw the largest increase over this time") + theme_linedraw() + theme(axis.ticks = element_blank(), panel.grid.minor = element_blank(), panel.grid.major.y = element_blank(), axis.title = element_blank(), * axis.text.y = element_markdown(), plot.subtitle = element_text(margin = margin(-5, 0, 5, 0))) ``` ] --- # `ggalt` + `ggtext` <img src="figs/Lec10/dumbbell-2-1.png" width="504" style="display: block; margin: auto;" /> --- # Bullet graph <br> <br> .large[Useful for showing progress toward a goal] <br> .medium[using bars of varying widths] <br> .medium[can also show performance compared to a baseline] <br> .medium[or even a change over time, as appropriate] <br> <br> -- <br> .large[No easy package solution, so time for DIY] <br> .medium[using `geom_col`] <br> --- # Bullet graph **did SUV manufacturers improve on their highway mpg from 1999 to 2008?** ```r mpg %>% filter(class == "suv") %>% group_by(manufacturer, year) %>% summarize(mean = mean(hwy)) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000
!important;"> manufacturer </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> mean </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> chevrolet </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 16.33333 </td> </tr> <tr> <td style="text-align:left;"> chevrolet </td> <td style="text-align:right;"> 2008 </td> <td style="text-align:right;"> 17.50000 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 16.00000 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:right;"> 2008 </td> <td style="text-align:right;"> 16.00000 </td> </tr> <tr> <td style="text-align:left;"> ford </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 17.33333 </td> </tr> </tbody> </table> --- # Bullet graph **did SUV manufacturers improve on their highway mpg from 1999 to 2008?** ```r *mpg_bullet <- mpg %>% filter(class == "suv") %>% group_by(manufacturer, year) %>% summarize(mean = mean(hwy)) %>% * mutate(year = as.character(year), * width = ifelse(year == "1999", 0.9, 0.5)) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> manufacturer </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> mean </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> width </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 
chevrolet </td> <td style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 16.33333 </td> <td style="text-align:right;"> 0.9 </td> </tr> <tr> <td style="text-align:left;"> chevrolet </td> <td style="text-align:left;"> 2008 </td> <td style="text-align:right;"> 17.50000 </td> <td style="text-align:right;"> 0.5 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 16.00000 </td> <td style="text-align:right;"> 0.9 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:left;"> 2008 </td> <td style="text-align:right;"> 16.00000 </td> <td style="text-align:right;"> 0.5 </td> </tr> <tr> <td style="text-align:left;"> ford </td> <td style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 17.33333 </td> <td style="text-align:right;"> 0.9 </td> </tr> </tbody> </table> --- # Bullet graph ```r mpg_bullet %>% * mutate(manufacturer = str_to_title(manufacturer)) %>% * ggplot(aes(x = mean, y = manufacturer, fill = year)) + * geom_col(width = mpg_bullet$width) + scale_fill_manual(values = c("grey", "#098641"), name = NULL) + scale_alpha_manual(values = c(0.3, 1), guide = FALSE) ``` --- # Bullet graph <img src="figs/Lec10/bullet-5-1.png" width="504" style="display: block; margin: auto;" /> --- # Bullet graph **changes to make?** - remove y-axis title and adjust x-axis title - clean up background, lines, etc. 
- add a good title - find a way to take care of the legend - take the bars all the way to the axis - reorder bars in a meaningful way --- .h1[# Bullet graph] .tiny[ ```r mpg_bullet %>% mutate(manufacturer = str_to_title(manufacturer)) %>% ggplot(aes(x = mean, y = manufacturer, fill = year)) + geom_col(width = mpg_bullet$width) + scale_fill_manual(values = c("grey", "#098641"), name = NULL) + scale_alpha_manual(values = c(0.3, 1), guide = FALSE) + * scale_x_continuous(expand = expansion(mult = c(0, 0.1))) + labs(subtitle = "Jeep only manufacturer to show negative change", title = "The highway mpg among SUVs, by manufacturer, in <span style = 'color:#7e7e7e;'>**1999**</span> and <span style = 'color:#098641;'>**2008**</span>", x = "Highway miles per gallon") + theme_linedraw() + theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank(), * plot.title = element_markdown(), axis.ticks = element_blank(), axis.title.y = element_blank(), legend.position = "none") ``` ] --- # Bullet graph <img src="figs/Lec10/bullet-6-1.png" width="504" style="display: block; margin: auto;" /> --- # Bullet graph **changes to make?** - remove y-axis title and adjust x-axis title - clean up background, lines, etc. 
- add a good title - find a way to take care of the legend - take the bars all the way to the axis - **reorder bars in a meaningful way** --- # Bullet graph **to reorder the bars** ```r mpg_bullet %>% group_by(manufacturer) %>% mutate(max = max(mean)) ``` <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> manufacturer </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> mean </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> width </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> max </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> chevrolet </td> <td style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 16.33333 </td> <td style="text-align:right;"> 0.9 </td> <td style="text-align:right;"> 17.50000 </td> </tr> <tr> <td style="text-align:left;"> chevrolet </td> <td style="text-align:left;"> 2008 </td> <td style="text-align:right;"> 17.50000 </td> <td style="text-align:right;"> 0.5 </td> <td style="text-align:right;"> 17.50000 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 16.00000 </td> <td style="text-align:right;"> 0.9 </td> <td style="text-align:right;"> 16.00000 </td> </tr> <tr> <td style="text-align:left;"> dodge </td> <td style="text-align:left;"> 2008 </td> <td style="text-align:right;"> 16.00000 </td> <td style="text-align:right;"> 0.5 </td> <td style="text-align:right;"> 16.00000 </td> </tr> <tr> <td style="text-align:left;"> ford </td> <td 
style="text-align:left;"> 1999 </td> <td style="text-align:right;"> 17.33333 </td> <td style="text-align:right;"> 0.9 </td> <td style="text-align:right;"> 18.66667 </td> </tr> </tbody> </table> --- .h1[# Bullet graph] .tiny[ ```r mpg_bullet %>% mutate(manufacturer = str_to_title(manufacturer)) %>% group_by(manufacturer) %>% mutate(max = max(mean)) %>% * ggplot(aes(x = mean, y = reorder(manufacturer, max), fill = year)) + geom_col(width = mpg_bullet$width) + scale_fill_manual(values = c("grey", "#098641"), name = NULL) + scale_alpha_manual(values = c(0.3, 1), guide = FALSE) + * scale_x_continuous(expand = expansion(mult = c(0, 0.1))) + labs(subtitle = "Jeep only manufacturer to show negative change", title = "The highway mpg among SUVs, by manufacturer, in <span style = 'color:#7e7e7e;'>**1999**</span> and <span style = 'color:#098641;'>**2008**</span>", x = "Highway miles per gallon") + theme_linedraw() + theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank(), * plot.title = element_markdown(), axis.ticks = element_blank(), axis.title.y = element_blank(), legend.position = "none") ``` ] --- # Bullet graph <img src="figs/Lec10/bullet-9-1.png" width="504" style="display: block; margin: auto;" /> --- # Upcoming <br> .large[Lecture 11 on Wednesday June 16 **midterm review**] <br> .large[Homework 3 due Tuesday June 15] <br> .large[Lab 7 on Tuesday June 15]