class: inverse, center, middle # 36-315: Statistical Graphics and Visualization ## Lecture 7 Meghan Hall <br> Department of Statistics & Data Science <br> Carnegie Mellon University <br> June 7, 2021 --- layout: true <div class="my-footer"><span>cmu-36315.netlify.app</span></div> --- # From last time <br> .large[Scatter plots] <br> .medium[Considerations, overplotting, line of best fit] <br> .large[Relational data] <br> .medium[Practicing joins with `dplyr`] --- # Updates <br> .large[Homework] <br> .medium[due tomorrow!] <br> <br> <br> .large[Lab 5 tomorrow] <br> <br> <br> .large[Schedule going forward] --- # Today <br> .large[Grab bag!] <br> .medium[maps, pie charts, heat maps] <br> .large[Data manipulation] <br> .medium[aggregating, joining, pivoting] --- class: left # Today's agenda <br> .large[ 1. heat maps 2. dot plots 3. pie charts & parts of a whole 4. maps ] --- # Today's data <br> <br> <br> .center[`txhousing`]<br> .center[`lincoln_weather`]<br> .center[`storms`]<br> .center[`penguins`]<br> .center[`tx_data`]<br> .center[`us_rent_income`]<br> --- class: left # Today's agenda <br> .large[ 1. **heat maps** 2. dot plots 3. pie charts & parts of a whole 4. 
maps ] --- # Heat maps <br> <br> .large[Mapping data values onto color] <br> <br> -- <br> .large[Can be difficult to determine exact values] <br> .medium[but very useful for identifying broader trends] <br> <br> -- <br> .large[Order data in a way that's useful for your data/question] <br> .medium[if ordering is appropriate] <br> <br> -- <br> .large[Can also be used for chronological/time series data] --- # Heat maps <img src="figs/Lec7/laptop-1.png" width="504" style="display: block; margin: auto;" /> --- # Today's data <br> .center[`txhousing`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> month </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> sales </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> volume </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> median </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> listings </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> inventory </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 72 </td> <td 
style="text-align:right;"> 5380000 </td> <td style="text-align:right;"> 71400 </td> <td style="text-align:right;"> 701 </td> <td style="text-align:right;"> 6.3 </td> <td style="text-align:right;"> 2000.000 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 6505000 </td> <td style="text-align:right;"> 58700 </td> <td style="text-align:right;"> 746 </td> <td style="text-align:right;"> 6.6 </td> <td style="text-align:right;"> 2000.083 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 130 </td> <td style="text-align:right;"> 9285000 </td> <td style="text-align:right;"> 58100 </td> <td style="text-align:right;"> 784 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.167 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 9730000 </td> <td style="text-align:right;"> 68600 </td> <td style="text-align:right;"> 785 </td> <td style="text-align:right;"> 6.9 </td> <td style="text-align:right;"> 2000.250 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 141 </td> <td style="text-align:right;"> 10590000 </td> <td style="text-align:right;"> 67300 </td> <td style="text-align:right;"> 794 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.333 </td> </tr> </tbody> </table> -- <br> <br> .center[**how has the median listing price changed over time in Texas cities?**] --- # Aggregating data **two methods, depending on needs** ```r txhousing %>% group_by(city, year) 
%>% summarize(mean_price = mean(median, na.rm = TRUE)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> mean_price </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 66600.00 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2001 </td> <td style="text-align:right;"> 70975.00 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2002 </td> <td style="text-align:right;"> 68600.00 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2003 </td> <td style="text-align:right;"> 71933.33 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2004 </td> <td style="text-align:right;"> 74866.67 </td> </tr> </tbody> </table> --- # Aggregating data **two methods, depending on needs** ```r txhousing %>% group_by(city, year) %>% mutate(mean_price = mean(median, na.rm = TRUE)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> city </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> month </th> <th style="text-align:right;font-weight: 
bold;color: white !important;background-color: #bb0000 !important;"> sales </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> volume </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> median </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> listings </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> inventory </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> date </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> mean_price </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 72 </td> <td style="text-align:right;"> 5380000 </td> <td style="text-align:right;"> 71400 </td> <td style="text-align:right;"> 701 </td> <td style="text-align:right;"> 6.3 </td> <td style="text-align:right;"> 2000.000 </td> <td style="text-align:right;"> 66600 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 6505000 </td> <td style="text-align:right;"> 58700 </td> <td style="text-align:right;"> 746 </td> <td style="text-align:right;"> 6.6 </td> <td style="text-align:right;"> 2000.083 </td> <td style="text-align:right;"> 66600 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 130 </td> <td style="text-align:right;"> 9285000 </td> <td style="text-align:right;"> 58100 </td> <td 
style="text-align:right;"> 784 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.167 </td> <td style="text-align:right;"> 66600 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 9730000 </td> <td style="text-align:right;"> 68600 </td> <td style="text-align:right;"> 785 </td> <td style="text-align:right;"> 6.9 </td> <td style="text-align:right;"> 2000.250 </td> <td style="text-align:right;"> 66600 </td> </tr> <tr> <td style="text-align:left;"> Abilene </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 141 </td> <td style="text-align:right;"> 10590000 </td> <td style="text-align:right;"> 67300 </td> <td style="text-align:right;"> 794 </td> <td style="text-align:right;"> 6.8 </td> <td style="text-align:right;"> 2000.333 </td> <td style="text-align:right;"> 66600 </td> </tr> </tbody> </table> --- # Heat maps ```r txhousing_plot %>% ggplot(aes(x = year, y = city, fill = mean_price)) + geom_tile() ``` -- <img src="figs/Lec7/heat-1-1.png" width="504" style="display: block; margin: auto;" /> --- # Heat maps ```r txhousing_plot %>% * ggplot(aes(x = year, y = reorder(city, rank_2015), fill = mean_price)) + geom_tile() + * scale_fill_gradient(low = "#fafafa", high = "#191970", * labels = dollar, name = "") ``` -- <img src="figs/Lec7/heat-2-1.png" width="504" style="display: block; margin: auto;" /> --- # Heat maps ```r txhousing_plot %>% ggplot(aes(x = year, y = reorder(city, rank_2015), fill = mean_price)) + geom_tile() + scale_fill_gradient(low = "#fafafa", high = "#191970", labels = dollar, name = "") + * coord_cartesian(expand = FALSE) + labs(title = "Listings Price in Texas Cities") + theme(axis.ticks = element_blank(), axis.title = element_blank()) ``` --- # Heat maps <img src="figs/Lec7/heat-3-1.png" 
width="504" style="display: block; margin: auto;" /> --- # Today's data <br> .center[`lincoln_weather`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> CST </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Max Temperature [F] </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 2016-1-1 </td> <td style="text-align:right;"> 37 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-2 </td> <td style="text-align:right;"> 41 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-3 </td> <td style="text-align:right;"> 37 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-4 </td> <td style="text-align:right;"> 30 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-5 </td> <td style="text-align:right;"> 38 </td> </tr> </tbody> </table> -- <br> <br> .center[**how did the max temperature change by day and month?**] --- # Practice with `lubridate` ```r lincoln_weather %>% select(CST, temp = `Max Temperature [F]`) %>% mutate(date = ymd(CST), month = month(date), day = day(date)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> CST </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> temp </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> date </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> month </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> day </th> </tr> </thead> 
<tbody> <tr> <td style="text-align:left;"> 2016-1-1 </td> <td style="text-align:right;"> 37 </td> <td style="text-align:left;"> 2016-01-01 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-2 </td> <td style="text-align:right;"> 41 </td> <td style="text-align:left;"> 2016-01-02 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 2 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-3 </td> <td style="text-align:right;"> 37 </td> <td style="text-align:left;"> 2016-01-03 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 3 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-4 </td> <td style="text-align:right;"> 30 </td> <td style="text-align:left;"> 2016-01-04 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 4 </td> </tr> <tr> <td style="text-align:left;"> 2016-1-5 </td> <td style="text-align:right;"> 38 </td> <td style="text-align:left;"> 2016-01-05 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 5 </td> </tr> </tbody> </table> --- # Heat maps ```r lincoln_weather %>% select(CST, temp = `Max Temperature [F]`) %>% mutate(date = ymd(CST), month = month(date), day = day(date)) %>% ggplot(aes(x = month, y = day, fill = temp)) + * geom_tile(color = "white") ``` --- # Heat maps <img src="figs/Lec7/heat-4-1.png" width="504" style="display: block; margin: auto;" /> --- # Heat maps ```r lincoln_weather %>% select(CST, temp = `Max Temperature [F]`) %>% mutate(date = ymd(CST), month = month(date), day = day(date)) %>% ggplot(aes(x = month, y = day, fill = temp)) + geom_tile(color = "white") + * scale_y_continuous(trans = "reverse", * breaks = seq(1, 31, 5)) + * scale_x_continuous(breaks = seq(1, 12, 1)) ``` --- # Heat maps <img src="figs/Lec7/heat-5-1.png" width="504" style="display: block; margin: auto;" /> --- # Heat maps ```r lincoln_weather %>% select(CST, temp = `Max Temperature [F]`) 
%>% mutate(date = ymd(CST), month = month(date), day = day(date)) %>% ggplot(aes(x = month, y = day, fill = temp)) + geom_tile(color = "white") + scale_y_continuous(trans = "reverse", breaks = seq(1, 31, 5)) + scale_x_continuous(breaks = seq(1, 12, 1)) + * labs(title = "Max Temp in Lincoln, NE in 2016") + * scale_fill_gradient(low = "blue", high = "yellow", * breaks = seq(20, 90, 10), * name = " °F") + * theme(axis.ticks = element_blank()) + * coord_cartesian(expand = FALSE) ``` --- # Heat maps <img src="figs/Lec7/heat-6-1.png" width="504" style="display: block; margin: auto;" /> --- class: left # Today's agenda <br> .large[ 1. heat maps 2. **dot plots** 3. pie charts & parts of a whole 4. maps ] --- # Dot plots <br> <br> .large[Combines the advantages of a **lollipop chart**] <br> .medium[narrow profile means you can show a variable with many values] <br> .medium[dots draw the eye to the end point, making for useful comparisons] <br> <br> -- <br> .large[With the benefits of encoding data by position] <br> .medium[axis doesn't need to start at zero] <br> .medium[useful when zero isn't particularly relevant to your data] <br> <br> -- <br> .large[Values must be ordered for this to be effective] --- # Today's data <br> .center[`storms`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> name </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> month </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> day </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> hour </th> <th 
style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> lat </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> long </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> status </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> category </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> wind </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> pressure </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Amy </td> <td style="text-align:right;"> 1975 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 27 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 27.5 </td> <td style="text-align:right;"> -79.0 </td> <td style="text-align:left;"> tropical depression </td> <td style="text-align:left;"> -1 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 1013 </td> </tr> <tr> <td style="text-align:left;"> Amy </td> <td style="text-align:right;"> 1975 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 27 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 28.5 </td> <td style="text-align:right;"> -79.0 </td> <td style="text-align:left;"> tropical depression </td> <td style="text-align:left;"> -1 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 1013 </td> </tr> <tr> <td style="text-align:left;"> Amy </td> <td style="text-align:right;"> 1975 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 27 </td> <td style="text-align:right;"> 12 </td> <td style="text-align:right;"> 29.5 </td> <td style="text-align:right;"> -79.0 </td> <td 
style="text-align:left;"> tropical depression </td> <td style="text-align:left;"> -1 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 1013 </td> </tr> <tr> <td style="text-align:left;"> Amy </td> <td style="text-align:right;"> 1975 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 27 </td> <td style="text-align:right;"> 18 </td> <td style="text-align:right;"> 30.5 </td> <td style="text-align:right;"> -79.0 </td> <td style="text-align:left;"> tropical depression </td> <td style="text-align:left;"> -1 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 1013 </td> </tr> <tr> <td style="text-align:left;"> Amy </td> <td style="text-align:right;"> 1975 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:right;"> 28 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 31.5 </td> <td style="text-align:right;"> -78.8 </td> <td style="text-align:left;"> tropical depression </td> <td style="text-align:left;"> -1 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 1012 </td> </tr> </tbody> </table> -- <br> <br> .center[**what's the max wind speed of each hurricane from 2011 onward?**] --- # Dot plots ```r storms_agg <- storms %>% filter(status == "hurricane" & year >= 2011) %>% group_by(name, year) %>% summarize(max_wind = max(wind)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> name </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> max_wind </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Arthur </td> <td style="text-align:right;"> 2014 </td> <td 
style="text-align:right;"> 85 </td> </tr> <tr> <td style="text-align:left;"> Chris </td> <td style="text-align:right;"> 2012 </td> <td style="text-align:right;"> 75 </td> </tr> <tr> <td style="text-align:left;"> Cristobal </td> <td style="text-align:right;"> 2014 </td> <td style="text-align:right;"> 75 </td> </tr> <tr> <td style="text-align:left;"> Danny </td> <td style="text-align:right;"> 2015 </td> <td style="text-align:right;"> 110 </td> </tr> <tr> <td style="text-align:left;"> Edouard </td> <td style="text-align:right;"> 2014 </td> <td style="text-align:right;"> 105 </td> </tr> </tbody> </table> --- # Dot plots ```r storms_agg <- storms %>% filter(status == "hurricane" & year >= 2011) %>% group_by(name, year) %>% summarize(max_wind = max(wind)) %>% * mutate(name_date = paste0(name, " (", year, ")"), * wind_mph = max_wind * 1.151) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> name </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> max_wind </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> name_date </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> wind_mph </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Arthur </td> <td style="text-align:right;"> 2014 </td> <td style="text-align:right;"> 85 </td> <td style="text-align:left;"> Arthur (2014) </td> <td style="text-align:right;"> 97.835 </td> </tr> <tr> <td style="text-align:left;"> Chris </td> <td style="text-align:right;"> 2012 </td> <td style="text-align:right;"> 75 </td> <td 
style="text-align:left;"> Chris (2012) </td> <td style="text-align:right;"> 86.325 </td> </tr> <tr> <td style="text-align:left;"> Cristobal </td> <td style="text-align:right;"> 2014 </td> <td style="text-align:right;"> 75 </td> <td style="text-align:left;"> Cristobal (2014) </td> <td style="text-align:right;"> 86.325 </td> </tr> <tr> <td style="text-align:left;"> Danny </td> <td style="text-align:right;"> 2015 </td> <td style="text-align:right;"> 110 </td> <td style="text-align:left;"> Danny (2015) </td> <td style="text-align:right;"> 126.610 </td> </tr> <tr> <td style="text-align:left;"> Edouard </td> <td style="text-align:right;"> 2014 </td> <td style="text-align:right;"> 105 </td> <td style="text-align:left;"> Edouard (2014) </td> <td style="text-align:right;"> 120.855 </td> </tr> </tbody> </table> --- # Dot plots ```r storms_agg %>% * ggplot(aes(x = wind_mph, y = reorder(name_date, wind_mph))) + geom_point(color = "dark blue", size = 2) + labs(x = "Max Wind Speed (mph)", y = "", title = "Max wind speed of hurricanes, 2011 to 2015") ``` -- <img src="figs/Lec7/dot-5-1.png" width="504" style="display: block; margin: auto;" /> --- class: left # Today's agenda <br> .large[ 1. heat maps 2. dot plots 3. **pie charts & parts of a whole** 4. maps ] --- # Parts of a whole .center[![chart](figs/Lec7/table.png)] .right[*Fundamentals of Data Visualization*, Claus Wilke] --- # Pie charts <br> .large[Use with caution!] 
<br> <br> -- <br> .large[**Only when parts add up to 100%**] <br> <br> -- <br> .large[Only when number of categories is small] <br> .medium[otherwise too difficult to determine differences] <br> <br> -- <br> .large[Never show multiple pie charts over time] <br> .medium[generally another viz type would be better] --- # Today's data <br> .center[`penguins`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> species </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> island </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> bill_length_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> bill_depth_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> flipper_length_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> body_mass_g </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> sex </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 39.1 </td> <td style="text-align:right;"> 18.7 </td> <td style="text-align:right;"> 181 </td> <td style="text-align:right;"> 3750 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 39.5 
</td> <td style="text-align:right;"> 17.4 </td> <td style="text-align:right;"> 186 </td> <td style="text-align:right;"> 3800 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 40.3 </td> <td style="text-align:right;"> 18.0 </td> <td style="text-align:right;"> 195 </td> <td style="text-align:right;"> 3250 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:left;"> </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 36.7 </td> <td style="text-align:right;"> 19.3 </td> <td style="text-align:right;"> 193 </td> <td style="text-align:right;"> 3450 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> </tbody> </table> -- <br> <br> .center[**how many penguin species are represented in this data?**] --- # Pie charts ```r penguins %>% count(species) %>% ggplot(aes(x="", y = n, fill = species)) + * geom_bar(stat = "identity", width = 1, color = "black") + * coord_polar("y", start = 0) ``` -- <img src="figs/Lec7/pie-1-1.png" width="504" style="display: block; margin: auto;" /> --- # Pie charts ```r penguins %>% count(species) %>% ggplot(aes(x="", y = n, fill = species)) + geom_bar(stat = "identity", width = 1, color = "black") + coord_polar("y", start = 0) + * theme_void() ``` -- <img src="figs/Lec7/pie-2-1.png" width="504" style="display: block; margin: auto;" /> --- # Pie charts ```r penguins %>% count(species) %>% * mutate(label = 
paste0(species, "\nn = ", n)) %>% ggplot(aes(x="", y = n, fill = species)) + geom_bar(stat = "identity", width = 1, color = "black") + coord_polar("y", start = 0) + geom_text(aes(label = label), position = position_stack(vjust = 0.5)) + theme_void() ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> species </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> n </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> label </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:right;"> 152 </td> <td style="text-align:left;"> Adelie n = 152 </td> </tr> <tr> <td style="text-align:left;"> Chinstrap </td> <td style="text-align:right;"> 68 </td> <td style="text-align:left;"> Chinstrap n = 68 </td> </tr> <tr> <td style="text-align:left;"> Gentoo </td> <td style="text-align:right;"> 124 </td> <td style="text-align:left;"> Gentoo n = 124 </td> </tr> </tbody> </table> --- # Pie charts ```r penguins %>% count(species) %>% * mutate(label = paste0(species, "\nn = ", n)) %>% ggplot(aes(x="", y = n, fill = species)) + geom_bar(stat = "identity", width = 1, color = "black") + coord_polar("y", start = 0) + * geom_text(aes(label = label), * position = position_stack(vjust = 0.5)) + theme_void() ``` --- # Pie charts <img src="figs/Lec7/pie-5-1.png" width="504" style="display: block; margin: auto;" /> --- # Pie charts ```r penguins %>% count(species) %>% mutate(label = paste0(species, "\nn = ", n)) %>% ggplot(aes(x="", y = n, fill = species)) + geom_bar(stat = "identity", width = 1, color = "black") + coord_polar("y", start = 0) + * scale_fill_manual(values = c("#496185","#965641","#D7B377")) + geom_text(aes(label = label), position = 
position_stack(vjust = 0.5)) + * labs(title = "The distribution of penguin species") + theme_void() + * theme(legend.position = "none", * plot.title = element_text(hjust = 0.5)) ``` --- # Pie charts <img src="figs/Lec7/pie-6-1.png" width="504" style="display: block; margin: auto;" /> --- # Today's data <br> .center[`penguins`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> species </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> island </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> bill_length_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> bill_depth_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> flipper_length_mm </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> body_mass_g </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> sex </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> year </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 39.1 </td> <td style="text-align:right;"> 18.7 </td> <td style="text-align:right;"> 181 </td> <td style="text-align:right;"> 3750 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 39.5 </td> <td 
style="text-align:right;"> 17.4 </td> <td style="text-align:right;"> 186 </td> <td style="text-align:right;"> 3800 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 40.3 </td> <td style="text-align:right;"> 18.0 </td> <td style="text-align:right;"> 195 </td> <td style="text-align:right;"> 3250 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:right;"> </td> <td style="text-align:left;"> </td> <td style="text-align:right;"> 2007 </td> </tr> <tr> <td style="text-align:left;"> Adelie </td> <td style="text-align:left;"> Torgersen </td> <td style="text-align:right;"> 36.7 </td> <td style="text-align:right;"> 19.3 </td> <td style="text-align:right;"> 193 </td> <td style="text-align:right;"> 3450 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 2007 </td> </tr> </tbody> </table> -- <br> <br> .center[**how does the distribution of body mass differ by penguin species?**] --- # Comparing dist. to a whole **standard distribution with `geom_density`** <img src="figs/Lec7/dist-1-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. to a whole **distribution comparison with `facet_wrap`** <img src="figs/Lec7/dist-2-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. to a whole **distribution comparison with `geom_density`** <img src="figs/Lec7/dist-3-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. 
to a whole **distribution comparison with `ggridges`** <img src="figs/Lec7/dist-4-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. to a whole ```r penguins %>% ggplot(aes(x = body_mass_g, y = ..count..)) + * geom_density_line(data = select(penguins, -species), * aes(fill = "all penguins"), color = "transparent") ``` -- <img src="figs/Lec7/dist-5-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. to a whole ```r penguins %>% ggplot(aes(x = body_mass_g, y = ..count..)) + geom_density_line(data = select(penguins, -species), aes(fill = "all penguins"), color = "transparent") + * geom_density_line(aes(fill = "species"), color = "transparent") + * facet_wrap(~species, nrow = 1) ``` -- <img src="figs/Lec7/dist-6-1.png" width="504" style="display: block; margin: auto;" /> --- # Comparing dist. to a whole ```r penguins %>% ggplot(aes(x = body_mass_g, y = ..count..)) + geom_density_line(data = select(penguins, -species), aes(fill = "all penguins"), color = "transparent") + geom_density_line(aes(fill = "species"), color = "transparent") + facet_wrap(~species, nrow = 1) + * scale_fill_manual(values = c("grey","#0C8346"), name = NULL, * guide = guide_legend(direction = "horizontal")) + * labs(x = "Body Mass (g)") + * theme(legend.position = "top") ``` --- # Comparing dist. to a whole <img src="figs/Lec7/dist-7-1.png" width="504" style="display: block; margin: auto;" /> --- class: left # Today's agenda <br> .large[ 1. heat maps 2. dot plots 3. pie charts & parts of a whole 4. **maps** ] --- # Maps <br> .large[Various methods to make maps with R/`ggplot2`] <br> .medium[an easy option is the `maps` package] <br> <br> -- <br> .large[Choropleths & cartograms] <br> <br> -- <br> .large[Be careful with continuous vs. 
discrete color scales] <br> .medium[continuous allows for more nuance but makes it hard to mark a specific value] <br> <br> -- <br> .large[For choropleth maps, watch out for areas drawing disproportionate attention due to their size] <br> .medium[counts are almost never appropriate] --- # Choropleth maps **use `map_data` from the `maps` package** ```r tx_map <- map_data("county", region = "texas") ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> long </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> lat </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> group </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> order </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> region </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> subregion </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> -95.75271 </td> <td style="text-align:right;"> 31.53560 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> </tr> <tr> <td style="text-align:right;"> -95.76989 </td> <td style="text-align:right;"> 31.55852 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> </tr> <tr> <td style="text-align:right;"> -95.76416 </td> <td style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 3 </td> 
<td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> </tr> <tr> <td style="text-align:right;"> -95.72979 </td> <td style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> </tr> <tr> <td style="text-align:right;"> -95.74698 </td> <td style="text-align:right;"> 31.61008 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> </tr> </tbody> </table> --- # Choropleth maps ```r tx_map %>% ggplot(aes(x = long, y = lat, group = group)) + geom_polygon(fill = "gray", color = "white") + theme_void() ``` -- <img src="figs/Lec7/map-3-1.png" width="504" style="display: block; margin: auto;" /> --- # Today's data <br> .center[`tx_data`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> County </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Age </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Total </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Total_Male </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Total_Female </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> Hispanic_Total </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:left;"> All Ages </td> <td style="text-align:right;"> 59025 </td> <td 
style="text-align:right;"> 36468 </td> <td style="text-align:right;"> 22557 </td> <td style="text-align:right;"> 10708 </td> </tr> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:left;"> < 1 Year </td> <td style="text-align:right;"> 703 </td> <td style="text-align:right;"> 356 </td> <td style="text-align:right;"> 347 </td> <td style="text-align:right;"> 161 </td> </tr> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:left;"> 1 Years </td> <td style="text-align:right;"> 666 </td> <td style="text-align:right;"> 333 </td> <td style="text-align:right;"> 333 </td> <td style="text-align:right;"> 159 </td> </tr> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:left;"> 2 Years </td> <td style="text-align:right;"> 584 </td> <td style="text-align:right;"> 290 </td> <td style="text-align:right;"> 294 </td> <td style="text-align:right;"> 134 </td> </tr> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:left;"> 3 Years </td> <td style="text-align:right;"> 569 </td> <td style="text-align:right;"> 293 </td> <td style="text-align:right;"> 276 </td> <td style="text-align:right;"> 134 </td> </tr> </tbody> </table> -- <br> <br> .center[**how does the share of Hispanic people vary by county?**] --- # Choropleth map ```r tx_data_agg <- tx_data %>% filter(Age == "All Ages" & County != "STATE OF TEXAS") %>% mutate(hispanic_perc = Hispanic_Total / Total) %>% select(County, hispanic_perc) %>% * mutate(county_lower = str_replace_all(County, " COUNTY", ""), * county_lower = str_to_lower(county_lower)) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> County </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> hispanic_perc 
</th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> county_lower </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> anderson </td> </tr> <tr> <td style="text-align:left;"> ANDREWS COUNTY </td> <td style="text-align:right;"> 0.6212459 </td> <td style="text-align:left;"> andrews </td> </tr> <tr> <td style="text-align:left;"> ANGELINA COUNTY </td> <td style="text-align:right;"> 0.2261372 </td> <td style="text-align:left;"> angelina </td> </tr> <tr> <td style="text-align:left;"> ARANSAS COUNTY </td> <td style="text-align:right;"> 0.2967524 </td> <td style="text-align:left;"> aransas </td> </tr> <tr> <td style="text-align:left;"> ARCHER COUNTY </td> <td style="text-align:right;"> 0.1045730 </td> <td style="text-align:left;"> archer </td> </tr> </tbody> </table> --- # Choropleth map ```r tx_map <- tx_map %>% left_join(tx_data_agg, by = c("subregion" = "county_lower")) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> long </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> lat </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> group </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> order </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> region </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> subregion </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: 
#bb0000 !important;"> County </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> hispanic_perc </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> -95.75271 </td> <td style="text-align:right;"> 31.53560 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> </tr> <tr> <td style="text-align:right;"> -95.76989 </td> <td style="text-align:right;"> 31.55852 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> </tr> <tr> <td style="text-align:right;"> -95.76416 </td> <td style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> </tr> <tr> <td style="text-align:right;"> -95.72979 </td> <td style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> </tr> <tr> <td style="text-align:right;"> -95.74698 </td> <td style="text-align:right;"> 31.61008 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td 
style="text-align:right;"> 0.1814147 </td> </tr> </tbody> </table> --- # Choropleth map ```r tx_map %>% ggplot(aes(x = long, y = lat, group = group, fill = hispanic_perc)) + geom_polygon(color = "white") ``` -- <img src="figs/Lec7/map-8-1.png" width="504" style="display: block; margin: auto;" /> --- # Choropleth map ```r tx_map %>% ggplot(aes(x = long, y = lat, group = group, fill = hispanic_perc)) + geom_polygon(color = "white") + scale_fill_gradient(low = "#fafafa", high = "#191970", breaks = seq(0, 0.75, 0.25), name = NULL, labels = percent) ``` -- <img src="figs/Lec7/map-9-1.png" width="504" style="display: block; margin: auto;" /> --- # Choropleth map ```r tx_map %>% ggplot(aes(x = long, y = lat, group = group, fill = hispanic_perc)) + geom_polygon(color = "white") + scale_fill_gradient(low = "#fafafa", high = "#191970", breaks = seq(0, 0.75, 0.25), name = NULL, labels = percent) + labs(title = "Percentage of Hispanic people by county in Texas", subtitle = "2019 population estimate") + theme_void() + theme(legend.position = c(0.2, 0.8)) ``` --- # Choropleth map <img src="figs/Lec7/map-10-1.png" width="504" style="display: block; margin: auto;" /> --- # Choropleth map **binning data for a discrete scale** ```r tx_map %>% mutate(perc_bin = case_when(hispanic_perc >= 0.8 ~ "80-100%", hispanic_perc >= 0.6 ~ "60-80%", hispanic_perc >= 0.4 ~ "40-60%", hispanic_perc >= 0.2 ~ "20-40%", TRUE ~ "0-20%")) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> long </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> lat </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> group </th> <th style="text-align:right;font-weight: bold;color: white 
!important;background-color: #bb0000 !important;"> order </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> region </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> subregion </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> County </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> hispanic_perc </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> perc_bin </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> -95.75271 </td> <td style="text-align:right;"> 31.53560 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> 0-20% </td> </tr> <tr> <td style="text-align:right;"> -95.76989 </td> <td style="text-align:right;"> 31.55852 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> 0-20% </td> </tr> <tr> <td style="text-align:right;"> -95.76416 </td> <td style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> 0-20% </td> </tr> <tr> <td style="text-align:right;"> -95.72979 </td> <td 
style="text-align:right;"> 31.58143 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> 0-20% </td> </tr> <tr> <td style="text-align:right;"> -95.74698 </td> <td style="text-align:right;"> 31.61008 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> texas </td> <td style="text-align:left;"> anderson </td> <td style="text-align:left;"> ANDERSON COUNTY </td> <td style="text-align:right;"> 0.1814147 </td> <td style="text-align:left;"> 0-20% </td> </tr> </tbody> </table> --- # Choropleth map ```r tx_map %>% mutate(perc_bin = case_when(hispanic_perc >= 0.8 ~ "80-100%", hispanic_perc >= 0.6 ~ "60-80%", hispanic_perc >= 0.4 ~ "40-60%", hispanic_perc >= 0.2 ~ "20-40%", TRUE ~ "0-20%")) %>% ggplot(aes(x = long, y = lat, group = group, fill = perc_bin)) + geom_polygon(color = "white") + * scale_fill_manual(values = c("#EEEEFB","#BDBDEF","#8C8CE3", * "#4A4AD3","#191970"), * name = NULL) + labs(title = "Percentage of Hispanic people by county in Texas", subtitle = "2019 population estimate") + theme_void() + theme(legend.position = c(0.17, 0.8)) ``` --- # Choropleth map <img src="figs/Lec7/map-13-1.png" width="504" style="display: block; margin: auto;" /> --- # Cartogram <br> .large[By definition:] <br> .medium[a map in which geographic size is altered to be directly proportional to a selected variable] <br> .medium[can be tricky to interpret] <br> <br> -- .center[![cartogram](figs/Lec7/cartogram.png)] .center[*data-to-viz.com*] --- # Cartogram <br> .large[By definition:] <br> .medium[a map in which geographic size is altered to be directly proportional to a selected variable] <br> .medium[can be tricky to interpret] <br> <br> <br> .large[Cartogram heatmap is a popular 
variation] <br> .medium[shows each state as a colored square] <br> .medium[doesn't weigh states by shape or size, but it also doesn't adjust for population] <br> <br> -- <br> .large[Use the `statebins` package] --- # Today's data <br> .center[`us_rent_income`]<br> <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> GEOID </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> NAME </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> variable </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> estimate </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> moe </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 01 </td> <td style="text-align:left;"> Alabama </td> <td style="text-align:left;"> income </td> <td style="text-align:right;"> 24476 </td> <td style="text-align:right;"> 136 </td> </tr> <tr> <td style="text-align:left;"> 01 </td> <td style="text-align:left;"> Alabama </td> <td style="text-align:left;"> rent </td> <td style="text-align:right;"> 747 </td> <td style="text-align:right;"> 3 </td> </tr> <tr> <td style="text-align:left;"> 02 </td> <td style="text-align:left;"> Alaska </td> <td style="text-align:left;"> income </td> <td style="text-align:right;"> 32940 </td> <td style="text-align:right;"> 508 </td> </tr> <tr> <td style="text-align:left;"> 02 </td> <td style="text-align:left;"> Alaska </td> <td style="text-align:left;"> rent </td> <td style="text-align:right;"> 1200 </td> <td style="text-align:right;"> 13 </td> </tr> <tr> <td style="text-align:left;"> 04 </td> <td style="text-align:left;"> Arizona </td> <td style="text-align:left;"> 
income </td> <td style="text-align:right;"> 27517 </td> <td style="text-align:right;"> 148 </td> </tr> </tbody> </table> -- <br> <br> .center[**how does the median monthly rent vary by state?**] --- # Cartogram ```r us_rent_income %>% select(-c(moe)) %>% pivot_wider(names_from = variable, values_from = estimate) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> GEOID </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> NAME </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> income </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> rent </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 01 </td> <td style="text-align:left;"> Alabama </td> <td style="text-align:right;"> 24476 </td> <td style="text-align:right;"> 747 </td> </tr> <tr> <td style="text-align:left;"> 02 </td> <td style="text-align:left;"> Alaska </td> <td style="text-align:right;"> 32940 </td> <td style="text-align:right;"> 1200 </td> </tr> <tr> <td style="text-align:left;"> 04 </td> <td style="text-align:left;"> Arizona </td> <td style="text-align:right;"> 27517 </td> <td style="text-align:right;"> 972 </td> </tr> <tr> <td style="text-align:left;"> 05 </td> <td style="text-align:left;"> Arkansas </td> <td style="text-align:right;"> 23789 </td> <td style="text-align:right;"> 709 </td> </tr> <tr> <td style="text-align:left;"> 06 </td> <td style="text-align:left;"> California </td> <td style="text-align:right;"> 29454 </td> <td style="text-align:right;"> 1358 </td> </tr> </tbody> </table> --- # Cartogram ```r us_rent_income %>% select(-c(moe)) %>% pivot_wider(names_from = variable, values_from = estimate) %>% mutate(rent_bin 
= cut(rent, breaks = c(-Inf, 800, 1000, 1200, Inf), labels = c("< $800","$800 - $1000", "$1000 - $1200","1200+"))) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> GEOID </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> NAME </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> income </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> rent </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> rent_bin </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 01 </td> <td style="text-align:left;"> Alabama </td> <td style="text-align:right;"> 24476 </td> <td style="text-align:right;"> 747 </td> <td style="text-align:left;"> < $800 </td> </tr> <tr> <td style="text-align:left;"> 02 </td> <td style="text-align:left;"> Alaska </td> <td style="text-align:right;"> 32940 </td> <td style="text-align:right;"> 1200 </td> <td style="text-align:left;"> $1000 - $1200 </td> </tr> <tr> <td style="text-align:left;"> 04 </td> <td style="text-align:left;"> Arizona </td> <td style="text-align:right;"> 27517 </td> <td style="text-align:right;"> 972 </td> <td style="text-align:left;"> $800 - $1000 </td> </tr> <tr> <td style="text-align:left;"> 05 </td> <td style="text-align:left;"> Arkansas </td> <td style="text-align:right;"> 23789 </td> <td style="text-align:right;"> 709 </td> <td style="text-align:left;"> < $800 </td> </tr> <tr> <td style="text-align:left;"> 06 </td> <td style="text-align:left;"> California </td> <td style="text-align:right;"> 29454 </td> <td style="text-align:right;"> 1358 </td> <td style="text-align:left;"> 1200+ </td> </tr> 
</tbody> </table> --- # Cartogram ```r us_rent_income %>% select(-c(moe)) %>% pivot_wider(names_from = variable, values_from = estimate) %>% mutate(rent_bin = cut(rent, breaks = c(-Inf, 800, 1000, 1200, Inf), labels = c("< $800","$800 - $1000", "$1000 - $1200","1200+"))) %>% rename(state = NAME) ``` -- <table class="table" style="font-size: 16px; width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> GEOID </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> state </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> income </th> <th style="text-align:right;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> rent </th> <th style="text-align:left;font-weight: bold;color: white !important;background-color: #bb0000 !important;"> rent_bin </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> 01 </td> <td style="text-align:left;"> Alabama </td> <td style="text-align:right;"> 24476 </td> <td style="text-align:right;"> 747 </td> <td style="text-align:left;"> < $800 </td> </tr> <tr> <td style="text-align:left;"> 02 </td> <td style="text-align:left;"> Alaska </td> <td style="text-align:right;"> 32940 </td> <td style="text-align:right;"> 1200 </td> <td style="text-align:left;"> $1000 - $1200 </td> </tr> <tr> <td style="text-align:left;"> 04 </td> <td style="text-align:left;"> Arizona </td> <td style="text-align:right;"> 27517 </td> <td style="text-align:right;"> 972 </td> <td style="text-align:left;"> $800 - $1000 </td> </tr> <tr> <td style="text-align:left;"> 05 </td> <td style="text-align:left;"> Arkansas </td> <td style="text-align:right;"> 23789 </td> <td style="text-align:right;"> 709 </td> <td style="text-align:left;"> < $800 </td> </tr> <tr> <td style="text-align:left;"> 06 
</td> <td style="text-align:left;"> California </td> <td style="text-align:right;"> 29454 </td> <td style="text-align:right;"> 1358 </td> <td style="text-align:left;"> 1200+ </td> </tr> </tbody> </table> --- # Cartogram ```r us_rent_income %>% select(-c(moe)) %>% pivot_wider(names_from = variable, values_from = estimate) %>% mutate(rent_bin = cut(rent, breaks = c(-Inf, 800, 1000, 1200, Inf), labels = c("< $800","$800 - $1000", "$1000 - $1200","1200+"))) %>% rename(state = NAME) %>% * statebins(value_col = "rent_bin", name = "Average Rent", * round = TRUE, # creates a square w/ rounded edges * ggplot2_scale_function = scale_fill_manual, * values = c("#EEEEFB","#BDBDEF","#4A4AD3","#191970")) + labs(title = "Average Rent Per State (2017)") + theme_statebins() ``` --- # Cartogram <img src="figs/Lec7/cart-7-1.png" width="504" style="display: block; margin: auto;" /> --- # Upcoming <br> .large[Homework 2 due Tuesday June 8] <br> .large[Lab 5 due Wednesday June 9] <br> .large[Lecture 8 on Wednesday June 9]