across() function appreciation

Wrangle and change multiple columns with the across() function from dplyr.

library(tidyverse)
library(gapminder)
library(ggthemes)

So quick! So simple!

library(dplyr)

# Example data frame with four mission columns (mission_1 to mission_4);
# swap in your own data as long as it has these columns.
df <- tibble(
  mission_1 = c("MissionA", NA, "MissionC", "MissionD"),
  mission_2 = c(NA, "MissionE", "MissionF", NA),
  mission_3 = c("MissionG", "MissionH", NA, "MissionJ"),
  mission_4 = c(NA, NA, "MissionK", "MissionL")
)

# For every row, count how many of the mission columns hold a string that is
# neither missing nor empty.
df <- df %>%
  rowwise() %>%
  mutate(
    non_na_string_count = {
      # Gather this row's mission values once and reuse them for both checks.
      missions <- c_across(mission_1:mission_4)
      sum(!is.na(missions) & nchar(missions) > 0)
    }
  ) %>%
  ungroup()

print(df)

# Sum every personnel category for each year/contributor pair.
#
# A plain `tcc_sum <- tcc %>% ...` is used on purpose: the compound pipe
# `%<>%` is not attached by library(tidyverse) (only `%>%` is), and it would
# also overwrite `tcc` itself with the summarised table.
#
# NOTE(review): `inidividual_police` looks like a misspelling of
# "individual"; presumably it mirrors the column name in the raw data after
# clean_names() -- confirm before renaming.
tcc_sum <- tcc %>%
  janitor::clean_names() %>%
  group_by(year, contributor) %>%
  summarise(
    across(
      c(experts_on_mission, formed_police_units,
        inidividual_police, civilian_police, troops,
        observers, total),
      ~ sum(.x, na.rm = TRUE)
    ),
    # Return an ungrouped table instead of one still grouped by `year`.
    .groups = "drop"
  )

# Build `pema_check`, a presence/absence (1/0) version of `pema`:
#   1. turn the identifier columns into factors and parse `date`;
#   2. drop the mandate_renewal:mandate_completeadjustment range, `comments`,
#      and any column that is entirely NA;
#   3. recode character columns to 1 (non-empty string) or 0 (empty string);
#   4. recode numeric columns to 1 (non-zero) or 0 (zero or NA);
#   5. add the calendar year of `date`.
pema_check <- pema %>%
  mutate(across(c(signature, namepko, country), as.factor)) %>%
  # Dates arrive as day/month/year text, e.g. 18/06/1998.
  mutate(date = as.Date(date, format = "%d/%m/%Y")) %>%
  select(-c(mandate_renewal:mandate_completeadjustment, comments)) %>%
  # Keep only columns holding at least one non-missing value.
  select(where(~ any(!is.na(.x)))) %>%
  # NA strings stay NA here; the next step turns them into 0.
  mutate(across(where(is.character), ~ if_else(nchar(.x) > 0, 1, 0))) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  mutate(across(where(is.numeric), ~ if_else(.x != 0, 1, 0))) %>%
  mutate(year = year(date))

Mutate all numeric variables and calculate the mean for each continent across all years in the dataset.

Then use the .names argument of across() to give each new column its own name (for example, "avg_{col}")!

# For each continent, average every numeric column over all years, take the
# natural log of every numeric column (including the new averages), and plot
# logged mean GDP per capita against logged mean life expectancy.
my_plot <- gapminder %>%
  group_by(continent) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  # Use a lambda: passing extra arguments such as `na.rm` through across()'s
  # `...` is deprecated since dplyr 1.1.0.
  mutate(across(where(is.numeric), ~ mean(.x, na.rm = TRUE),
                .names = "avg_{col}")) %>%
  mutate(across(where(is.numeric), log,
                .names = "ln_{col}")) %>%
  ungroup() %>%
  # The averaged columns are constant within a continent, so keep one row per
  # continent; otherwise every original row draws an identical point and
  # label on top of the others.
  distinct(continent, ln_avg_gdpPercap, ln_avg_lifeExp) %>%
  ggplot(aes(x = ln_avg_gdpPercap,
             y = ln_avg_lifeExp,
             group = continent)) +
  geom_point() +
  geom_label(aes(label = continent,
                 fill = continent),
             color = "#f0f0f0",
             size = 8)

Here is some optional code if you want to make the graph a bit prettier.

First, define some dark hex colors:

# Dark hex colours, written without the leading "#".
my_palette <- c("570211", "7e3110", "004540", "032c4d", "360825")

# Prefix each colour code with "#".
#
# my_vec: vector of hex colour codes, with or without a leading "#".
# Returns a character vector the same length as `my_vec` in which every
# element starts with exactly one "#". Empty input gives character(0).
add_hashtag <- function(my_vec) {
  # paste0("#", character(0)) would return "#", so handle empty input first.
  if (length(my_vec) == 0L) {
    return(character(0))
  }
  # Strip an existing "#" so already-prefixed codes are not doubled up.
  bare_codes <- sub("^#", "", my_vec)
  paste0("#", bare_codes)
}

pal_hash <- add_hashtag(my_palette)

Then add some labels and adjust the look of the plot:

# Add a title and axis labels, apply the FiveThirtyEight theme, and fill the
# continent labels with the custom dark palette.
my_plot +
  ggtitle("Scatterplot of average GDP and life expectancy, 1952-2007") +
  xlab("Average GDP per capita (logged)") +
  ylab("Average life expectancy (logged)") +
  ggthemes::theme_fivethirtyeight() +
  xlim(7.5, 10.1) +
  scale_fill_manual(values = pal_hash) +
  theme(legend.position = "none",
        plot.title = element_text(size = 25),
        # theme_fivethirtyeight() blanks the axis titles; switch them back on
        # so the xlab()/ylab() text above is actually drawn.
        axis.title = element_text(),
        text = element_text(family = "Arial"))

Leave a comment