User:Ralbegen/Opinion poll code

From Wikipedia, the free encyclopedia

For the graph of UK general election opinion polls I maintain:

align=left

I have a csv of general election results called gen2019.csv that looks like this:

date,Conservatives,Labour,Lib Dems,SNP,Greens,Others
2019-12-12,.447,.329,.118,.040,.028,.043

And a csv of polls called pollsafter2019.csv organised like this:

date,Conservatives,Labour,Lib Dems,SNP,Greens,Others
2020-01-10,.44,.29,.11,.03,.05,.06
2020-01-17,.47,.30,.09,.05,.04,.06
...

(The full csv as of time of writing, which should work with the end date in the below code, is at the bottom of this page)

The R code goes like this:

# Libraries ---------------------------------------------------------------

library(here)
library(tidyverse)
library(scales)
library(grid)
library(ggrepel)
library(lubridate)

# Functions ---------------------------------------------------------------

import_plot_data <- function(path, import_others = FALSE) {
  initial_data <- read_csv(here(path)) %>%
    pivot_longer(cols = 2:dim(.)[2],
                 names_to = "party",
                 values_to = "value"
    ) %>%
    drop_na(value) %>%
    arrange(date,
            party)
  
  if (import_others) {
    data <- initial_data
  } else {
    data <- initial_data %>%
      filter(party != "Others", party != "Other")
  }
  
  return(data)
}

import_colours <- function(path, import_others = FALSE) {
  initial_colours <- read_csv(here(path)) %>%
    arrange(party)
  
  if (import_others) {
    colours <- initial_colours
  } else if (!import_others) {
    colours <- initial_colours %>%
      filter(party != "Others", party != "Other")
  } else return(null)

  return(colours)
}

calculate_label_positions <- function(polls, plot_span) {
  return(polls %>%
           mutate(date = as.numeric(date)) %>%
           nest(-party) %>%
           mutate(m = map(data,
                          loess,
                          formula = value ~ date,
                          span    = plot_span),
                  fitted  = map(m, `[[`, "fitted")) %>%
           select(-m) %>%
           unnest(cols = c(data, fitted)) %>%
           arrange(date, party) %>%
           group_by(party) %>%
           mutate(date = as.Date(date, origin = "1970-01-01")) %>%
           slice(which.max(as.Date(date))) %>%
           ungroup()
  )
}

generate_poll_plot <- function(polls, 
                               elections,
                               party_colours,
                               label_positions,
                               plot_span,
                               upper_limit,
                               break_period,
                               first_date,
                               last_date
) {
  
  poll_plot_c <- ggplot(data = polls,
                        mapping = aes(x = date, y = value, colour = party)) +
    geom_point(data = elections, shape = "square") +
    geom_vline(xintercept = elections$date, alpha = .25) +
    geom_point(alpha = .5, size = 1) +
    geom_smooth(method = "loess",
                formula = y ~ x,
                se = FALSE,
                n = 1000,
                span = plot_span,
                alpha = 0) +
    scale_colour_manual(values = party_colours) +
    labs(x = "", y = "") +
    theme(legend.position = "none") + 
    scale_y_continuous(labels = percent_format(accuracy = 1),
                       breaks = seq(from = .0, to = upper_limit, .1), 
                       limits = c(-.01, upper_limit + .01),
                       expand = c(0, 0)) +
    scale_x_date(date_labels = "%b %Y",
                 breaks = break_period,
                 limits = c(first_date, last_date),
                 expand = c(0, 0)) +
    geom_text_repel(data = label_positions,
                    mapping = aes(label = party, colour = party, y = fitted),
                    direction = "y",
                    hjust = "left",
                    nudge_x = 16,
                    min.segment.length = 2,
                    xlim = c(first_date, last_date + duration(1000, "years"))) +
    theme(plot.margin = unit(c(1,6,.5,1), "lines"),
          panel.background = element_rect(fill = "#EEEEEE"))
  
  poll_plot_uc <- ggplotGrob(poll_plot_c)
  poll_plot_uc$layout$clip[poll_plot_uc$layout$name == "panel"] <- "off"
  return(poll_plot_uc)
}

plot_polls <- function(dir, 
                       polls, 
                       elections, 
                       colours, 
                       include_others = FALSE, 
                       plot_span,
                       draw = TRUE,
                       break_period,
                       size) {
  
  elections_to_plot <- import_plot_data(path = paste0(dir, "/", elections),
                                        import_others = include_others)
  polls_to_plot <- import_plot_data(path = paste0(dir, "/", polls),
                                    import_others = include_others)
  
  upper_limit <- ceiling(max(polls_to_plot$value * 10))/10
  
  label_positions <- calculate_label_positions(polls_to_plot, plot_span)
  
  first_date <- floor_date(elections_to_plot$date[[1]], unit = "month")
  last_date <- as.Date(label_positions$date[[1]]) + duration(7, "days")
  
  party_colours <- import_colours(path = paste0(dir, "/", colours),
                                  import_others = include_others)$colour
  
  poll_plot <- generate_poll_plot(polls_to_plot, 
                                  elections_to_plot,
                                  party_colours,
                                  label_positions,
                                  plot_span,
                                  upper_limit,
                                  break_period,
                                  first_date,
                                  last_date)
  
  if (draw == TRUE) grid.draw(poll_plot)
  if (size == "primary") {
    plot_width <- 720
    plot_height <- 420
  } else if (size == "secondary") {
    plot_width <- 500
    plot_height <- 300
  } else print("Invalid height!")
  
  ggsave(filename = paste0(dir, "/plots/plot_", date(), ".svg"),
         plot = poll_plot,
         scale = 0.4,
         width = plot_width,
         height = plot_height,
         units = "mm")
}

Please feel free to adapt the code for your own purposes! Just please note I'm not likely to be much help if you need technical assistance with it.

The full csv is here:

date,Conservatives,Labour,Lib Dems,SNP,Greens,Others
2020-01-10,.44,.29,.11,.03,.05,.06
2020-01-17,.47,.30,.09,.05,.04,.06
2020-01-26,.49,.29,.10,.05,.04,.03
2020-01-31,.44,.33,.10,.05,.03,.05
2020-02-02,.49,.30,.08,.04,.05,.04
2020-02-03,.47,.30,.11,.04,.05,.03
2020-02-07,.41,.29,.11,.05,.08,.07
2020-02-10,.48,.28,.10,.04,.06,.04
2020-02-12,.49,.31,.09,.04,.04,.03
2020-02-14,.47,.32,.07,.06,.04,.04
2020-02-20,.47,.31,.09,.04,.04,.04
2020-03-06,.45,.28,.11,.03,.06,.06
2020-03-09,.50,.29,.11,.04,.02,.05
2020-03-13,.49,.32,.06,.05,.05,.04
2020-03-16,.52,.30,.09,.04,.04,.01
2020-03-20,.51,.31,.07,.05,.03,.03
2020-03-23,.47,.29,.08,.05,.05,.06
2020-03-26,.54,.28,.07,.04,.04,.02
2020-03-27,.54,.28,.06,.05,.03,.03
2020-04-02,.49,.29,.08,.04,.04,.03
2020-04-02,.52,.28,.08,.05,.05,.06
2020-04-03,.53,.30,.07,.05,.03,.03
2020-04-09,.55,.29,.05,.05,.04,.05
2020-04-09,.46,.29,.10,.03,.06,.02
2020-04-17,.52,.31,.08,.04,.03,.02
2020-04-17,.53,.32,.05,.04,.03,.03
2020-04-17,.51,.32,.06,.05,.03,.03
2020-04-20,.54,.28,.09,.04,.04,.02
2020-04-23,.50,.33,.07,.05,.03,.02
2020-04-26,.50,.33,.07,.05,.04,.01
2020-04-28,.48,.31,.08,.04,.05,.05
2020-05-01,.51,.33,.06,.05,.03,.03
2020-05-06,.50,.30,.07,.04,.05,.05
2020-05-06,.50,.31,.07,.04,.05,.03
2020-05-07,.49,.33,.06,.05,.05,.03
2020-05-11,.51,.32,.07,.05,.02,.04
2020-05-14,.49,.34,.06,.05,.03,.03
2020-05-15,.47,.35,.09,.04,.03,.02
2020-05-17,.46,.33,.07,.04,.04,.04
2020-05-19,.48,.33,.06,.05,.05,.04
2020-05-22,.47,.35,.06,.05,.03,.03
2020-05-26,.46,.33,.08,.05,.04,.04
2020-05-26,.44,.38,.06,.05,.04,.04
2020-05-27,.43,.38,.07,.05,.04,.05
2020-05-27,.43,.37,.09,.05,.03,.03
2020-05-28,.43,.38,.08,.03,.04,.04
2020-05-29,.43,.39,.06,.05,.03,.04
2020-05-30,.45,.35,.06,.05,.05,.03
2020-06-03,.43,.36,.09,.04,.05,.04
2020-06-03,.41,.39,.07,.04,.04,.05
2020-06-05,.41,.38,.08,.02,.04,.06
2020-06-05,.43,.40,.06,.05,.03,.03
2020-06-10,.43,.38,.10,.04,.03,.04
2020-06-10,.42,.36,.08,.05,.04,.05
2020-06-11,.41,.39,.09,.05,.04,.02
2020-06-12,.45,.37,.06,.05,.04,.03
2020-06-12,.44,.39,.06,.05,.02,.02
2020-06-14,.40,.36,.09,.05,.03,.08
2020-06-15,.43,.35,.08,.05,.04,.03
2020-06-18,.43,.38,.08,.04,.04,.04
2020-06-19,.44,.40,.05,.05,.03,.03
2020-06-25,.43,.36,.08,.05,.04,.04
2020-06-25,.44,.38,.07,.04,.04,.02
2020-06-26,.43,.39,.06,.05,.04,.04
2020-06-28,.45,.37,.06,.05,.04,.03
2020-07-01,.42,.38,.08,.04,.05,.02
2020-07-03,.41,.37,.08,.05,.04,.05
2020-07-03,.41,.36,.07,.03,.05,.08
2020-07-06,.44,.37,.07,.04,.04,.03
2020-07-08,.44,.39,.07,.04,.04,.02
2020-07-09,.46,.36,.06,.05,.03,.04
2020-07-10,.44,.38,.07,.03,.03,.05
2020-07-10,.42,.38,.06,.06,.04,.04
2020-07-12,.42,.36,.08,.04,.05,.05
2020-07-13,.45,.35,.09,.05,.02,.05
2020-07-15,.44,.37,.08,.04,.04,.02
2020-07-17,.44,.36,.06,.06,.04,.04
2020-07-19,.43,.37,.06,.05,.02,.07
2020-07-22,.44,.36,.08,.04,.05,.04
2020-07-23,.45,.35,.07,.05,.04,.05
2020-07-24,.42,.38,.06,.05,.04,.05
2020-07-29,.43,.38,.07,.04,.04,.03
2020-07-31,.41,.38,.06,.06,.04,.06
2020-07-31,.43,.35,.06,.05,.05,.05
2020-08-03,.44,.35,.08,.04,.05,.03
2020-08-04,.45,.37,.06,.05,.05,.02
2020-08-05,.42,.36,.08,.05,.05,.04
2020-08-10,.42,.35,.08,.06,.03,.06
2020-08-12,.44,.35,.05,.06,.03,.05
2020-08-12,.43,.36,.09,.04,.04,.03
2020-08-14,.42,.39,.05,.05,.03,.06
2020-08-16,.42,.37,.07,.04,.03,.07
2020-08-19,.40,.38,.06,.05,.06,.06
2020-08-19,.44,.37,.07,.04,.04,.03
2020-08-21,.41,.37,.09,.04,.04,.04
2020-08-24,.42,.37,.09,.04,.05,.03
2020-08-25,.43,.36,.06,.05,.04,.04
2020-08-28,.40,.40,.06,.05,.03,.02
2020-09-02,.43,.37,.08,.04,.04,.03
2020-09-04,.40,.38,.08,.05,.04,.06
2020-09-04,.43,.37,.06,.05,.04,.05
2020-09-08,.42,.38,.06,.05,.05,.04
2020-09-09,.42,.37,.06,.05,.04,.05
2020-09-11,.42,.39,.06,.05,.04,.04
2020-09-16,.41,.39,.08,.05,.05,.03
2020-09-16,.40,.38,.08,.05,.04,.06
2020-09-17,.40,.40,.06,.04,.05,.05
2020-09-18,.40,.37,.08,.06,.05,.03
2020-09-20,.40,.37,.08,.05,.03,.07
2020-09-21,.40,.38,.09,.04,.04,.07
2020-09-23,.40,.40,.07,.04,.05,.03
2020-09-24,.41,.38,.06,.04,.05,.03
2020-09-25,.39,.42,.05,.06,.04,.04
2020-09-25,.42,.38,.06,.05,.04,.05
2020-09-27,.41,.38,.08,.05,.03,.04
2020-09-30,.39,.39,.06,.05,.05,.06
2020-10-01,.39,.39,.08,.05,.05,.05
2020-10-04,.42,.39,.07,.04,.03,.05
2020-10-06,.41,.37,.07,.04,.04,.07
2020-10-07,.41,.38,.05,.05,.06,.05
2020-10-07,.41,.39,.08,.04,.04,.03
2020-10-09,.40,.40,.06,.06,.03,.06
2020-10-11,.39,.39,.07,.05,.04,.03
2020-10-15,.39,.38,.06,.05,.06,.07
2020-10-17,.41,.38,.05,.05,.06,.05
2020-10-18,.42,.36,.08,.04,.03,.07
2020-10-21,.40,.40,.07,.05,.04,.04
2020-10-22,.40,.39,.07,.05,.05,.06
2020-10-23,.38,.40,.06,.05,.05,.06
2020-10-24,.42,.39,.07,.03,.03,.07
2020-10-26,.42,.39,.07,.04,.03,.04
2020-10-28,.37,.42,.08,.06,.05,.03
2020-10-28,.39,.41,.07,.04,.04,.04
2020-10-29,.38,.38,.06,.05,.05,.07
2020-11-02,.40,.40,.07,.05,.03,.06
2020-11-05,.35,.40,.07,.05,.04,.09
2020-11-06,.39,.37,.09,.05,.04,.06
2020-11-06,.38,.42,.07,.05,.03,.06
2020-11-09,.40,.36,.08,.05,.05,.05
2020-11-09,.40,.36,.08,.05,.05,.05
2020-11-11,.40,.40,.07,.05,.05,.03
2020-11-12,.38,.40,.05,.05,.05,.06
2020-11-15,.41,.38,.05,.05,.04,.07
2020-11-18,.38,.37,.07,.06,.06,.07
2020-11-19,.40,.39,.08,.05,.04,.04
2020-11-20,.41,.38,.06,.06,.04,.05
2020-11-22,.39,.37,.07,.05,.04,.08
2020-11-27,.37,.40,.05,.06,.05,.08
2020-11-28,.39,.37,.07,.05,.05,.06
2020-11-28,.37,.38,.09,.04,.04,.08
2020-11-29,.39,.38,.08,.05,.03,.08
2020-12-02,.40,.37,.09,.05,.06,.04
2020-12-03,.38,.38,.06,.05,.05,.07
2020-12-04,.38,.40,.06,.06,.03,.08
2020-12-08,.40,.38,.06,.05,.04,.07
2020-12-09,.37,.38,.08,.05,.06,.07
2020-12-10,.39,.37,.08,.05,.05,.06
2020-12-10,.41,.41,.06,.05,.05,.02
2020-12-13,.38,.37,.08,.05,.05,.07
2020-12-14,.38,.37,.10,.05,.03,.06
2020-12-16,.39,.37,.06,.05,.06,.07
2020-12-17,.39,.39,.06,.05,.04,.07
2020-12-21,.41,.39,.08,.05,.04,.04
2020-12-22,.37,.41,.05,.05,.05,.06
2020-12-22,.39,.38,.08,.05,.04,.07
2020-12-29,.36,.38,.09,.04,.07,.07
2020-12-30,.43,.38,.04,.05,.05,.06
2021-01-05,.39,.39,.06,.05,.06,.06
2021-01-07,.39,.40,.06,.05,.04,.06
2021-01-10,.40,.37,.08,.04,.04,.07
2021-01-11,.41,.37,.08,.05,.05,.03
2021-01-13,.40,.38,.07,.04,.05,.07
2021-01-14,.38,.39,.05,.05,.06,.03
2021-01-15,.37,.41,.06,.06,.04,.06
2021-01-17,.39,.37,.07,.05,.03,.05
2021-01-18,.40,.38,.08,.04,.05,.04
2021-01-22,.39,.38,.05,.05,.06,.07
2021-01-23,.41,.39,.07,.04,.03,.06
2021-01-24,.40,.37,.08,.05,.03,.07
2021-01-25,.40,.37,.10,.04,.05,.04
2021-01-25,.42,.37,.08,.05,.04,.05
2021-01-27,.37,.41,.06,.05,.04,.07
2021-01-29,.41,.38,.07,.05,.04,.05
2021-01-31,.41,.38,.06,.05,.03,.07
2021-02-01,.43,.37,.05,.05,.05,.05
2021-02-01,.40,.38,.08,.05,.04,.04
2021-02-02,.39,.38,.07,.06,.06,.04
2021-02-04,.42,.38,.07,.05,.08,.00
2021-02-06,.39,.33,.09,.05,.07,.06
2021-02-07,.41,.37,.08,.04,.04,.07
2021-02-08,.43,.38,.07,.05,.04,.03
2021-02-10,.41,.36,.06,.05,.07,.07
2021-02-12,.42,.37,.06,.05,.05,.05
2021-02-14,.42,.37,.07,.04,.03,.07
2021-02-15,.41,.39,.08,.05,.05,.03
2021-02-18,.40,.37,.07,.06,.05,.05
2021-02-21,.40,.38,.07,.04,.03,.09
2021-02-22,.40,.33,.11,.04,.06,.07
2021-02-22,.43,.37,.07,.04,.05,.04
2021-02-26,.43,.36,.07,.06,.04,.04
2021-02-26,.42,.38,.06,.02,.04,.07
2021-02-26,.41,.36,.05,.05,.07,.05
2021-02-28,.43,.36,.07,.04,.03,.07
2021-03-01,.44,.38,.07,.04,.03,.03
2021-03-04,.45,.32,.06,.05,.07,.05
2021-03-07,.42,.36,.08,.04,.04,.08
2021-03-08,.45,.36,.07,.04,.04,.04
2021-03-10,.43,.33,.09,.05,.05,.05
2021-03-10,.42,.33,.07,.04,.06,.06
2021-03-12,.45,.38,.06,.05,.05,.01
2021-03-12,.43,.37,.06,.05,.04,.04
2021-03-14,.39,.37,.08,.04,.04,.08
2021-03-15,.43,.36,.07,.04,.05,.04
2021-03-16,.42,.37,.07,.05,.05,.05
2021-03-19,.39,.37,.09,.04,.06,.05
2021-03-19,.43,.34,.05,.05,.07,.06
2021-03-21,.42,.38,.06,.04,.03,.07
2021-03-22,.42,.36,.09,.04,.06,.04
2021-03-26,.42,.32,.08,.05,.07,.06
2021-03-26,.41,.37,.06,.06,.05,.06
2021-03-27,.44,.36,.06,.02,.04,.08
2021-03-29,.42,.34,.09,.07,.04,.05
2021-03-29,.44,.36,.07,.04,.04,.05
2021-04-01,.42,.34,.07,.05,.06,.06
2021-04-04,.42,.35,.08,.04,.03,.07
2021-04-05,.44,.34,.09,.04,.05,.04
2021-04-08,.41,.34,.06,.05,.06,.07
2021-04-09,.45,.36,.06,.06,.04,.04
2021-04-10,.45,.36,.06,.02,.04,.07
2021-04-10,.43,.35,.08,.03,.04,.06
2021-04-11,.42,.35,.07,.05,.04,.07
2021-04-12,.43,.36,.07,.04,.05,.06
2021-04-14,.43,.29,.08,.05,.08,.06
2021-04-18,.43,.34,.07,.05,.04,.07
2021-04-19,.40,.34,.09,.04,.07,.07
2021-04-19,.44,.34,.10,.04,.04,.06
2021-04-22,.40,.37,.08,.06,.05,.05
2021-04-22,.44,.34,.05,.05,.07,.05
2021-04-23,.44,.33,.07,.06,.05,.06
2021-04-25,.42,.35,.08,.05,.03,.05
2021-04-26,.39,.35,.09,.04,.06,.06
2021-04-26,.41,.33,.10,.05,.07,.05
2021-04-26,.44,.34,.08,.05,.04,.05
2021-04-28,.44,.33,.07,.04,.07,.05
2021-04-29,.39,.38,.09,.04,.06,.05
2021-04-29,.43,.34,.05,.05,.07,.06
2021-04-30,.42,.37,.07,.05,.04,.05
2021-04-30,.40,.39,.06,NA,.03,NA
2021-05-03,.40,.38,.07,.04,.06,.05