Analysis: Report matched

Authors

Jolyon Miles-Wilson

Celestin Okoroji

Published

April 28, 2025

Code

library(haven)
library(poLCA)
library(Hmisc)
library(dplyr)
library(ggplot2)
library(tidyr)
library(skimr)
library(kableExtra)
#library(MASS)
library(wesanderson)
library(ggrepel)
library(here)
library(emmeans)
#library(devtools)
#install_version("sjstats", version = "0.18.2")
library(sjstats)
library(readr)
library(sjPlot)
library(nnet)
library(apaTables)

Code

rm(list = ls())
options(scipen = 999)
colours <- wes_palette("GrandBudapest2",4,"discrete")
better_colours <- c('#8dd3c7','#bebada','#fb8072','#80b1d3','#fdb462')
many_colours <- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928','#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f')

Code

extract_glm_coefs <- function(mod, only_sig=F, decimal_places = 3){
  coefs <- coef(summary(mod)) 
  if(only_sig==T){
    coefs <- coefs[which(coefs[,4] < .05),]
  }
  coefs <- as_tibble(coefs, rownames="variable") %>% # specify new variable to add rownames to 
    mutate(
    or = round(exp(Estimate), decimal_places), .after=Estimate
    )
}

extract_lm_coefs <- function(mod, only_sig = F){
  coefs <- coef(summary(mod)) 
  if(only_sig==T){
    coefs <- coefs[which(coefs[,4] < .05),]
  }
  coefs <- as_tibble(coefs, rownames="variable") # specify new variable to add rownames to 
}

get_pvalue <- function(model){
  f_value <- summary(model)$fstatistic
  p_value <- pf(f_value['value'], f_value['numdf'], f_value['dendf'], lower.tail = F)
  attributes(p_value) <- NULL
  return(p_value)
}

Code

data <- readRDS("../Data/2025-04-07 - Cleaned_data.rds")

# Specify data to be used in income analysis
income_data <- filter(data, income_drop_all==0)

1 Outsourced workers are paid £2,951 less a year

“Overall, we find that outsourced workers are paid £2,951 less a year, and £63 less a week, than non-outsourced workers.”

Code

# Annual income
# Intercept only
mod_base <- lm(income_annual_all ~ 1, income_data, weights = NatRepemployees)
# H1
mod_annual <- lm(income_annual_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled, income_data, weights = NatRepemployees)

summary(mod_annual)

# F and p
f_annual <- round(anova(mod_base, mod_annual)[2,"F"],2)
p_annual <- anova(mod_base, mod_annual)[2,"Pr(>F)"]
if(p_annual < .001){
  p_annual = "< .001"
} else{
  p_annual = paste0("= ",round(p_annual,3))
}

# Degrees of freedom
dfs_annual <- as.list(anova(mod_base, mod_annual)[2,c("Df","Res.Df")])
# R2
rsquare_annual <- round(summary(mod_annual)$r.squared,2)

# Weekly income
# Intercept only
mod_base <- lm(income_weekly_all ~ 1, income_data, weights = NatRepemployees)
# H1
mod_weekly <- lm(income_weekly_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled, income_data, weights = NatRepemployees)
summary(mod_weekly)

# F and p
f_weekly <- round(anova(mod_base, mod_weekly)[2,"F"],2)
p_weekly <- anova(mod_base, mod_weekly)[2,"Pr(>F)"]
if(p_weekly < .001){
  p_weekly = "< .001"
} else{
  p_weekly = paste0("= ",round(p_weekly,3))
}

# Degrees of freedom
dfs_weekly <- as.list(anova(mod_base, mod_weekly)[2,c("Df","Res.Df")])
# R2
rsquare_weekly <- round(summary(mod_weekly)$r.squared,2)

The annual income model was statistically significant (R² = 0.18, F(34, 8256) = 54.66, p < .001). The table below shows the coefficients for the annual income model.

Code

labels <- c(
  'Intercept',
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Ethnicity: Arab/British Arab',
  'Ethnicity: Asian/Asian British',
  'Ethnicity: Black/African/Caribbean/Black British',
  'Ethnicity: Mixed/Multiple ethnic group',
  'Ethnicity: Other ethnic group',
  'Ethnicity: Prefer not to say',
  'Ethnicity: White other',
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Outsourcing: Outsourced',
  'Migration: Arrived within the last year',
  'Migration: Arrived within the last 3 years',
  'Migration: Arrived within the last 5 years',
  'Migration: Arrived within the last 10 years',
  'Migration: Arrived within the last 15 years',
  'Migration: Arrived within the last 20 years',
  'Migration: Arrived within the last 30 years',
  'Migration: Arrived more than 30 years ago',
  'Migration: Prefer not to say'
)
  
tab_model(mod_annual, pred.labels = labels, dv.labels = "Annual income")

Table 1: Linear regression predicting annual income

	Annual income
Predictors	Estimates	CI	p
Intercept	23883.73	22626.81 – 25140.64	<0.001
Age	14.62	-6.00 – 35.24	0.165
Gender: Male	7027.53	6501.18 – 7553.88	<0.001
Gender: Other	944.17	-5768.31 – 7656.65	0.783
Gender: Prefer not to say	4159.04	-1081.10 – 9399.18	0.120
Education: Has degree	8194.74	7648.05 – 8741.42	<0.001
Education: Don't know	-1971.76	-4097.27 – 153.76	0.069
Ethnicity: Arab/British Arab	-167.76	-4857.92 – 4522.40	0.944
Ethnicity: Asian/Asian British	-465.21	-1559.07 – 628.65	0.404
Ethnicity: Black/African/Caribbean/Black British	-1178.97	-2782.72 – 424.79	0.150
Ethnicity: Mixed/Multiple ethnic group	-1509.33	-3488.27 – 469.61	0.135
Ethnicity: Other ethnic group	3584.34	-1007.55 – 8176.22	0.126
Ethnicity: Prefer not to say	-258.17	-1976.58 – 1460.23	0.768
Ethnicity: White other	-626.35	-1988.83 – 736.13	0.368
Region: East Midlands	-5770.04	-6985.32 – -4554.76	<0.001
Region: East of England	-4073.94	-5215.88 – -2931.99	<0.001
Region: North East	-4850.06	-6378.79 – -3321.33	<0.001
Region: North West	-4475.61	-5581.54 – -3369.68	<0.001
Region: Northern Ireland	-6545.93	-8319.18 – -4772.68	<0.001
Region: Scotland	-5465.81	-6653.77 – -4277.85	<0.001
Region: South East	-3406.44	-4446.13 – -2366.75	<0.001
Region: South West	-5685.35	-6871.46 – -4499.25	<0.001
Region: Wales	-5366.02	-6811.80 – -3920.25	<0.001
Region: West Midlands	-5002.23	-6158.23 – -3846.22	<0.001
Region: Yorkshire and the Humber	-5523.70	-6705.94 – -4341.47	<0.001
Outsourcing: Outsourced	-2951.02	-3660.76 – -2241.28	<0.001
Migration: Arrived within the last year	-6135.94	-8377.00 – -3894.88	<0.001
Migration: Arrived within the last 3 years	-2391.85	-4413.30 – -370.39	0.020
Migration: Arrived within the last 5 years	-2030.80	-4260.26 – 198.65	0.074
Migration: Arrived within the last 10 years	-609.19	-2354.44 – 1136.07	0.494
Migration: Arrived within the last 15 years	743.99	-1195.56 – 2683.54	0.452
Migration: Arrived within the last 20 years	1334.93	-748.43 – 3418.29	0.209
Migration: Arrived within the last 30 years	3501.23	1081.94 – 5920.53	0.005
Migration: Arrived more than 30 years ago	-198.94	-2116.97 – 1719.08	0.839
Migration: Prefer not to say	-2092.23	-5625.02 – 1440.55	0.246
Observations	8291
R² / R² adjusted	0.184 / 0.180

As expected, the model statistics for weekly income model were identical to the those of the annual income model. The model was statistically significant (R² = 0.18, F(34, 8256) = 54.66, p < .001). The table below shows the coefficients for the weekly income model.

Code

tab_model(mod_weekly, pred.labels = labels, dv.labels = "Weekly income")

Table 2: Linear regression predicting weekly income

	Weekly income
Predictors	Estimates	CI	p
Intercept	513.16	486.15 – 540.16	<0.001
Age	0.31	-0.13 – 0.76	0.165
Gender: Male	150.99	139.68 – 162.30	<0.001
Gender: Other	20.29	-123.94 – 164.51	0.783
Gender: Prefer not to say	89.36	-23.23 – 201.95	0.120
Education: Has degree	176.07	164.32 – 187.81	<0.001
Education: Don't know	-42.36	-88.03 – 3.30	0.069
Ethnicity: Arab/British Arab	-3.60	-104.38 – 97.17	0.944
Ethnicity: Asian/Asian British	-10.00	-33.50 – 13.51	0.404
Ethnicity: Black/African/Caribbean/Black British	-25.33	-59.79 – 9.13	0.150
Ethnicity: Mixed/Multiple ethnic group	-32.43	-74.95 – 10.09	0.135
Ethnicity: Other ethnic group	77.01	-21.65 – 175.67	0.126
Ethnicity: Prefer not to say	-5.55	-42.47 – 31.37	0.768
Ethnicity: White other	-13.46	-42.73 – 15.82	0.368
Region: East Midlands	-123.97	-150.08 – -97.86	<0.001
Region: East of England	-87.53	-112.07 – -63.00	<0.001
Region: North East	-104.21	-137.05 – -71.36	<0.001
Region: North West	-96.16	-119.92 – -72.40	<0.001
Region: Northern Ireland	-140.64	-178.74 – -102.54	<0.001
Region: Scotland	-117.44	-142.96 – -91.91	<0.001
Region: South East	-73.19	-95.53 – -50.85	<0.001
Region: South West	-122.15	-147.64 – -96.67	<0.001
Region: Wales	-115.29	-146.36 – -84.23	<0.001
Region: West Midlands	-107.48	-132.31 – -82.64	<0.001
Region: Yorkshire and the Humber	-118.68	-144.08 – -93.28	<0.001
Outsourcing: Outsourced	-63.40	-78.65 – -48.16	<0.001
Migration: Arrived within the last year	-131.83	-179.98 – -83.68	<0.001
Migration: Arrived within the last 3 years	-51.39	-94.82 – -7.96	0.020
Migration: Arrived within the last 5 years	-43.63	-91.53 – 4.27	0.074
Migration: Arrived within the last 10 years	-13.09	-50.59 – 24.41	0.494
Migration: Arrived within the last 15 years	15.99	-25.69 – 57.66	0.452
Migration: Arrived within the last 20 years	28.68	-16.08 – 73.44	0.209
Migration: Arrived within the last 30 years	75.23	23.25 – 127.21	0.005
Migration: Arrived more than 30 years ago	-4.27	-45.48 – 36.94	0.839
Migration: Prefer not to say	-44.95	-120.86 – 30.95	0.246
Observations	8291
R² / R² adjusted	0.184 / 0.180

2 Outsourced workers are paid less across most sectors and in most jobs

2.1 Sectors

Code

sector_summary_pay <- data %>%
  filter(income_drop_all == 0 & !is.na(income_weekly_all)) %>%
  group_by(SectorName, SectorName_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_weekly_all, na.rm=T),
    wtd_avg_income = weighted.mean(income_weekly_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )

summary_weekly <- sector_summary_pay %>%
  group_by(SectorName_labelled) %>%
  mutate(
    min_n = min(n, na.rm=TRUE) 
  ) %>%
  filter(min_n >= 10) %>% # need to identify the unit occs that have an ok n
  ungroup()

write_csv(sector_summary_pay, file="../outputs/data/sector_summary_pay_weekly.csv")

plot_data <- sector_summary_pay %>%
  drop_na(SectorName_short) %>%
  droplevels() %>%
  ungroup()

# Filter for 'outsourced' level and reorder SectorName_short
not_outsourced_levels <- plot_data %>%
  filter(outsourcing_status == 'Not outsourced') %>%
  mutate(SectorName_short = forcats::fct_reorder(SectorName_short, N, .desc = FALSE))

# outsourced <- plot_data %>%
#   filter(outsourcing_status == 'Outsourced') %>%
#   mutate(
#     rank = rank(desc(perc))
#   )

# Apply the reordered levels back to the original data
plot_data <- plot_data %>%
  mutate(
    SectorName_short = factor(SectorName_short, levels = levels(not_outsourced_levels$SectorName_short)),
        ) %>%
  arrange(desc(SectorName_short))

annotation_df <- plot_data %>%
  #filter(outsourcing_status == "Not outsourced") %>%
  dplyr::select(SectorName_short, n) %>%
  group_by(SectorName_short) %>%
  summarise(
    N = sum(n)
  ) %>%
    mutate(
    ypos = max(plot_data$wtd_avg_income, na.rm=T) * 1.2
  ) 

plot_data %>%
  # mutate(
  #   SectorName = as.factor(SectorName)
  # ) %>%
  ggplot(., aes(wtd_avg_income,SectorName_short, size = perc, colour = outsourcing_status)) +
    geom_point(position = "dodge") + 
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank())+
      #coord_flip() +
  scale_x_continuous(breaks=seq(0,max(plot_data$wtd_avg_income, na.rm=T), 100)) +
  scale_colour_manual(values=colours) +
  geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=SectorName_short, label = paste0("N = ", N)), hjust=1) +
  geom_label_repel(inherit.aes = F, aes(wtd_avg_income, SectorName_short, colour = outsourcing_status, label=paste0("n=",n)), size=3) +
  guides(size=FALSE) + # remove size legend as gauging size is difficult 
  xlab("Weighted average weekly income") + ylab("Sector") +
  labs(caption = "Size of bubble represents the size of the respective workforce within the sector")

Code

sectors_of_interest <- unique(plot_data$SectorName_labelled)
sectors_of_interest <- sectors_of_interest[1:13] %>%
  stringr::str_to_title()

2.2 Minor sub groups across sectors

Code

unit_occ_summary_pay <- data %>%
  filter(income_drop_all == 0 & !is.na(income_weekly_all)) %>%
  group_by(UnitOccupation_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_weekly_all, na.rm=T),
    wtd_avg_income = weighted.mean(income_weekly_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(UnitOccupation_labelled) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    UnitOccupation_labelled = case_when(UnitOccupation_labelled == "NA" ~ NA,
                                    TRUE ~ UnitOccupation_labelled),
    UnitOccupation_labelled = stringr::str_to_title(UnitOccupation_labelled)
  )  %>%
  ungroup()

# write_csv(unit_occ_summary_pay, file="../outputs/data/minor_group_occupation_summary_pay_weekly_across_sectors.csv")

# need to identify the unit occs that have an ok n
# subste to occs with n>=10
unit_subset_weekly <- unit_occ_summary_pay %>%
  group_by(UnitOccupation_labelled) %>%
  mutate(
    min_n = min(n, na.rm=TRUE)
  ) %>%
  filter(min_n >= 10)

unit_subset <- unit_subset_weekly


# create a df with occs where outsourced paid less so we can just list it
paid_less <- unit_subset %>%
  pivot_wider(id_cols = c(UnitOccupation_labelled), names_from = outsourcing_status, values_from = c(wtd_avg_income, n)) %>%
  janitor::clean_names() %>%
  mutate(
    pay_penalty = wtd_avg_income_outsourced - wtd_avg_income_not_outsourced
  ) %>%
  filter(
    pay_penalty < 0
  )

# write_csv(paid_less, file="../outputs/data/minor_group_occupation_weekly_pay_penalty_across_sectors.csv")

#print(sector)
# subset to this sector and drop na occupatoins
plot_data <- unit_subset %>%
  filter(!is.na(UnitOccupation_labelled)) %>%
  droplevels() %>%
  ungroup()

# Order occs by N
# First filter for 'outsourced' level and reorder by N
not_outsourced_levels <- plot_data %>%
  dplyr::select(UnitOccupation_labelled, outsourcing_status, N) %>%
  distinct(UnitOccupation_labelled, N) %>%
  mutate(UnitOccupation_labelled = forcats::fct_reorder(UnitOccupation_labelled, N, .desc = FALSE))

# not_outsourced_levels <- plot_data %>%
#   filter(outsourcing_status == 'Not outsourced') %>%
#   mutate(UnitOccupation_labelled = forcats::fct_reorder(UnitOccupation_labelled, N, .desc = FALSE))
# Then apply the reordered levels back to the original data
plot_data <- plot_data %>%
  mutate(
    UnitOccupation_labelled = factor(UnitOccupation_labelled, levels = levels(not_outsourced_levels$UnitOccupation_labelled)),
        )

annotation_df <- plot_data %>%
  #filter(outsourcing_status == "Not outsourced") %>%
  dplyr::select(UnitOccupation_labelled, n) %>%
  group_by(UnitOccupation_labelled) %>%
  summarise(
    N = sum(n)
  ) %>%
    mutate(
    ypos = max(plot_data$wtd_avg_income, na.rm=T) * 1.2
  )


p <- plot_data %>%
  ggplot(., aes(wtd_avg_income, UnitOccupation_labelled, size = perc, colour = outsourcing_status)) +
  geom_point(position = "dodge") + 
geom_label_repel(inherit.aes = F, aes(wtd_avg_income, UnitOccupation_labelled, colour = outsourcing_status, label=paste0("n=",n)), size=3, 
                #force_pull = 2
                ) + 
  theme_minimal() +
  theme(legend.position = "bottom",
      legend.title = element_blank()) +

  #coord_flip() +
scale_x_continuous(breaks=scales::breaks_pretty(n=5)) +
  #breaks=seq(0,max(plot_data$wtd_avg_income, na.rm=T), 200)) +
scale_colour_manual(values=colours) +
geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=UnitOccupation_labelled, label = paste0("N = ", N)), hjust=1)  +
guides(size=FALSE) + # remove size legend as gauging size is difficult 
xlab("Weighted average weekly income") + ylab("Unit occupation") +
labs(caption = "Size of bubble represents the size of the respective workforce within the occupation") +
  ggtitle("All sectors")

show(p)

Code

# ggsave(here('outputs','figures','occupation_pay_plots','unit_occupation_pay_plot_all_sectors.png'), height = 8, width = 8, dpi=800, bg="white")

2.3 Minor subgroups within sectors

Code

unit_occ_in_sect_summary_pay <- data %>%
  filter(income_drop_all == 0 & !is.na(income_weekly_all)) %>%
  group_by(SectorName, SectorName_labelled, UnitOccupation_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_weekly_all, na.rm=T),
    wtd_avg_income = weighted.mean(income_weekly_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName_labelled, UnitOccupation_labelled) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    UnitOccupation_labelled = case_when(UnitOccupation_labelled == "NA" ~ NA,
                                    TRUE ~ UnitOccupation_labelled),
    UnitOccupation_labelled = stringr::str_to_title(UnitOccupation_labelled),
    SectorName_labelled = stringr::str_to_title(SectorName_labelled)
    
  )  %>%
  ungroup()

summary_weekly <- unit_occ_in_sect_summary_pay %>%
  group_by(SectorName_labelled,UnitOccupation_labelled) %>%
  mutate(
    min_n = min(n, na.rm=TRUE) 
  ) %>%
  filter(min_n >= 10) %>% # need to identify the unit occs that have an ok n
  ungroup() %>%
  mutate(
    pay_frame = "weekly"
  )

# write_csv(unit_occ_in_sect_summary_pay, file="../outputs/data/minor_group_occupation_in_sector_summary_pay_weekly.csv")


unit_subset <- summary_weekly


# create a df with occs where outsourced paid less so we can just list it
paid_less <- unit_subset %>%
  pivot_wider(id_cols = c(SectorName_labelled, UnitOccupation_labelled), names_from = outsourcing_status, values_from = c(wtd_avg_income, n)) %>%
  janitor::clean_names() %>%
  mutate(
    pay_penalty = wtd_avg_income_outsourced - wtd_avg_income_not_outsourced
  ) %>%
  filter(
    pay_penalty < 0
  )

# write_csv(paid_less, file="../outputs/data/minor_group_occupation_in_sector_weekly_pay_penalty.csv")


for(sector in sectors_of_interest){
    #print(sector)
    # subset to this sector and drop na occupatoins
    plot_data <- unit_subset %>%
      filter(SectorName_labelled == sector) %>%
      filter(!is.na(UnitOccupation_labelled)) %>%
      droplevels() %>%
      ungroup()
    
    # Order occs by N
    # First filter for 'outsourced' level and reorder by N
    not_outsourced_levels <- plot_data %>%
      dplyr::select(UnitOccupation_labelled, outsourcing_status, N) %>%
      distinct(UnitOccupation_labelled, N) %>%
      mutate(UnitOccupation_labelled = forcats::fct_reorder(UnitOccupation_labelled, N, .desc = FALSE))
    
    # not_outsourced_levels <- plot_data %>%
    #   filter(outsourcing_status == 'Not outsourced') %>%
    #   mutate(UnitOccupation_labelled = forcats::fct_reorder(UnitOccupation_labelled, N, .desc = FALSE))
    # Then apply the reordered levels back to the original data
    plot_data <- plot_data %>%
      mutate(
        UnitOccupation_labelled = factor(UnitOccupation_labelled, levels = levels(not_outsourced_levels$UnitOccupation_labelled)),
            )
    
    annotation_df <- plot_data %>%
      #filter(outsourcing_status == "Not outsourced") %>%
      dplyr::select(UnitOccupation_labelled, n) %>%
      group_by(UnitOccupation_labelled) %>%
      summarise(
        N = sum(n)
      ) %>%
        mutate(
        ypos = max(plot_data$wtd_avg_income, na.rm=T) * 1.2
      )
    
    
    p <- plot_data %>%
      ggplot(., aes(wtd_avg_income, UnitOccupation_labelled, size = perc, colour = outsourcing_status)) +
      geom_point(position = "dodge") + 
    geom_label_repel(inherit.aes = F, aes(wtd_avg_income, UnitOccupation_labelled, colour = outsourcing_status, label=paste0("n=",n)), size=3, 
                    #force_pull = 2
                    ) + 
      theme_minimal() +
      theme(legend.position = "bottom",
          legend.title = element_blank()) +

      #coord_flip() +
    scale_x_continuous(breaks=scales::breaks_pretty(n=5)) +
      #breaks=seq(0,max(plot_data$wtd_avg_income, na.rm=T), 200)) +
    scale_colour_manual(values=colours) +
    geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=UnitOccupation_labelled, label = paste0("N = ", N)), hjust=1)  +
    guides(size=FALSE) + # remove size legend as gauging size is difficult 
    xlab("Weighted average weekly income") + ylab("Unit occupation") +
    labs(caption = "Size of bubble represents the size of the respective workforce within the occupation") +
      ggtitle(sector)
    
    show(p)
    
    # ggsave(here('outputs','figures','occupation_pay_plots',paste0('unit_occupation_pay_plot_weekly_', sector, '.png')), height = 8, width = 8, dpi=800, bg="white")
}

2.4 Major subgroups across sectors

Code

major_occ_summary_pay <- data %>%
  filter(income_drop_all == 0 & !is.na(income_weekly_all)) %>%
  group_by(MajorsubgroupOccupation_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_weekly_all, na.rm=T),
    wtd_avg_income = weighted.mean(income_weekly_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(MajorsubgroupOccupation_labelled) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    MajorsubgroupOccupation_labelled = case_when(MajorsubgroupOccupation_labelled == "NA" ~ NA,
                                    TRUE ~ MajorsubgroupOccupation_labelled),
    MajorsubgroupOccupation_labelled = stringr::str_to_title(MajorsubgroupOccupation_labelled)
  )  %>%
  ungroup()

# write_csv(major_occ_summary_pay, file="../outputs/data/major_subgroup_across_sectors_occupation_summary_pay_weekly.csv")

# need to identify the unit occs that have an ok n
# subste to occs with n>=10
unit_subset_weekly <- major_occ_summary_pay %>%
  group_by(MajorsubgroupOccupation_labelled) %>%
  mutate(
    min_n = min(n, na.rm=TRUE)
  ) %>%
  filter(min_n >= 10)

unit_subset <- unit_subset_weekly


# create a df with occs where outsourced paid less so we can just list it
paid_less <- unit_subset %>%
  pivot_wider(id_cols = c(MajorsubgroupOccupation_labelled), names_from = outsourcing_status, values_from = c(wtd_avg_income, n)) %>%
  janitor::clean_names() %>%
  mutate(
    pay_penalty = wtd_avg_income_outsourced - wtd_avg_income_not_outsourced
  ) %>%
  filter(
    pay_penalty < 0
  )

# write_csv(paid_less, file="../outputs/data/major_subgroup_occupation_weekly_pay_penalty_across_sectors.csv")

#print(sector)
# subset to this sector and drop na occupatoins
plot_data <- unit_subset %>%
  filter(!is.na(MajorsubgroupOccupation_labelled)) %>%
  droplevels() %>%
  ungroup()

# Order occs by N
# First filter for 'outsourced' level and reorder by N
not_outsourced_levels <- plot_data %>%
  dplyr::select(MajorsubgroupOccupation_labelled, outsourcing_status, N) %>%
  distinct(MajorsubgroupOccupation_labelled, N) %>%
  mutate(MajorsubgroupOccupation_labelled = forcats::fct_reorder(MajorsubgroupOccupation_labelled, N, .desc = FALSE))

# not_outsourced_levels <- plot_data %>%
#   filter(outsourcing_status == 'Not outsourced') %>%
#   mutate(MajorsubgroupOccupation_labelled = forcats::fct_reorder(MajorsubgroupOccupation_labelled, N, .desc = FALSE))
# Then apply the reordered levels back to the original data
plot_data <- plot_data %>%
  mutate(
    MajorsubgroupOccupation_labelled = factor(MajorsubgroupOccupation_labelled, levels = levels(not_outsourced_levels$MajorsubgroupOccupation_labelled)),
        )

annotation_df <- plot_data %>%
  #filter(outsourcing_status == "Not outsourced") %>%
  dplyr::select(MajorsubgroupOccupation_labelled, n) %>%
  group_by(MajorsubgroupOccupation_labelled) %>%
  summarise(
    N = sum(n)
  ) %>%
    mutate(
    ypos = max(plot_data$wtd_avg_income, na.rm=T) * 1.2
  )


p <- plot_data %>%
  ggplot(., aes(wtd_avg_income, MajorsubgroupOccupation_labelled, size = perc, colour = outsourcing_status)) +
  geom_point(position = "dodge") + 
geom_label_repel(inherit.aes = F, aes(wtd_avg_income, MajorsubgroupOccupation_labelled, colour = outsourcing_status, label=paste0("n=",n)), size=3, 
                #force_pull = 2
                ) + 
  theme_minimal() +
  theme(legend.position = "bottom",
      legend.title = element_blank()) +

  #coord_flip() +
scale_x_continuous(breaks=scales::breaks_pretty(n=5)) +
  #breaks=seq(0,max(plot_data$wtd_avg_income, na.rm=T), 200)) +
scale_colour_manual(values=colours) +
geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=MajorsubgroupOccupation_labelled, label = paste0("N = ", N)), hjust=1)  +
guides(size=FALSE) + # remove size legend as gauging size is difficult 
xlab("Weighted average weekly income") + ylab("Major sub group occupation") +
labs(caption = "Size of bubble represents the size of the respective workforce within the occupation") +
  ggtitle("All sectors")

show(p)

Code

# ggsave(here('outputs','figures','occupation_pay_plots','major_subgroup_occupation_all_sectors_pay_plot.png'), height = 8, width = 8, dpi=800, bg="white")

2.5 Major subgroups within sectors

Code

occ_in_sect_summary_pay <- data %>%
  filter(income_drop_all == 0 & !is.na(income_weekly_all)) %>%
  group_by(SectorName, SectorName_labelled, MajorsubgroupOccupation_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    avg_income = mean(income_weekly_all, na.rm=T),
    wtd_avg_income = weighted.mean(income_weekly_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName_labelled, MajorsubgroupOccupation_labelled) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    MajorsubgroupOccupation_labelled = case_when(MajorsubgroupOccupation_labelled == "NA" ~ NA,
                                    TRUE ~ MajorsubgroupOccupation_labelled),
    MajorsubgroupOccupation_labelled = stringr::str_to_title(MajorsubgroupOccupation_labelled),
    SectorName_labelled = stringr::str_to_title(SectorName_labelled)
    
  ) 

summary_weekly <- occ_in_sect_summary_pay %>%
  group_by(SectorName_labelled,MajorsubgroupOccupation_labelled) %>%
  mutate(
    min_n = min(n, na.rm=TRUE) 
  ) %>%
  filter(min_n >= 10) %>% # need to identify the unit occs that have an ok n
  ungroup() %>%
  mutate(
    pay_frame = "weekly"
  )

# write_csv(occ_in_sect_summary_pay, file="../outputs/data/major_subgroup_occupation_in_sector_summary_pay_weekly.csv")


for(sector in sectors_of_interest){
    #print(sector)
    # subset to this sector and drop na occupatoins
    plot_data <- occ_in_sect_summary_pay %>%
      filter(SectorName_labelled == sector) %>%
      filter(!is.na(MajorsubgroupOccupation_labelled)) %>%
      droplevels() %>%
      ungroup()
    
    # Order occs by N
    # First filter for 'outsourced' level and reorder by N
    not_outsourced_levels <- plot_data %>%
      dplyr::select(MajorsubgroupOccupation_labelled, outsourcing_status, N) %>%
      distinct(MajorsubgroupOccupation_labelled, N) %>%
      mutate(MajorsubgroupOccupation_labelled = forcats::fct_reorder(MajorsubgroupOccupation_labelled, N, .desc = FALSE))
    
    # not_outsourced_levels <- plot_data %>%
    #   filter(outsourcing_status == 'Not outsourced') %>%
    #   mutate(MajorsubgroupOccupation_labelled = forcats::fct_reorder(MajorsubgroupOccupation_labelled, N, .desc = FALSE))
    # Then apply the reordered levels back to the original data
    plot_data <- plot_data %>%
      mutate(
        MajorsubgroupOccupation_labelled = factor(MajorsubgroupOccupation_labelled, levels = levels(not_outsourced_levels$MajorsubgroupOccupation_labelled)),
            )
    
    annotation_df <- plot_data %>%
      #filter(outsourcing_status == "Not outsourced") %>%
      dplyr::select(MajorsubgroupOccupation_labelled, n) %>%
      group_by(MajorsubgroupOccupation_labelled) %>%
      summarise(
        N = sum(n)
      ) %>%
        mutate(
        ypos = max(plot_data$wtd_avg_income, na.rm=T) * 1.2
      )
    
    p <- plot_data %>%
      ggplot(., aes(wtd_avg_income, MajorsubgroupOccupation_labelled, size = perc, colour = outsourcing_status)) +
      geom_point(position = "dodge") + 
    geom_label_repel(inherit.aes = F, aes(wtd_avg_income, MajorsubgroupOccupation_labelled, colour = outsourcing_status, label=paste0("n=",n)), size=3, 
                    #force_pull = 2
                    ) + 
      theme_minimal() +
      theme(legend.position = "bottom",
          legend.title = element_blank()) +

      #coord_flip() +
    scale_x_continuous(breaks=seq(0,max(plot_data$wtd_avg_income, na.rm=T), 200)) +
    scale_colour_manual(values=colours) +
    geom_text(inherit.aes=F,data=annotation_df, aes(x=ypos, y=MajorsubgroupOccupation_labelled, label = paste0("N = ", N)), hjust=1)  +
    guides(size=FALSE) + # remove size legend as gauging size is difficult 
    xlab("Weighted average weekly income") + ylab("Major subgroup occupation") +
    labs(caption = "Size of bubble represents the size of the respective workforce within the occupation") +
      ggtitle(sector)
    
    show(p)
    
    # ggsave(here('outputs','figures','occupation_pay_plots',paste0('major_subgroup_occupation_pay_plot_weekly_', sector, '.png')), height = 8, width = 8, dpi=800, bg="white")
}

3 This group of workers is varied, spread across sectors and regions (chapter 2)

Code

sector_summary_3 <- data %>%
  #filter(income_drop_all == 0) %>%
  group_by(SectorName, SectorName_labelled, outsourcing_status) %>%
  summarise(
    n = n(),
    Frequency = sum(NatRepemployees),
    # avg_income = mean(income_annual_all, na.rm=T),
    # wtd_avg_income = weighted.mean(income_annual_all, w = NatRepemployees, na.rm=T)
  ) %>% 
  ungroup() %>%
  group_by(SectorName) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    perc = 100 * (Frequency/Sum),
    SectorName_labelled = case_when(SectorName_labelled == "NA" ~ NA,
                                    TRUE ~ SectorName_labelled),
    SectorName_short = SectorName_labelled
  ) %>%
  # make the sector names more readable
  separate_wider_delim(SectorName_short, names = c("SectorName_short", "SectorName_short_detail"), delim=";",
                       too_few = "align_start") %>%
  mutate(
    SectorName_short = factor(stringr::str_to_sentence(SectorName_short)),
    SectorName_short_detail = factor(stringr::str_to_sentence(SectorName_short_detail)),
  )

# write_csv(sector_summary_3, file="../outputs/data/sector_summary_3.csv")

The plot below shows the proportion of outsourced and not outsourced workers within each sector. I.e. this is showing what sectors have higher and lower proportions of outsourced workers.

Code

plot_data <- sector_summary_3 %>%
  drop_na(SectorName_short) %>%
    droplevels() %>%
  ungroup()

# Filter for 'outsourced' level and reorder SectorName_short
not_outsourced_levels <- plot_data %>%
  filter(outsourcing_status == 'Not outsourced') %>%
  mutate(SectorName_short = forcats::fct_reorder(SectorName_short, perc, .desc = TRUE))

outsourced <- plot_data %>%
  filter(outsourcing_status == 'Outsourced') %>%
  mutate(
    rank = rank(desc(perc))
  )

# Apply the reordered levels back to the original data
plot_data <- plot_data %>%
  mutate(
    SectorName_short = factor(SectorName_short, levels = levels(not_outsourced_levels$SectorName_short)),
         )

# annotation_df <- plot_data %>%
#   dplyr::select(SectorName_short, outsourcing_status, perc, n
# mutate(
  
annotation_df <- plot_data %>%
  filter(outsourcing_status == "Not outsourced") %>%
  dplyr::select(SectorName_short, N) %>%
  mutate(
    ypos = 80
  )

ggplot(plot_data, aes(SectorName_short, perc, fill = outsourcing_status)) +
  geom_col() +
  geom_text(inherit.aes=F,data=annotation_df, aes(x=SectorName_short, y=ypos, label = paste0("N = ", N)), hjust=1, nudge_y = 15) +
  coord_flip() +
  scale_fill_manual(values=many_colours) +
  scale_y_continuous(breaks=seq(0,100,10))

Code

# sector_key <- data.frame("number" = seq(1,length(unique(plot_data$SectorName_labelled)),1),
#                           "Sector" = levels(plot_data$SectorName_labelled))
# 
# sector_key %>%
#   kable() %>%
#   kable_styling(full_width = F)

The top three Sectors with the highest proportion of outsourced workers are:

ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS-AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN US (note that N = 31)
ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES
WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES

Note that for an undefined sector (‘Not found’) contained one of the largest proportions of outsourced workers (31% of workers in the ‘Not found’ category were outsourced).

A key takeaway here is that whereas the total outsourced population is 17%, this figure varies by sector, from 0% for Mining… and Extraterritoral organisations… all the way to 36% for Activities of households as employers, with 5 out 20 sectors having at least 20% of their workforce outsourced.

The plot below shows the proportion of workers within each region who are outsourced.

Code

region_statistics_2 <- data %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(Region, outsourcing_status) %>%
  summarise(
    Frequency = sum(NatRepemployees),
    n = n(),
  ) %>%
  mutate(
    N = sum(n),
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  ) %>%
  rename(
    `Outsourcing status` = outsourcing_status
  ) %>%
  ungroup()

reg_levels <- region_statistics_2 %>%
  filter(`Outsourcing status` == "Outsourced") %>%
  mutate(
    Region = forcats::fct_reorder(Region, Percentage, .desc=FALSE)
  )

annotation_df <- region_statistics_2 %>%
  filter(`Outsourcing status` == "Not outsourced") %>%
  dplyr::select(Region, N) %>%
  mutate(
    ypos = 100
  )

region_statistics_2 %>%
  mutate(
    Region = factor(Region, levels = levels(reg_levels$Region))
  ) %>%
  ggplot(., aes(Region, Percentage, fill = `Outsourcing status`)) +
  geom_col(colour="black") +
  geom_text(inherit.aes=F, data = annotation_df, aes(Region, ypos, label = paste0("N=",N)), hjust=1, nudge_y = -2) +
  coord_flip() +
  scale_fill_manual(values=many_colours) +
  theme_minimal()

Code

# readr::write_csv(region_statistics_2, file = "../outputs/data/region_stats_2.csv")

region_statistics_2_1 <- region_statistics_2 %>% 
  filter(`Outsourcing status` == "Outsourced" & Region != "London")

london_perc <- region_statistics_2[which(region_statistics_2$Region == "London" & region_statistics_2["Outsourcing status"] == "Outsourced"), "Percentage"]

Code

region_statistics_3 <- data %>%
  filter(outsourcing_status == "Outsourced") %>%
  # get values of labels
  # mutate_all(haven::as_factor) %>%
  group_by(Region) %>%
  summarise(
    Frequency = sum(NatRepemployees)
  ) %>%
  mutate(
    Sum = sum(Frequency),
    Percentage = 100 * (Frequency / Sum)
  )

# readr::write_csv(region_statistics_3, file = "../outputs/data/region_stats_3.csv")

We can also explore how the the entire outsourced workforce in the UK is distributed across the country. The table below shows the percentage of outsourced workers in each region as a proportion of the total UK workforce. They show where the UK’s outsourced workforce is concentrated.

Code

region_statistics_3 %>%
  mutate(
    Region = haven::as_factor(Region)
    ) %>%
  arrange(desc(Percentage)) %>%
  knitr::kable(.,digits = 2) %>%
  kable_styling(full_width = F)

Table 3: Distribution of outsourced workers across regions

Region	Frequency	Sum	Percentage
London	357.35	1708.36	20.92
North West	189.39	1708.36	11.09
South East	188.47	1708.36	11.03
West Midlands	161.49	1708.36	9.45
East Midlands	140.50	1708.36	8.22
Scotland	125.82	1708.36	7.37
East of England	125.49	1708.36	7.35
South West	120.50	1708.36	7.05
Yorkshire and the Humber	119.46	1708.36	6.99
Wales	83.25	1708.36	4.87
North East	53.06	1708.36	3.11
Northern Ireland	43.56	1708.36	2.55

4 Our data indicates a racialised picture of outsourcing in the UK

All models were run with a quasibinomial distribution to account for the survey weights. Because quasibinomial models don’t involve likelihoods, model fit could not be determined using information criteria. Instead, we use Tjur’s pseudo R² to appraise model fit. This approach uses predicted probabilities from the model to calculate the average predicted probability for each level of the outcome variable, then takes the difference in this average between the two levels. The result is a figure between 0 and 1 that estimates how different the average predicted probability in the outsourced group is from the average predicted probability in the not outsourced group. Values closer to 1 indicate the averages are more different and therefore that the model is more accurate in discriminating between the two groups.

Code

mod2 <- glm(outsourcing_status ~ Ethnicity_binary + Age + Gender + Has_Degree +  Region + BORNUK_labelled, data, weights = NatRepemployees, family="quasibinomial")

tjur2 <- data %>%
  mutate(pred = predict(mod2, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod2, only_sig=T)
ethn_coef <- coefs %>% filter(stringr::str_detect(variable, "Ethnicity"))

For model 1, the value of Tjur’s pseudo R² is 0.036. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  'Ethnicity: Not White',
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Migration: Arrived within the last year',
  'Migration: Arrived within the last 3 years',
  'Migration: Arrived within the last 5 years',
  'Migration: Arrived within the last 10 years',
  'Migration: Arrived within the last 15 years',
  'Migration: Arrived within the last 20 years',
  'Migration: Arrived within the last 30 years',
  'Migration: Arrived more than 30 years ago',
  'Migration: Prefer not to say'
)


tab_model(mod2, 
          pred.labels = labels, 
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.51	0.40 – 0.64	<0.001
Ethnicity: Not White	1.18	1.01 – 1.37	0.031
Age	0.98	0.97 – 0.98	<0.001
Gender: Male	1.42	1.27 – 1.58	<0.001
Gender: Other	1.11	0.22 – 3.70	0.876
Gender: Prefer not to say	0.80	0.34 – 1.64	0.575
Education: Has degree	0.93	0.83 – 1.04	0.187
Education: Don't know	0.94	0.62 – 1.38	0.743
Region: East Midlands	0.89	0.71 – 1.12	0.314
Region: East of England	0.58	0.46 – 0.73	<0.001
Region: North East	0.62	0.44 – 0.84	0.003
Region: North West	0.83	0.67 – 1.02	0.081
Region: Northern Ireland	0.74	0.51 – 1.04	0.091
Region: Scotland	0.68	0.54 – 0.86	0.001
Region: South East	0.60	0.49 – 0.73	<0.001
Region: South West	0.65	0.51 – 0.82	<0.001
Region: Wales	0.86	0.65 – 1.12	0.269
Region: West Midlands	0.83	0.67 – 1.03	0.087
Region: Yorkshire and the Humber	0.66	0.52 – 0.83	<0.001
Migration: Arrived within the last year	1.97	1.36 – 2.83	<0.001
Migration: Arrived within the last 3 years	1.12	0.75 – 1.63	0.565
Migration: Arrived within the last 5 years	1.16	0.77 – 1.70	0.462
Migration: Arrived within the last 10 years	1.69	1.26 – 2.24	<0.001
Migration: Arrived within the last 15 years	1.58	1.13 – 2.17	0.006
Migration: Arrived within the last 20 years	1.69	1.18 – 2.38	0.004
Migration: Arrived within the last 30 years	0.50	0.26 – 0.87	0.023
Migration: Arrived more than 30 years ago	2.05	1.45 – 2.87	<0.001
Migration: Prefer not to say	1.18	0.74 – 1.83	0.471
Observations	10155

Code

mod <- glm(outsourcing_status ~ Ethnicity_collapsed + Age + Gender + Has_Degree + Region + BORNUK_labelled, data, weights = NatRepemployees, family="quasibinomial")

tjur <- data %>%
  mutate(pred = predict(mod, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod, only_sig=T)

ethn_coef <- coefs %>% filter(stringr::str_detect(variable, "Ethnicity"))

For model 2, the value of Tjur’s pseudo R² for this model is 0.039. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  'Ethnicity: Arab/British Arab',
  'Ethnicity: Asian/Asian British',
  'Ethnicity: Black/African/Caribbean/Black British',
  'Ethnicity: Mixed/Multiple ethnic group',
  'Ethnicity: Other ethnic group',
  'Ethnicity: Prefer not to say',
  'Ethnicity: White other',
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Migration: Arrived within the last year',
  'Migration: Arrived within the last 3 years',
  'Migration: Arrived within the last 5 years',
  'Migration: Arrived within the last 10 years',
  'Migration: Arrived within the last 15 years',
  'Migration: Arrived within the last 20 years',
  'Migration: Arrived within the last 30 years',
  'Migration: Arrived more than 30 years ago',
  'Migration: Prefer not to say'
)

tab_model(mod, 
          pred.labels = labels, 
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.50	0.39 – 0.63	<0.001
Ethnicity: Arab/British Arab	1.96	0.91 – 4.07	0.075
Ethnicity: Asian/Asian British	1.22	1.00 – 1.48	0.053
Ethnicity: Black/African/Caribbean/Black British	1.43	1.08 – 1.89	0.012
Ethnicity: Mixed/Multiple ethnic group	1.37	0.95 – 1.93	0.082
Ethnicity: Other ethnic group	1.01	0.40 – 2.28	0.981
Ethnicity: Prefer not to say	1.26	0.95 – 1.66	0.101
Ethnicity: White other	0.76	0.58 – 0.99	0.041
Age	0.98	0.97 – 0.98	<0.001
Gender: Male	1.41	1.27 – 1.57	<0.001
Gender: Other	1.18	0.24 – 3.91	0.808
Gender: Prefer not to say	0.76	0.32 – 1.59	0.502
Education: Has degree	0.91	0.82 – 1.02	0.111
Education: Don't know	0.95	0.63 – 1.40	0.793
Region: East Midlands	0.90	0.71 – 1.13	0.368
Region: East of England	0.58	0.46 – 0.73	<0.001
Region: North East	0.61	0.44 – 0.84	0.003
Region: North West	0.83	0.67 – 1.02	0.081
Region: Northern Ireland	0.82	0.57 – 1.16	0.267
Region: Scotland	0.69	0.54 – 0.87	0.002
Region: South East	0.60	0.49 – 0.74	<0.001
Region: South West	0.66	0.52 – 0.84	0.001
Region: Wales	0.86	0.65 – 1.13	0.297
Region: West Midlands	0.83	0.66 – 1.03	0.087
Region: Yorkshire and the Humber	0.65	0.51 – 0.83	<0.001
Migration: Arrived within the last year	1.85	1.26 – 2.68	0.001
Migration: Arrived within the last 3 years	1.11	0.75 – 1.63	0.583
Migration: Arrived within the last 5 years	1.28	0.85 – 1.88	0.227
Migration: Arrived within the last 10 years	2.05	1.50 – 2.76	<0.001
Migration: Arrived within the last 15 years	1.82	1.30 – 2.54	<0.001
Migration: Arrived within the last 20 years	1.89	1.31 – 2.70	<0.001
Migration: Arrived within the last 30 years	0.54	0.28 – 0.95	0.045
Migration: Arrived more than 30 years ago	2.10	1.48 – 2.94	<0.001
Migration: Prefer not to say	1.17	0.73 – 1.82	0.499
Observations	10155

Code

mod <- glm(outsourcing_status ~ Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region + BORNUK_labelled, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

# Test waht amount of variacne the unweighted would explain - similar
# test <- glm(outsourcing_status ~ Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region + BORNUK_labelled, data, family="binomial")
# with(summary(test), 1 - deviance/null.deviance)


tjur <- data %>%
  filter(!is.na(Ethnicity_collapsed_disaggregated)) %>%
  mutate(pred = predict(mod, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod, only_sig=T)

ethn_coef <- coefs %>% filter(stringr::str_detect(variable, "Ethnicity"))

For model 3, the value of Tjur’s pseudo R² for this model is 0.043. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  "Ethnicity: Irish",
  "Ethnicity: Gypsy or Irish Traveller",
  "Ethnicity: Roma",
  "Ethnicity: Any other White background",
  "Ethnicity: White and Black Caribbean",
  "Ethnicity: White and Black African",
  "Ethnicity: White and Asian",
  "Ethnicity: Any other Mixed/Multiple ethnic background",
  "Ethnicity: Indian",
  "Ethnicity: Pakistani",
  "Ethnicity: Bangladeshi",
  "Ethnicity: Chinese",
  "Ethnicity: Any other Asian background",
  "Ethnicity: African",
  "Ethnicity: Caribbean",
  "Ethnicity: Any other Black, Black British, or Caribbean background",
  "Ethnicity: Arab",
  "Ethnicity: Any other ethnic group",
  "Ethnicity: Don't think of myself as any of these",
  "Ethnicity: Prefer not to say",
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Migration: Arrived within the last year',
  'Migration: Arrived within the last 3 years',
  'Migration: Arrived within the last 5 years',
  'Migration: Arrived within the last 10 years',
  'Migration: Arrived within the last 15 years',
  'Migration: Arrived within the last 20 years',
  'Migration: Arrived within the last 30 years',
  'Migration: Arrived more than 30 years ago',
  'Migration: Prefer not to say'
)

tab_model(mod, 
          pred.labels = labels, 
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.49	0.38 – 0.62	<0.001
Ethnicity: Irish	0.76	0.41 – 1.33	0.362
Ethnicity: Gypsy or Irish Traveller	1.00	0.18 – 3.75	0.998
Ethnicity: Roma	1.25	0.20 – 5.71	0.788
Ethnicity: Any other White background	0.80	0.59 – 1.08	0.145
Ethnicity: White and Black Caribbean	0.49	0.18 – 1.09	0.112
Ethnicity: White and Black African	2.66	1.30 – 5.30	0.006
Ethnicity: White and Asian	1.19	0.53 – 2.43	0.647
Ethnicity: Any other Mixed/Multiple ethnic background	2.00	1.04 – 3.68	0.031
Ethnicity: Indian	1.18	0.87 – 1.57	0.277
Ethnicity: Pakistani	2.17	1.51 – 3.08	<0.001
Ethnicity: Bangladeshi	1.32	0.78 – 2.19	0.289
Ethnicity: Chinese	0.62	0.37 – 1.00	0.059
Ethnicity: Any other Asian background	1.19	0.80 – 1.75	0.381
Ethnicity: African	1.44	1.02 – 2.03	0.037
Ethnicity: Caribbean	1.27	0.68 – 2.22	0.423
Ethnicity: Any other Black, Black British, or Caribbean background	1.84	0.87 – 3.69	0.095
Ethnicity: Arab	1.97	0.91 – 4.08	0.075
Ethnicity: Any other ethnic group	1.03	0.40 – 2.33	0.942
Ethnicity: Don't think of myself as any of these	2.31	0.45 – 9.63	0.266
Ethnicity: Prefer not to say	1.37	0.53 – 3.17	0.482
Age	0.98	0.97 – 0.98	<0.001
Gender: Male	1.44	1.29 – 1.61	<0.001
Gender: Other	1.27	0.26 – 4.28	0.726
Gender: Prefer not to say	1.20	0.36 – 3.10	0.739
Education: Has degree	0.95	0.84 – 1.06	0.348
Education: Don't know	1.09	0.70 – 1.66	0.679
Region: East Midlands	0.92	0.73 – 1.17	0.510
Region: East of England	0.57	0.45 – 0.73	<0.001
Region: North East	0.61	0.44 – 0.85	0.004
Region: North West	0.81	0.65 – 1.01	0.061
Region: Northern Ireland	0.73	0.48 – 1.06	0.107
Region: Scotland	0.67	0.52 – 0.85	0.001
Region: South East	0.60	0.49 – 0.74	<0.001
Region: South West	0.66	0.51 – 0.84	0.001
Region: Wales	0.88	0.66 – 1.16	0.368
Region: West Midlands	0.81	0.64 – 1.01	0.059
Region: Yorkshire and the Humber	0.65	0.51 – 0.83	0.001
Migration: Arrived within the last year	1.76	1.16 – 2.64	0.007
Migration: Arrived within the last 3 years	1.10	0.73 – 1.63	0.628
Migration: Arrived within the last 5 years	1.23	0.80 – 1.85	0.328
Migration: Arrived within the last 10 years	1.76	1.26 – 2.43	0.001
Migration: Arrived within the last 15 years	1.84	1.27 – 2.64	0.001
Migration: Arrived within the last 20 years	1.69	1.14 – 2.46	0.008
Migration: Arrived within the last 30 years	0.49	0.24 – 0.90	0.033
Migration: Arrived more than 30 years ago	2.11	1.48 – 2.98	<0.001
Migration: Prefer not to say	1.22	0.73 – 1.98	0.433
Observations	9812

We next focus on predicting whether a person was outsourced based on wehther the person was born in the UK. This binary variable was constructed by collapsing the 10-level migration variable down into two levels, so that “I was born in the UK” becomes “Born in UK”, and all levels apart from “I was born in the UK” become “Not born in UK”.

Code

mod <- glm(outsourcing_status ~ BORNUK_binary + Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

tjur <- data %>%
  filter(!is.na(Ethnicity_collapsed_disaggregated)) %>%
  mutate(pred = predict(mod, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod, only_sig=T)

The value of Tjur’s pseudo R² for this model is 0.04. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  'Migration: Not born in the UK',
  "Ethnicity: Irish",
  "Ethnicity: Gypsy or Irish Traveller",
  "Ethnicity: Roma",
  "Ethnicity: Any other White background",
  "Ethnicity: White and Black Caribbean",
  "Ethnicity: White and Black African",
  "Ethnicity: White and Asian",
  "Ethnicity: Any other Mixed/Multiple ethnic background",
  "Ethnicity: Indian",
  "Ethnicity: Pakistani",
  "Ethnicity: Bangladeshi",
  "Ethnicity: Chinese",
  "Ethnicity: Any other Asian background",
  "Ethnicity: African",
  "Ethnicity: Caribbean",
  "Ethnicity: Any other Black, Black British, or Caribbean background",
  "Ethnicity: Arab",
  "Ethnicity: Any other ethnic group",
  "Ethnicity: Don't think of myself as any of these",
  "Ethnicity: Prefer not to say",
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber'
)

tab_model(mod, 
          pred.labels = labels, 
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.48	0.38 – 0.62	<0.001
Migration: Not born in the UK	1.49	1.24 – 1.79	<0.001
Ethnicity: Irish	0.76	0.41 – 1.32	0.349
Ethnicity: Gypsy or Irish Traveller	1.05	0.19 – 3.91	0.949
Ethnicity: Roma	1.14	0.18 – 5.08	0.872
Ethnicity: Any other White background	0.83	0.62 – 1.10	0.201
Ethnicity: White and Black Caribbean	0.48	0.18 – 1.08	0.108
Ethnicity: White and Black African	2.58	1.27 – 5.13	0.007
Ethnicity: White and Asian	1.23	0.55 – 2.50	0.590
Ethnicity: Any other Mixed/Multiple ethnic background	1.82	0.95 – 3.33	0.059
Ethnicity: Indian	1.14	0.85 – 1.51	0.389
Ethnicity: Pakistani	2.14	1.49 – 3.04	<0.001
Ethnicity: Bangladeshi	1.25	0.74 – 2.06	0.393
Ethnicity: Chinese	0.61	0.36 – 0.97	0.046
Ethnicity: Any other Asian background	1.22	0.83 – 1.78	0.303
Ethnicity: African	1.47	1.06 – 2.04	0.020
Ethnicity: Caribbean	1.25	0.67 – 2.18	0.460
Ethnicity: Any other Black, Black British, or Caribbean background	1.73	0.82 – 3.45	0.132
Ethnicity: Arab	2.07	0.96 – 4.26	0.053
Ethnicity: Any other ethnic group	1.06	0.41 – 2.38	0.902
Ethnicity: Don't think of myself as any of these	2.31	0.46 – 9.27	0.257
Ethnicity: Prefer not to say	1.25	0.48 – 2.86	0.622
Age	0.98	0.97 – 0.98	<0.001
Gender: Male	1.44	1.29 – 1.61	<0.001
Gender: Other	1.27	0.26 – 4.27	0.728
Gender: Prefer not to say	1.14	0.34 – 2.94	0.812
Education: Has degree	0.95	0.85 – 1.06	0.375
Education: Don't know	1.11	0.71 – 1.68	0.632
Region: East Midlands	0.93	0.74 – 1.17	0.534
Region: East of England	0.57	0.44 – 0.72	<0.001
Region: North East	0.61	0.44 – 0.84	0.003
Region: North West	0.81	0.65 – 1.00	0.053
Region: Northern Ireland	0.72	0.48 – 1.06	0.102
Region: Scotland	0.66	0.52 – 0.84	0.001
Region: South East	0.60	0.49 – 0.74	<0.001
Region: South West	0.66	0.52 – 0.84	0.001
Region: Wales	0.88	0.66 – 1.16	0.357
Region: West Midlands	0.80	0.64 – 1.00	0.055
Region: Yorkshire and the Humber	0.65	0.51 – 0.82	<0.001
Observations	9812

Code

mod <- glm(outsourcing_status ~ BORNUK_binary * Ethnicity_collapsed + Age + Gender + Has_Degree + Region, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

tjur <- data %>%
  #filter(!is.na(Ethnicity_collapsed_disaggregated)) %>%
  mutate(pred = predict(mod, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod, only_sig=T)

For model 1, the value of Tjur’s pseudo R² is 0.038. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  'Migration: Not born in the UK',
  'Ethnicity: Arab/British Arab',
  'Ethnicity: Asian/Asian British',
  'Ethnicity: Black/African/Caribbean/Black British',
  'Ethnicity: Mixed/Multiple ethnic group',
  'Ethnicity: Other ethnic group',
  'Ethnicity: Prefer not to say',
  'Ethnicity: White other',
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Interaction: Not born in UK x Arab/Arab British',
  'Interaction: Not born in UK x Asian/Asian British',
  'Interaction: Not born in UK x Black/African/Caribbean/Black British',
  'Interaction: Not born in UK x Mixed/Multiple ethnic group',
  'Interaction: Not born in UK x Other ethnic group',
  'Interaction: Not born in UK x Prefer not to say',
  'Interaction: Not born in UK x White other'
  
)

tab_model(mod, 
          pred.labels = labels, 
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.46	0.36 – 0.59	<0.001
Migration: Not born in the UK	2.16	1.65 – 2.80	<0.001
Ethnicity: Arab/British Arab	1.89	0.49 – 6.14	0.310
Ethnicity: Asian/Asian British	1.50	1.17 – 1.90	0.001
Ethnicity: Black/African/Caribbean/Black British	1.55	1.01 – 2.32	0.038
Ethnicity: Mixed/Multiple ethnic group	1.17	0.75 – 1.78	0.470
Ethnicity: Other ethnic group	3.36	1.15 – 9.70	0.023
Ethnicity: Prefer not to say	1.21	0.85 – 1.68	0.268
Ethnicity: White other	1.02	0.63 – 1.58	0.938
Age	0.98	0.98 – 0.98	<0.001
Gender: Male	1.41	1.26 – 1.57	<0.001
Gender: Other	1.15	0.23 – 3.80	0.842
Gender: Prefer not to say	0.69	0.29 – 1.43	0.354
Education: Has degree	0.91	0.82 – 1.02	0.118
Education: Don't know	0.94	0.63 – 1.39	0.777
Region: East Midlands	0.93	0.74 – 1.16	0.510
Region: East of England	0.60	0.47 – 0.76	<0.001
Region: North East	0.64	0.46 – 0.87	0.006
Region: North West	0.86	0.70 – 1.06	0.154
Region: Northern Ireland	0.80	0.55 – 1.14	0.230
Region: Scotland	0.71	0.56 – 0.89	0.004
Region: South East	0.62	0.50 – 0.76	<0.001
Region: South West	0.69	0.54 – 0.87	0.002
Region: Wales	0.90	0.68 – 1.18	0.435
Region: West Midlands	0.84	0.67 – 1.04	0.116
Region: Yorkshire and the Humber	0.67	0.53 – 0.85	0.001
Interaction: Not born in UK x Arab/Arab British	0.86	0.19 – 4.34	0.846
Interaction: Not born in UK x Asian/Asian British	0.51	0.34 – 0.77	0.001
Interaction: Not born in UK x Black/African/Caribbean/Black British	0.67	0.38 – 1.20	0.176
Interaction: Not born in UK x Mixed/Multiple ethnic group	1.19	0.55 – 2.60	0.655
Interaction: Not born in UK x Other ethnic group	0.00	NA – 0.43	0.924
Interaction: Not born in UK x Prefer not to say	0.82	0.45 – 1.49	0.516
Interaction: Not born in UK x White other	0.56	0.32 – 1.00	0.044
Observations	10155

Code

ems <- emmeans(mod, specs = "Ethnicity_collapsed", by = "BORNUK_binary")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons <- cons %>% filter(p.value < .05) %>%
  mutate(
    or = 1 / exp(estimate), .after=estimate # 1 / or because we want to express comparison - white(ref) (contrast expresses white(ref) - comparison)
  )

To explore the interaction we calculated estimated marginal means of the ethnicity x migration effect. Examining the effect of ethnicity within each level of migration, we find that:

Among people born in the UK, Asian/Asian British people are 1.5 times more likely to be outsourced than White British people.
Among people not born in the UK, White other workers are 0.57 as likely to be outsourced than White British people.
Among people not born in the UK, White other workers are 0.54 as likely to be outsourced than Black/African/Caribbean/Black British people.

The plot below visualises these effects.

Code

sjPlot::plot_model(mod, type = "pred", legend.title="", terms = c("BORNUK_binary","Ethnicity_collapsed"), dodge=0.5) +#
  
  coord_flip() +
  xlab("") + ylab("Likelihood of being outsourced") +
  theme_minimal()

Code

ems <- emmeans(mod, specs = "BORNUK_binary", by = "Ethnicity_collapsed")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons <- cons %>% filter(p.value < .05) %>%
  mutate(
    or = 1 / exp(estimate), .after=estimate # 1 / or because we want to express comparison - white(ref) (contrast expresses white(ref) - comparison)
  )

Examining the effect of migration for particular ethnicities, we find that:

Among White British people, people not born in the UK are 2.16 more likely to be outsourced than people born in the UK
Among people of Mixed/Multiple ethnic group, people not born in the UK are 2.57 more likely to be outsourced than people born in the UK
Among people who preferred not to say their ethnicity, people not born in the UK are 1.77 more likely to be outsourced than people born in the UK

The plot below visualises these effects.

Code

sjPlot::plot_model(mod, type = "pred", legend.title="", terms = c("Ethnicity_collapsed","BORNUK_binary"), dodge=0.5) +#
  
  coord_flip() +
  xlab("") + ylab("Likelihood of being outsourced") +
  theme_minimal()

Code

mod <- glm(outsourcing_status ~ BORNUK_binary * Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

tjur <- data %>%
  filter(!is.na(Ethnicity_collapsed_disaggregated)) %>%
  mutate(pred = predict(mod, type = "response")) %>%
  group_by(outsourcing_status) %>%
  summarise(weighted_mean = weighted.mean(pred, NatRepemployees)) %>%
  summarise(tjur_r2 = diff(weighted_mean)) %>%
  pull()

coefs <- extract_glm_coefs(mod, only_sig=T)
coefs_all <- extract_glm_coefs(mod)

For model 2, the value of Tjur’s pseudo R² is 0.045. The table below shows the model coefficients.

Code

labels <- c(
  'Intercept',
  'Migration: Not born in the UK',
  "Ethnicity: Irish",
  "Ethnicity: Gypsy or Irish Traveller",
  "Ethnicity: Roma",
  "Ethnicity: Any other White background",
  "Ethnicity: White and Black Caribbean",
  "Ethnicity: White and Black African",
  "Ethnicity: White and Asian",
  "Ethnicity: Any other Mixed/Multiple ethnic background",
  "Ethnicity: Indian",
  "Ethnicity: Pakistani",
  "Ethnicity: Bangladeshi",
  "Ethnicity: Chinese",
  "Ethnicity: Any other Asian background",
  "Ethnicity: African",
  "Ethnicity: Caribbean",
  "Ethnicity: Any other Black, Black British, or Caribbean background",
  "Ethnicity: Arab",
  "Ethnicity: Any other ethnic group",
  "Ethnicity: Don't think of myself as any of these",
  "Ethnicity: Prefer not to say",
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  "Interaction: Not born in UK x Irish",
  "Interaction: Not born in UK x Gypsy or Irish Traveller",
  # "Interaction: Not born in UK x Roma",
  "Interaction: Not born in UK x Any other White background",
  "Interaction: Not born in UK x White and Black Caribbean",
  "Interaction: Not born in UK x White and Black African",
  "Interaction: Not born in UK x White and Asian",
  "Interaction: Not born in UK x Any other Mixed/Multiple ethnic background",
  "Interaction: Not born in UK x Indian",
  "Interaction: Not born in UK x Pakistani",
  "Interaction: Not born in UK x Bangladeshi",
  "Interaction: Not born in UK x Chinese",
  "Interaction: Not born in UK x Any other Asian background",
  "Interaction: Not born in UK x African",
  "Interaction: Not born in UK x Caribbean",
  "Interaction: Not born in UK x Any other Black, Black British, or Caribbean background",
  "Interaction: Not born in UK x Arab",
  "Interaction: Not born in UK x Any other ethnic group",
  "Interaction: Not born in UK x Don't think of myself as any of these",
  "Interaction: Not born in UK x Prefer not to say"

  
)

tab_model(mod, 
          pred.labels = labels,
          dv.labels = "Outsourcing",
          show.r2 = FALSE)

	Outsourcing
Predictors	Odds Ratios	CI	p
Intercept	0.45	0.35 – 0.58	<0.001
Migration: Not born in the UK	2.12	1.61 – 2.75	<0.001
Ethnicity: Irish	0.92	0.46 – 1.72	0.810
Ethnicity: Gypsy or Irish Traveller	1.67	0.30 – 6.98	0.505
Ethnicity: Roma	0.82	0.13 – 3.71	0.811
Ethnicity: Any other White background	1.11	0.53 – 2.14	0.757
Ethnicity: White and Black Caribbean	0.53	0.19 – 1.19	0.166
Ethnicity: White and Black African	3.39	1.26 – 8.76	0.012
Ethnicity: White and Asian	0.90	0.32 – 2.12	0.830
Ethnicity: Any other Mixed/Multiple ethnic background	1.88	0.78 – 4.13	0.135
Ethnicity: Indian	1.32	0.90 – 1.91	0.143
Ethnicity: Pakistani	2.68	1.73 – 4.08	<0.001
Ethnicity: Bangladeshi	1.81	0.90 – 3.49	0.083
Ethnicity: Chinese	0.54	0.20 – 1.16	0.152
Ethnicity: Any other Asian background	1.06	0.44 – 2.28	0.882
Ethnicity: African	1.53	0.77 – 2.84	0.200
Ethnicity: Caribbean	1.12	0.53 – 2.16	0.747
Ethnicity: Any other Black, Black British, or Caribbean background	2.62	1.10 – 5.90	0.023
Ethnicity: Arab	1.86	0.48 – 6.05	0.324
Ethnicity: Any other ethnic group	3.36	1.15 – 9.73	0.023
Ethnicity: Don't think of myself as any of these	5.42	0.63 – 41.39	0.090
Ethnicity: Prefer not to say	1.99	0.65 – 5.17	0.183
Age	0.98	0.98 – 0.98	<0.001
Gender: Male	1.44	1.29 – 1.61	<0.001
Gender: Other	1.26	0.25 – 4.26	0.736
Gender: Prefer not to say	1.08	0.32 – 2.82	0.888
Education: Has degree	0.95	0.84 – 1.06	0.339
Education: Don't know	1.11	0.72 – 1.69	0.620
Region: East Midlands	0.95	0.75 – 1.20	0.690
Region: East of England	0.59	0.47 – 0.75	<0.001
Region: North East	0.63	0.45 – 0.87	0.007
Region: North West	0.84	0.68 – 1.05	0.122
Region: Northern Ireland	0.72	0.48 – 1.06	0.109
Region: Scotland	0.69	0.54 – 0.88	0.003
Region: South East	0.62	0.51 – 0.77	<0.001
Region: South West	0.69	0.54 – 0.87	0.002
Region: Wales	0.91	0.68 – 1.21	0.521
Region: West Midlands	0.81	0.65 – 1.01	0.068
Region: Yorkshire and the Humber	0.67	0.52 – 0.86	0.001
Interaction: Not born in UK x Irish	0.37	0.08 – 1.36	0.163
Interaction: Not born in UK x Gypsy or Irish Traveller	0.00	NA – 96204611106190144.00	0.970
Interaction: Not born in UK x Any other White background	0.52	0.25 – 1.19	0.102
Interaction: Not born in UK x White and Black Caribbean	0.00	NA – 490664849159.12	0.968
Interaction: Not born in UK x White and Black African	0.43	0.10 – 1.73	0.231
Interaction: Not born in UK x White and Asian	2.80	0.47 – 18.40	0.259
Interaction: Not born in UK x Any other Mixed/Multiple ethnic background	0.72	0.20 – 2.55	0.602
Interaction: Not born in UK x Indian	0.55	0.31 – 1.00	0.051
Interaction: Not born in UK x Pakistani	0.42	0.19 – 0.89	0.024
Interaction: Not born in UK x Bangladeshi	0.35	0.12 – 0.98	0.047
Interaction: Not born in UK x Chinese	0.90	0.33 – 2.74	0.838
Interaction: Not born in UK x Any other Asian background	0.87	0.35 – 2.35	0.774
Interaction: Not born in UK x African	0.70	0.33 – 1.54	0.362
Interaction: Not born in UK x Caribbean	1.19	0.31 – 4.35	0.793
Interaction: Not born in UK x Any other Black, Black British, or Caribbean background	0.20	0.03 – 0.98	0.062
Interaction: Not born in UK x Arab	0.87	0.19 – 4.43	0.864
Interaction: Not born in UK x Any other ethnic group	0.00	NA – 0.43	0.924
Interaction: Not born in UK x Don't think of myself as any of these	0.13	0.00 – 2.46	0.186
Interaction: Not born in UK x Prefer not to say	0.19	0.02 – 1.24	0.102
Observations	9812

Code

ems <- emmeans(mod, specs = "Ethnicity_collapsed_disaggregated", by = "BORNUK_binary")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons <- cons %>% filter(p.value < .05) %>%
  mutate(
    or = 1 / exp(estimate), .after=estimate # 1 / or because we want to express comparison - white(ref) (contrast expresses white(ref) - comparison)
  )

Again for this model we calculated estimated marginal means of the ethnicity x migration effect. Examining the effect of ethnicity within each level of migration, we find that:

Among people born in the UK, Pakistani workers are 2.68 times more likely to be outsourced than English/Welsh/Scottish/Northern Irish/British workers.

No other differences were significant. The plot below visualises these effects.

Code

sjPlot::plot_model(mod, type = "pred", legend.title="", terms = c("BORNUK_binary","Ethnicity_collapsed_disaggregated"), dodge=0.5) +#
  
  coord_flip() +
  xlab("") + ylab("Likelihood of being outsourced") +
  theme_minimal() +
  theme(
    legend.position = "none"
  )

Code

ems <- emmeans(mod, specs = "BORNUK_binary", by = "Ethnicity_collapsed_disaggregated")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons <- cons %>% filter(p.value < .05) %>%
  mutate(
    or = 1 / exp(estimate), .after=estimate # 1 / or because we want to express comparison - white(ref) (contrast expresses white(ref) - comparison)
  )

Examining the effect of migration for particular ethnicities, we find that:

Among English/Welsh/Scottish/Northern Irish/British workers, people not born in the UK are 2.12 more likely to be outsourced than people born in the UK
Among Mixed White and Asian workers, people not born in the UK are 5.93 more likely to be outsourced than people born in the UK (NB n < 50 for this group).

No other differences were significant. The plot below visualises these findings.

Code

sjPlot::plot_model(mod, type = "pred", legend.title="", terms = c("Ethnicity_collapsed_disaggregated","BORNUK_binary"), dodge=0.5) +#
  
  coord_flip() +
  xlab("") + ylab("Likelihood of being outsourced") +
  theme_minimal()

5 Outsourced workers are more likely to be men, but women are more likely to be low-paid outsourced workers

Code

mod <- glm(outsourcing_status ~ BORNUK_binary + Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

coefs <- extract_glm_coefs(mod, only_sig=T)

gender_coefs <- coefs %>%
  filter(variable == "GenderMale")

The same model as the previous was used to estimate the effect of Gender on outsourcing, and indicates that men are 1.44 times more likely to be outsourced than women.

Code

# Annual income
# Intercept only
mod_base <- lm(income_annual_all ~ 1, income_data, weights = NatRepemployees)
# H1
mod_annual <- lm(income_annual_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled, income_data, weights = NatRepemployees)

summary(mod_annual)

# F and p
f_annual <- round(anova(mod_base, mod_annual)[2,"F"],2)
p_annual <- anova(mod_base, mod_annual)[2,"Pr(>F)"]
if(p_annual < .001){
  p_annual = "< .001"
} else{
  p_annual = paste0("= ",round(p_annual,3))
}

# Degrees of freedom
dfs_annual <- as.list(anova(mod_base, mod_annual)[2,c("Df","Res.Df")])
# R2
rsquare_annual <- round(summary(mod_annual)$r.squared,2)

# Weekly income
# Intercept only
mod_base <- lm(income_weekly_all ~ 1, income_data, weights = NatRepemployees)
# H1
mod_weekly <- lm(income_weekly_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled, income_data, weights = NatRepemployees)
summary(mod_weekly)

# F and p
f_weekly <- round(anova(mod_base, mod_weekly)[2,"F"],2)
p_weekly <- anova(mod_base, mod_weekly)[2,"Pr(>F)"]
if(p_weekly < .001){
  p_weekly = "< .001"
} else{
  p_weekly = paste0("= ",round(p_weekly,3))
}

# Degrees of freedom
dfs_weekly <- as.list(anova(mod_base, mod_weekly)[2,c("Df","Res.Df")])
# R2
rsquare_weekly <- round(summary(mod_weekly)$r.squared,2)

Code

annual_gender_coef <- extract_lm_coefs(mod_annual, only_sig = T) %>%
  filter(variable == "GenderMale") 

weekly_gender_coef <- extract_lm_coefs(mod_weekly, only_sig = T) %>%
  filter(variable == "GenderMale")

Exploring a possible gender pay gap, as shown in Table 1 and Table 2 there is a significant difference in pay between men and women. Annually, men earn £7027.53 more than women. Per week, men earn £150.99 more than women.

We next explored whether outsourcing compounds this gender pay gap by adding an outsourcing:gender interaction term into the previous models.

Code

# Annual income

# H1
mod_annual_int <- lm(income_annual_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled + Gender:outsourcing_status, income_data, weights = NatRepemployees)

summary(mod_annual_int)

# F and p
anova_test <- anova(mod_annual, mod_annual_int)
f_annual <- anova_test[2,"F"] %>% round(2)
p_annual <- anova_test[2,"Pr(>F)"]
if(p_annual < .001){
  p_annual = "< .001"
} else{
  p_annual = paste0("= ",round(p_annual,3))
}

# Degrees of freedom
dfs_annual <- as.list(anova_test[2,c("Df","Res.Df")])
# R2
rsquare_annual <- round(summary(mod_annual_int)$r.squared,2)

# Weekly income

# H1
mod_weekly_int <- lm(income_weekly_all ~ Age + Gender + Has_Degree + Ethnicity_collapsed + Region + outsourcing_status + BORNUK_labelled + Gender:outsourcing_status, income_data, weights = NatRepemployees)
summary(mod_weekly_int)

# F and p
anova_test <- anova(mod_weekly, mod_weekly_int)

f_weekly <-  anova_test[2,"F"] %>% round(2)
p_weekly <- anova_test[2,"Pr(>F)"]
if(p_weekly < .001){
  p_weekly = "< .001"
} else{
  p_weekly = paste0("= ", round(p_weekly,3))
}

# Degrees of freedom
dfs_weekly <- as.list(anova_test[2,c("Df","Res.Df")])
# R2
rsquare_weekly <- round(summary(mod_weekly_int)$r.squared,2)

For both models, adding the interaction effect did not improve model fit (R² = 0.18, F(3, 8253) = 0.76, p = 0.514). The tables below show the coefficients for each model.

Code

labels <- c(
  'Intercept',
  'Age',
  'Gender: Male',
  'Gender: Other',
  'Gender: Prefer not to say',
  'Education: Has degree',
  "Education: Don't know",
  'Ethnicity: Arab/British Arab',
  'Ethnicity: Asian/Asian British',
  'Ethnicity: Black/African/Caribbean/Black British',
  'Ethnicity: Mixed/Multiple ethnic group',
  'Ethnicity: Other ethnic group',
  'Ethnicity: Prefer not to say',
  'Ethnicity: White other',
  'Region: East Midlands',
  'Region: East of England',
  'Region: North East',
  'Region: North West',
  'Region: Northern Ireland',
  'Region: Scotland',
  'Region: South East',
  'Region: South West',
  'Region: Wales',
  'Region: West Midlands',
  'Region: Yorkshire and the Humber',
  'Outsourcing: Outsourced',
  'Migration: Arrived within the last year',
  'Migration: Arrived within the last 3 years',
  'Migration: Arrived within the last 5 years',
  'Migration: Arrived within the last 10 years',
  'Migration: Arrived within the last 15 years',
  'Migration: Arrived within the last 20 years',
  'Migration: Arrived within the last 30 years',
  'Migration: Arrived more than 30 years ago',
  'Migration: Prefer not to say',
  'Interaction: Outsourcing x Gender Male',
  'Interaction: Outsourcing x Gender Other',
  'Interaction: Outsourcing x Gender Prefer not to say'
)
  
tab_model(mod_annual_int, pred.labels = labels, dv.labels = "Annual income")

Table 4: Linear regression predicting annual income with outsourcing:gender interaction term

	Annual income
Predictors	Estimates	CI	p
Intercept	23921.57	22660.06 – 25183.08	<0.001
Age	14.52	-6.10 – 35.14	0.167
Gender: Male	6994.01	6417.97 – 7570.04	<0.001
Gender: Other	3525.48	-4019.27 – 11070.23	0.360
Gender: Prefer not to say	4366.44	-1326.82 – 10059.69	0.133
Education: Has degree	8190.56	7643.58 – 8737.54	<0.001
Education: Don't know	-1963.22	-4091.44 – 165.00	0.071
Ethnicity: Arab/British Arab	-172.85	-4863.98 – 4518.28	0.942
Ethnicity: Asian/Asian British	-466.43	-1560.35 – 627.49	0.403
Ethnicity: Black/African/Caribbean/Black British	-1176.65	-2780.51 – 427.21	0.150
Ethnicity: Mixed/Multiple ethnic group	-1514.57	-3493.64 – 464.51	0.134
Ethnicity: Other ethnic group	3570.13	-1022.61 – 8162.86	0.128
Ethnicity: Prefer not to say	-243.51	-1963.50 – 1476.48	0.781
Ethnicity: White other	-586.40	-1950.14 – 777.34	0.399
Region: East Midlands	-5792.15	-7008.07 – -4576.23	<0.001
Region: East of England	-4101.18	-5244.25 – -2958.10	<0.001
Region: North East	-4877.65	-6407.08 – -3348.23	<0.001
Region: North West	-4501.54	-5608.43 – -3394.65	<0.001
Region: Northern Ireland	-6563.67	-8340.41 – -4786.94	<0.001
Region: Scotland	-5489.92	-6678.61 – -4301.23	<0.001
Region: South East	-3433.17	-4473.52 – -2392.82	<0.001
Region: South West	-5704.35	-6891.22 – -4517.48	<0.001
Region: Wales	-5388.98	-6835.17 – -3942.79	<0.001
Region: West Midlands	-5021.92	-6178.63 – -3865.21	<0.001
Region: Yorkshire and the Humber	-5547.51	-6730.41 – -4364.60	<0.001
Outsourcing: Outsourced	-3031.29	-4081.44 – -1981.15	<0.001
Migration: Arrived within the last year	-6140.84	-8382.45 – -3899.24	<0.001
Migration: Arrived within the last 3 years	-2391.65	-4414.37 – -368.92	0.020
Migration: Arrived within the last 5 years	-2057.10	-4287.24 – 173.05	0.071
Migration: Arrived within the last 10 years	-637.25	-2383.00 – 1108.51	0.474
Migration: Arrived within the last 15 years	718.11	-1222.02 – 2658.23	0.468
Migration: Arrived within the last 20 years	1305.40	-778.66 – 3389.46	0.220
Migration: Arrived within the last 30 years	3486.00	1066.44 – 5905.57	0.005
Migration: Arrived more than 30 years ago	-194.33	-2114.72 – 1726.06	0.843
Migration: Prefer not to say	-2115.89	-5649.21 – 1417.43	0.240
Interaction: Outsourcing x Gender Male	192.17	-1217.63 – 1601.98	0.789
Interaction: Outsourcing x Gender Other	-12336.38	-28857.14 – 4184.38	0.143
Interaction: Outsourcing x Gender Prefer not to say	-1318.07	-15431.44 – 12795.30	0.855
Observations	8291
R² / R² adjusted	0.184 / 0.180

Code

tab_model(mod_weekly_int, pred.labels = labels, dv.labels = "Weekly income")

Table 5: Linear regression predicting weekly income with outsourcing:gender interaction term

	Weekly income
Predictors	Estimates	CI	p
Intercept	513.97	486.86 – 541.07	<0.001
Age	0.31	-0.13 – 0.76	0.167
Gender: Male	150.27	137.89 – 162.65	<0.001
Gender: Other	75.75	-86.36 – 237.85	0.360
Gender: Prefer not to say	93.82	-28.51 – 216.14	0.133
Education: Has degree	175.98	164.23 – 187.73	<0.001
Education: Don't know	-42.18	-87.91 – 3.55	0.071
Ethnicity: Arab/British Arab	-3.71	-104.51 – 97.08	0.942
Ethnicity: Asian/Asian British	-10.02	-33.53 – 13.48	0.403
Ethnicity: Black/African/Caribbean/Black British	-25.28	-59.74 – 9.18	0.150
Ethnicity: Mixed/Multiple ethnic group	-32.54	-75.06 – 9.98	0.134
Ethnicity: Other ethnic group	76.71	-21.97 – 175.38	0.128
Ethnicity: Prefer not to say	-5.23	-42.19 – 31.72	0.781
Ethnicity: White other	-12.60	-41.90 – 16.70	0.399
Region: East Midlands	-124.45	-150.57 – -98.32	<0.001
Region: East of England	-88.12	-112.68 – -63.56	<0.001
Region: North East	-104.80	-137.66 – -71.94	<0.001
Region: North West	-96.72	-120.50 – -72.94	<0.001
Region: Northern Ireland	-141.02	-179.20 – -102.85	<0.001
Region: Scotland	-117.95	-143.49 – -92.41	<0.001
Region: South East	-73.76	-96.12 – -51.41	<0.001
Region: South West	-122.56	-148.06 – -97.06	<0.001
Region: Wales	-115.79	-146.86 – -84.71	<0.001
Region: West Midlands	-107.90	-132.75 – -83.05	<0.001
Region: Yorkshire and the Humber	-119.19	-144.61 – -93.78	<0.001
Outsourcing: Outsourced	-65.13	-87.69 – -42.57	<0.001
Migration: Arrived within the last year	-131.94	-180.10 – -83.78	<0.001
Migration: Arrived within the last 3 years	-51.39	-94.85 – -7.93	0.020
Migration: Arrived within the last 5 years	-44.20	-92.11 – 3.72	0.071
Migration: Arrived within the last 10 years	-13.69	-51.20 – 23.82	0.474
Migration: Arrived within the last 15 years	15.43	-26.26 – 57.11	0.468
Migration: Arrived within the last 20 years	28.05	-16.73 – 72.82	0.220
Migration: Arrived within the last 30 years	74.90	22.91 – 126.88	0.005
Migration: Arrived more than 30 years ago	-4.18	-45.44 – 37.09	0.843
Migration: Prefer not to say	-45.46	-121.38 – 30.45	0.240
Interaction: Outsourcing x Gender Male	4.13	-26.16 – 34.42	0.789
Interaction: Outsourcing x Gender Other	-265.05	-620.01 – 89.90	0.143
Interaction: Outsourcing x Gender Prefer not to say	-28.32	-331.55 – 274.91	0.855
Observations	8291
R² / R² adjusted	0.184 / 0.180

Code

ems <- emmeans(mod_annual_int, specs = "Gender", by = "outsourcing_status", nuisance = "BORNUK_labelled")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons <- cons %>% filter(p.value < .05)

Code

ems <- emmeans(mod_annual_int, specs = "outsourcing_status", by = "Gender", nuisance = "BORNUK_labelled")
cons <- summary(contrast(ems, "pairwise",adjust="tukey"))
sig_cons2 <- cons %>% filter(p.value < .05)

The interaction term is non-significant. Estimated marginal means show that:

Among not outsourced workers, men are paid £6994.01 more than women
Among outsourced workers, men are paid £7186.18 more than women
Among men, not outsourced workers are paid £2839.12 more than outsourced workers.
Among women, not outsourced workers are paid £3031.29 more than outsourced workers.

The plot below illustrates the main effects that men are paid more than women and that outsourced men and women are paid less than non-outsourced men and women. The lack of interaction indicates that the difference in pay between men and women does not significantly differ between outsourced and non-outsourced people.

Code

sjPlot::plot_model(mod_annual_int, type = "pred", legend.title="", terms = c("outsourcing_status","Gender"), dodge=0.5) +#
  
  coord_flip() +
  xlab("") + ylab("Likelihood of being outsourced") +
  theme_minimal()

6 Age

Code

mod <- glm(outsourcing_status ~ BORNUK_binary + Ethnicity_collapsed_disaggregated + Age + Gender + Has_Degree + Region, data, weights = NatRepemployees, family="quasibinomial")
summary(mod)

coefs <- extract_glm_coefs(mod, only_sig=T)

age_coefs <- coefs %>%
  filter(variable == "Age")

We found that age was a significant predictor of the likelihood of being outsourced. The model indicates that for each year older a worker is, they are 0.98 times as likely (i.e. 2% less likely) to be outsourced.