import pandas as pdimport requestsimport osimport janitorimport numpy as npimport geopandas as gpdimport matplotlib as mplfrom matplotlib.colors import ListedColormap, LinearSegmentedColormapimport matplotlib.pyplot as pltimport seaborn as snsimport math
# Clean data
#
# `relative` and `absolute` are loaded earlier in the notebook. Both tables
# go through exactly the same steps, so the steps live in small helpers.


def _tidy_columns(df):
    """Return *df* with snake_case column names reduced to plain year labels.

    Footnote markers are stripped from the column names, every column whose
    name contains 'number' is dropped, and the leading 'percentage...' text
    in front of 'fye' is removed so the value columns are named 'fye_<year>'.
    """
    df = janitor.clean_names(df)

    # Remove notes in colnames
    df.columns = (
        df.columns
        .str.replace(r'_\[.*$', '', regex=True)  # Remove _[ to end
        .str.replace(r'\[p\]', '', regex=True)   # Remove [p]
        .str.replace(r'_$', '', regex=True)      # Remove trailing underscore
        .str.replace(r'_%$', '', regex=True)     # Remove percentage sign at end
    )

    # Drop the number columns
    df = df.drop(columns=[col for col in df.columns if 'number' in col])

    # Rename columns to just year: remove 'percentage' up to but not
    # including 'fye'
    df.columns = df.columns.str.replace(
        r'^percentage.*?(?=fye)', '', regex=True
    )
    return df


def _year_columns_to_numeric(df):
    """Convert every 'fye_*' column of *df* to a numeric dtype.

    Text markers such as '[x]' become NaN. The conversion is explicit so the
    columns do not depend on `DataFrame.replace` silently downcasting object
    columns, which pandas has deprecated (it raised a FutureWarning here).
    """
    year_cols = [col for col in df.columns if col.startswith('fye_')]
    df[year_cols] = df[year_cols].apply(pd.to_numeric, errors='coerce')
    return df


def _add_decile_columns(df, value_cols):
    """Add a '<col>_decile' column to *df* for each column in *value_cols*.

    Deciles are computed across all rows of *df* and reversed so that 1 is
    the group with the highest values.
    """
    for col in value_cols:
        # Deciles 1 to 10
        deciles = pd.qcut(df[col], q=10, labels=False, duplicates='drop') + 1
        # Reverse so 1 is worst.
        # NOTE(review): if duplicates='drop' merges bin edges there are fewer
        # than 10 groups and the reversed scale no longer starts at 1.
        df[col + '_decile'] = 11 - deciles
    return df


def _decile_mean(df, decile_col, value_col, decile):
    """Return the mean of *value_col* over rows whose *decile_col* == *decile*."""
    return df.loc[df[decile_col] == decile, value_col].mean()


# Drop the UK row (the first row), then tidy names and coerce values
relative = _year_columns_to_numeric(_tidy_columns(relative.iloc[1:]))
absolute = _year_columns_to_numeric(_tidy_columns(absolute.iloc[1:]))

# Loop through columns and create decile columns. The column list is taken
# from `relative` and reused for `absolute`, so both tables are expected to
# carry the same 'fye_*' columns.
columns = [col for col in relative.columns if col.startswith('fye_')]
relative = _add_decile_columns(relative, columns)
absolute = _add_decile_columns(absolute, columns)

# Subset to haringey
haringey_relative = relative.loc[relative['local_authority'] == 'Haringey']
haringey_absolute = absolute.loc[absolute['local_authority'] == 'Haringey']

# Get some averages from national picture
relative_decile_1_2015_mean = _decile_mean(
    relative, 'fye_2015_decile', 'fye_2015', 1
)
relative_decile_10_2015_mean = _decile_mean(
    relative, 'fye_2015_decile', 'fye_2015', 10
)
absolute_decile_1_2015_mean = _decile_mean(
    absolute, 'fye_2015_decile', 'fye_2015', 1
)
absolute_decile_10_2015_mean = _decile_mean(
    absolute, 'fye_2015_decile', 'fye_2015', 10
)
C:\Users\Jolyon\AppData\Local\Temp\ipykernel_46168\4279449845.py:44: FutureWarning:
Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
C:\Users\Jolyon\AppData\Local\Temp\ipykernel_46168\4279449845.py:45: FutureWarning:
Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
Code
# Get a list of wards to request from geoportal API.
# Missing codes are dropped first: a NaN would make the string join below fail.
haringey_wards = haringey_relative['area_code_1'].dropna().unique().tolist()

# Ward boundaries feature service (December 2022, UK, BFC)
base_url = 'https://services1.arcgis.com/ESMARspQHYMw9BZ9/ArcGIS/rest/services/Wards_December_2022_Boundaries_UK_BFC/FeatureServer/0/query'

# Build an SQL-style filter of the form: WD22CD IN ('code1', 'code2', ...)
where_clause = "WD22CD IN ('" + "', '".join(haringey_wards) + "')"

# Parameters
params = {
    "where": where_clause,      # Only the wards listed above
    "outFields": "*",           # "*" specifies all fields
    "outSR": "4326",
    "f": "geojson",
    "resultOffset": 0,          # For pagination
    "resultRecordCount": 2000,
}

# Send request. The timeout stops the notebook hanging on a stalled
# connection, and raise_for_status() surfaces HTTP errors here instead of
# passing an error page on to geopandas.
response = requests.get(base_url, params=params, timeout=60)
response.raise_for_status()

# Read the response content as geopandas df
haringey_boundaries = gpd.read_file(response.content)
haringey_boundaries = janitor.clean_names(haringey_boundaries)

# Merge the data with the geometry. how='right' keeps every Haringey data
# row, so a ward with no matching boundary ends up with a missing geometry.
haringey_relative_gpd = haringey_boundaries.merge(
    haringey_relative, how='right', left_on='wd22cd', right_on='area_code_1'
)
haringey_absolute_gpd = haringey_boundaries.merge(
    haringey_absolute, how='right', left_on='wd22cd', right_on='area_code_1'
)
Plots
Relative
2024
Code
# Choropleth of the 2024 relative-poverty decile for each Haringey ward.
# The colormap is reversed viridis, on a fixed 1-10 scale.
cmap = mpl.colormaps['viridis'].reversed()

fig, ax = plt.subplots(figsize=[10, 10])
haringey_relative_gpd.plot(
    ax=ax,
    column='fye_2024_decile',
    legend=True,
    cmap=cmap,
    legend_kwds={'label': "Decile (where 1 represents the 10% of places nationally with the highest number)"},
    vmin=1,
    vmax=10,
    edgecolor='black',
    linewidth=0.1,
)

# Add ward labels
for _, row in haringey_relative_gpd.iterrows():
    geometry = row['geometry']
    # Rows without a usable polygon cannot be labelled; skip them rather
    # than fail on a missing or empty geometry.
    if geometry is None or geometry.is_empty:
        continue
    if geometry.geom_type not in ('Polygon', 'MultiPolygon'):
        continue

    # Use the centroid of the polygon for label position
    centroid = geometry.centroid
    x, y = centroid.x, centroid.y
    if row['wd22nm'] == 'Bruce Castle':
        # Manual upward nudge for this one ward's label
        y += 0.001

    ax.text(
        x, y,
        row['wd22nm'],
        fontsize=8,
        ha='center',
        va='center',
        color='black',
        weight='bold',
    )

ax.set_title('Children living in relative poverty in Haringey: 2024')
ax.set_axis_off()
Across years
Code
# Small-multiple maps: one relative-poverty decile map per year.
years = list(range(2015, 2025))
columns = [f'fye_{year}_decile' for year in years]

# Define the colormap here rather than relying on whichever cell last
# assigned `cmap`; another cell sets it to the non-reversed viridis.
cmap = mpl.colormaps['viridis'].reversed()

# Set up subplot grid: adjust rows and cols as needed
n_cols = 3
n_rows = math.ceil(len(columns) / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows))

# Flatten axes for easy iteration
axes = axes.flatten()

for year, col, ax in zip(years, columns, axes):
    haringey_relative_gpd.plot(
        ax=ax,
        column=col,
        cmap=cmap,
        legend=True,
        legend_kwds={'label': "Decile (1 = highest 10% nationally)", 'shrink': 0.5},
        vmin=1,
        vmax=10,
        edgecolor='black',
        linewidth=0.1,
    )
    ax.set_title(f'{year}')
    ax.set_axis_off()

# Hide any unused subplots if number of columns doesn't fill grid
for unused_ax in axes[len(columns):]:
    fig.delaxes(unused_ax)

fig.suptitle('Children in relative poverty by year: Deciles', size=20)
plt.tight_layout()
plt.show()
Percentage
Code
# Small-multiple maps of the raw relative-poverty values, one panel per year,
# all sharing a single colour scale.
cmap = mpl.colormaps['viridis']

# raw numbers
years = list(range(2015, 2025))
columns = [f'fye_{year}' for year in years]

# Shared colour limits: widen the observed range outwards to multiples of .05
highest_value = haringey_relative_gpd[columns].max().max()
lowest_value = haringey_relative_gpd[columns].min().min()
max_number = math.ceil(highest_value / .05) * .05
min_number = math.floor(lowest_value / .05) * .05

# Set up subplot grid: adjust rows and cols as needed
n_cols = 3
n_rows = math.ceil(len(columns) / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows))

# Flatten axes for easy iteration
axes = axes.flatten()

legend_options = {'label': "Percentage of children", 'shrink': 0.5}
for year, col, ax in zip(years, columns, axes):
    haringey_relative_gpd.plot(
        ax=ax,
        column=col,
        cmap=cmap,
        legend=True,
        legend_kwds=legend_options,
        vmin=min_number,
        vmax=max_number,
    )
    ax.set_title(f'{year}')
    ax.set_axis_off()

# Hide any unused subplots if number of columns doesn't fill grid
for spare_ax in axes[len(columns):]:
    fig.delaxes(spare_ax)

fig.suptitle('Children in relative poverty by year: Percentage', size=20)
plt.tight_layout()
plt.show()
Absolute
2024
Code
# Choropleth of the 2024 absolute-poverty decile for each Haringey ward.
# The colormap is reversed viridis, on a fixed 1-10 scale.
cmap = mpl.colormaps['viridis'].reversed()

fig, ax = plt.subplots(figsize=[10, 10])
haringey_absolute_gpd.plot(
    ax=ax,
    column='fye_2024_decile',
    legend=True,
    cmap=cmap,
    legend_kwds={'label': "Decile (where 1 represents the 10% of places nationally with the highest number)"},
    vmin=1,
    vmax=10,
    edgecolor='black',
    linewidth=0.1,
)

# Add ward labels
for _, row in haringey_absolute_gpd.iterrows():
    geometry = row['geometry']
    # Rows without a usable polygon cannot be labelled; skip them rather
    # than fail on a missing or empty geometry.
    if geometry is None or geometry.is_empty:
        continue
    if geometry.geom_type not in ('Polygon', 'MultiPolygon'):
        continue

    # Use the centroid of the polygon for label position
    centroid = geometry.centroid
    x, y = centroid.x, centroid.y
    if row['wd22nm'] == 'Bruce Castle':
        # Manual upward nudge for this one ward's label
        y += 0.001

    ax.text(
        x, y,
        row['wd22nm'],
        fontsize=8,
        ha='center',
        va='center',
        color='black',
        weight='bold',
    )

ax.set_title('Children living in absolute poverty in Haringey: 2024')
ax.set_axis_off()
Across years
Code
# Small-multiple maps: one absolute-poverty decile map per year.
years = list(range(2015, 2025))
columns = [f'fye_{year}_decile' for year in years]

# Define the colormap here rather than relying on whichever cell last
# assigned `cmap`; another cell sets it to the non-reversed viridis.
cmap = mpl.colormaps['viridis'].reversed()

# Set up subplot grid: adjust rows and cols as needed
n_cols = 3
n_rows = math.ceil(len(columns) / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows))

# Flatten axes for easy iteration
axes = axes.flatten()

for year, col, ax in zip(years, columns, axes):
    haringey_absolute_gpd.plot(
        ax=ax,
        column=col,
        cmap=cmap,
        legend=True,
        legend_kwds={'label': "Decile (1 = highest 10% nationally)", 'shrink': 0.5},
        vmin=1,
        vmax=10,
        edgecolor='black',
        linewidth=0.1,
    )
    ax.set_title(f'{year}')
    ax.set_axis_off()

# Hide any unused subplots if number of columns doesn't fill grid
for unused_ax in axes[len(columns):]:
    fig.delaxes(unused_ax)

fig.suptitle('Children in absolute poverty by year: Deciles', size=20)
plt.tight_layout()
plt.show()
Percentage
Code
# Small-multiple maps of the raw absolute-poverty values, one panel per year,
# all sharing a single colour scale.
cmap = mpl.colormaps['viridis']

# raw numbers
years = list(range(2015, 2025))
columns = [f'fye_{year}' for year in years]

# Shared colour limits: widen the observed range outwards to multiples of .05
highest_value = haringey_absolute_gpd[columns].max().max()
lowest_value = haringey_absolute_gpd[columns].min().min()
max_number = math.ceil(highest_value / .05) * .05
min_number = math.floor(lowest_value / .05) * .05

# Set up subplot grid: adjust rows and cols as needed
n_cols = 3
n_rows = math.ceil(len(columns) / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows))

# Flatten axes for easy iteration
axes = axes.flatten()

legend_options = {'label': "Percentage of children", 'shrink': 0.5}
for year, col, ax in zip(years, columns, axes):
    haringey_absolute_gpd.plot(
        ax=ax,
        column=col,
        cmap=cmap,
        legend=True,
        legend_kwds=legend_options,
        vmin=min_number,
        vmax=max_number,
    )
    ax.set_title(f'{year}')
    ax.set_axis_off()

# Hide any unused subplots if number of columns doesn't fill grid
for spare_ax in axes[len(columns):]:
    fig.delaxes(spare_ax)

fig.suptitle('Children in absolute poverty by year: Percentage', size=20)
plt.tight_layout()
plt.show()
Line plots
Relative poverty
Code
# Line plot of relative poverty by Haringey ward over time, with
# Northumberland Park highlighted and each line labelled at its last point.

# Reshape to long format: one row per ward and year
columns = [
    col for col in relative.columns
    if col.startswith('fye_') and not col.endswith('decile')
]
df_sub = haringey_relative[['ward'] + columns]
df_long = df_sub.melt(id_vars='ward', var_name='year', value_name='perc')
# Pull the digits out of the column name ('fye_2015' -> 2015)
df_long['year'] = df_long['year'].str.extract(r'(\d+)', expand=False).astype(int)

# Create a flag for Northumberland Park (not used by the plot below).
df_long['np'] = (df_long['ward'] == "Northumberland Park").astype(int)

# Create custom color palette so that NP is black and the rest are grey
unique_wards = df_long['ward'].unique()
colors = [
    'black' if ward == "Northumberland Park" else 'lightgrey'
    for ward in unique_wards
]
color_palette = dict(zip(unique_wards, colors))

# Create the plot
plt.figure(figsize=(8, 6))
plot = sns.lineplot(
    data=df_long, x='year', y='perc', hue='ward',
    marker='o', legend=False, palette=color_palette,
)

# Collect final points and colors. Missing values are dropped first so each
# label sits on the ward's last real data point, and the colour is read from
# the palette rather than from the positional order of the drawn lines.
endpoints = []
for ward, group in df_long.dropna(subset=['perc']).groupby('ward'):
    group_sorted = group.sort_values('year')
    endpoints.append({
        'ward': ward,
        'x': group_sorted['year'].iloc[-1],
        'y': group_sorted['perc'].iloc[-1],
        'color': color_palette[ward],
    })

# Sort to prevent overlap: walk upwards and push each label at least
# min_spacing above the previous one
endpoints = sorted(endpoints, key=lambda d: d['y'])
min_spacing = .005
adjusted_ys = []
for point in endpoints:
    y = point['y']
    if adjusted_ys and y - adjusted_ys[-1] < min_spacing:
        y = adjusted_ys[-1] + min_spacing
    adjusted_ys.append(y)

# Plot labels and connecting lines
for point, new_y in zip(endpoints, adjusted_ys):
    label_x = point['x'] + 0.3  # Label offset to the right
    label_y = new_y

    # Draw a line from data point to label
    plt.plot(
        [point['x'], label_x], [point['y'], label_y],
        color=point['color'], linewidth=1, linestyle='--', alpha=0.7,
    )

    # Add the label
    plt.text(
        label_x, label_y, point['ward'],
        color=point['color'], va='center', fontsize=10,
    )

# Reference lines: mean 2015 value of the national decile 1 and decile 10
# groups.
plt.axhline(y=relative_decile_1_2015_mean, color='red', linestyle='--', linewidth=1)
plt.axhline(y=relative_decile_10_2015_mean, color='red', linestyle='--', linewidth=1)

plt.text(
    x=df_long['year'].max() + 1,  # Just beyond the last year
    y=relative_decile_1_2015_mean,
    s='Decile 1 average',
    va='center',
    ha='left',
    fontsize=9,
    color='red',
)
plt.text(
    x=df_long['year'].max() + 1,  # Just beyond the last year
    y=relative_decile_10_2015_mean,
    s='Decile 10 average',
    va='center',
    ha='left',
    fontsize=9,
    color='red',
)

plt.title('Children in relative poverty by ward over time: Haringey')
plt.ylabel('Percentage of children')
plt.xlabel('Year')
plt.grid(True)
plt.tight_layout()

# Ensure ticks for every year from min to max
years = np.arange(df_long['year'].min(), df_long['year'].max() + 1)
plt.xticks(years)
plt.show()
Absolute poverty
Code
# Line plot of absolute poverty by Haringey ward over time, with
# Northumberland Park highlighted and each line labelled at its last point.

# Reshape to long format: one row per ward and year
columns = [
    col for col in absolute.columns
    if col.startswith('fye_') and not col.endswith('decile')
]
df_sub = haringey_absolute[['ward'] + columns]
df_long = df_sub.melt(id_vars='ward', var_name='year', value_name='perc')
# Pull the digits out of the column name ('fye_2015' -> 2015)
df_long['year'] = df_long['year'].str.extract(r'(\d+)', expand=False).astype(int)

# Create a flag for Northumberland Park (not used by the plot below).
df_long['np'] = (df_long['ward'] == "Northumberland Park").astype(int)

# Create custom color palette so that NP is black and the rest are grey
unique_wards = df_long['ward'].unique()
colors = [
    'black' if ward == "Northumberland Park" else 'lightgrey'
    for ward in unique_wards
]
color_palette = dict(zip(unique_wards, colors))

# Create the plot
plt.figure(figsize=(8, 6))
plot = sns.lineplot(
    data=df_long, x='year', y='perc', hue='ward',
    marker='o', legend=False, palette=color_palette,
)

# Collect final points and colors. Missing values are dropped first so each
# label sits on the ward's last real data point, and the colour is read from
# the palette rather than from the positional order of the drawn lines.
endpoints = []
for ward, group in df_long.dropna(subset=['perc']).groupby('ward'):
    group_sorted = group.sort_values('year')
    endpoints.append({
        'ward': ward,
        'x': group_sorted['year'].iloc[-1],
        'y': group_sorted['perc'].iloc[-1],
        'color': color_palette[ward],
    })

# Sort to prevent overlap: walk upwards and push each label at least
# min_spacing above the previous one
endpoints = sorted(endpoints, key=lambda d: d['y'])
min_spacing = .005
adjusted_ys = []
for point in endpoints:
    y = point['y']
    if adjusted_ys and y - adjusted_ys[-1] < min_spacing:
        y = adjusted_ys[-1] + min_spacing
    adjusted_ys.append(y)

# Plot labels and connecting lines
for point, new_y in zip(endpoints, adjusted_ys):
    label_x = point['x'] + 0.3  # Label offset to the right
    label_y = new_y

    # Draw a line from data point to label
    plt.plot(
        [point['x'], label_x], [point['y'], label_y],
        color=point['color'], linewidth=1, linestyle='--', alpha=0.7,
    )

    # Add the label
    plt.text(
        label_x, label_y, point['ward'],
        color=point['color'], va='center', fontsize=10,
    )

# Reference lines: mean 2015 value of the national decile 1 and decile 10
# groups.
plt.axhline(y=absolute_decile_1_2015_mean, color='red', linestyle='--', linewidth=1)
plt.axhline(y=absolute_decile_10_2015_mean, color='red', linestyle='--', linewidth=1)

plt.text(
    x=df_long['year'].max() + .75,  # Just beyond the last year
    y=absolute_decile_1_2015_mean,
    s='Decile 1 average',
    va='center',
    ha='left',
    fontsize=9,
    color='red',
)
plt.text(
    x=df_long['year'].max() + .75,  # Just beyond the last year
    y=absolute_decile_10_2015_mean,
    s='Decile 10 average',
    va='center',
    ha='left',
    fontsize=9,
    color='red',
)

plt.title('Children in absolute poverty by ward over time: Haringey')
plt.ylabel('Percentage of children')
plt.xlabel('Year')
plt.grid(True)
plt.tight_layout()

# Ensure ticks for every year from min to max
years = np.arange(df_long['year'].min(), df_long['year'].max() + 1)
plt.xticks(years)
plt.show()