Code
import simpy
import random
import pandas as pd
import numpy as np
import plotly.express as px
import re
import simpy
import random
import pandas as pd
import numpy as np
import plotly.express as px
import re
Class to store global parameter values
class g:
# Inter-arrival times
= 3
patient_inter = 10
call_inter
# Activity times
= 2
mean_reg_time = 8
mean_gp_time = 4
mean_book_test_time = 4
mean_call_time
# Resource numbers
= 1
number_of_receptionists = 2
number_of_gps
# Branch probabilities
= 0.25
prob_book_test
# Simulation meta parameters
= 480
sim_duration = 10 number_of_runs
Class representing patients coming in to the GP surgery
class Patient:
def __init__(self, p_id):
self.id = p_id
self.arrival_time = 0
self.q_time_reg = 0
self.q_time_gp = 0
self.time_with_gp = 0
self.q_time_book_test = 0
self.time_with_receptionist = 0.0
Class representing callers phoning the GP surgery
class Caller:
def __init__(self, c_id):
self.id = c_id
self.call_time = 0
self.time_with_receptionist = 0.0
self.q_time_call = 0
Class representing our model of the GP surgery
class Model:
# Constructor
def __init__(self, run_number):
# Set up SimPy environment
self.env = simpy.Environment()
# Set up counters to use as entity IDs
self.patient_counter = 0
self.caller_counter = 0
# Set up lists to store patient objects
self.patient_objects = [] ##NEW
self.caller_objects = [] ##NEW
# Set up resources
self.receptionist = simpy.Resource(
self.env, capacity=g.number_of_receptionists
)self.gp = simpy.Resource(
self.env, capacity=g.number_of_gps
)
# Set run number from value passed in
self.run_number = run_number
# Set up DataFrame to store patient-level results
self.patient_results_df = pd.DataFrame()
self.patient_results_df["Patient ID"] = [1]
self.patient_results_df["Arrival Time"] = [0.0]
self.patient_results_df["Queue Time Reg"] = [0.0]
self.patient_results_df["Time Seen For Registration"] = [0.0]
self.patient_results_df["Queue Time GP"] = [0.0]
self.patient_results_df["Time Seen By GP"] = [0.0]
self.patient_results_df["Queue Time Book Test"] = [0.0]
self.patient_results_df["Time Test Booking Started"] = [0.0]
self.patient_results_df["Departure Time"] = [0.0]
self.patient_results_df.set_index("Patient ID", inplace=True)
# Set up DataFrame to store caller-level results
self.caller_results_df = pd.DataFrame()
self.caller_results_df["Caller ID"] = [1]
self.caller_results_df["Call Start Time"] = [0.0]
self.caller_results_df["Queue Time Call"] = [0.0]
self.caller_results_df["Call Answered At"] = [0.0]
self.caller_results_df["Call End Time"] = [0.0]
self.caller_results_df.set_index("Caller ID", inplace=True)
# Set up attributes that will store mean queuing times across the run
self.mean_q_time_reg = 0
self.mean_q_time_gp = 0
self.mean_q_time_book_test = 0
self.mean_q_time_call = 0
# Set up attributes used to monitor total resource usage
self.receptionist_utilisation_prop = 0.0
self.gp_utilisation_prop = 0.0
# Generator function that represents the DES generator for patient arrivals
def generator_patient_arrivals(self):
while True:
self.patient_counter += 1
= Patient(self.patient_counter)
p self.patient_objects.append(p) ##NEW
self.env.process(self.attend_gp_surgery(p))
= random.expovariate(1.0 / g.patient_inter)
sampled_inter
yield self.env.timeout(sampled_inter)
# Generator function that represents the DES generator for caller arrivals
def generator_callers(self):
while True:
self.caller_counter += 1
= Caller(self.caller_counter)
c self.caller_objects.append(c) ##NEW
self.env.process(self.call_gp_surgery(c))
= random.expovariate(1.0 / g.call_inter)
sampled_inter
yield self.env.timeout(sampled_inter)
# Generator function representing pathway for patients attending the GP
# surgery to see a GP
def attend_gp_surgery(self, patient):
# Registration activity
= self.env.now
start_q_reg self.patient_results_df.at[patient.id, "Arrival Time"] = (
start_q_reg
)
with self.receptionist.request() as req:
yield req
= self.env.now
end_q_reg
= end_q_reg - start_q_reg
patient.q_time_reg
self.patient_results_df.at[patient.id, "Queue Time Reg"] = (
patient.q_time_reg
)self.patient_results_df.at[patient.id, "Time Seen For Registration"] = (
+ patient.q_time_reg
start_q_reg
)
= random.expovariate(
sampled_reg_time 1.0 / g.mean_reg_time
)
+= sampled_reg_time
patient.time_with_receptionist
yield self.env.timeout(sampled_reg_time)
# GP Consultation activity
= self.env.now
start_q_gp
with self.gp.request() as req:
yield req
= self.env.now
end_q_gp
= end_q_gp - start_q_gp
patient.q_time_gp
self.patient_results_df.at[patient.id, "Queue Time GP"] = (
patient.q_time_gp
)self.patient_results_df.at[patient.id, "Time Seen By GP"] = (
+ patient.q_time_gp
start_q_gp
)
= random.expovariate(
sampled_gp_time 1.0 / g.mean_gp_time
)
+= sampled_gp_time
patient.time_with_gp
yield self.env.timeout(sampled_gp_time)
# Branching path check to see if patient needs to book a test
if random.uniform(0,1) < g.prob_book_test:
# Book test activity
= self.env.now
start_q_book_test
with self.receptionist.request() as req:
yield req
= self.env.now
end_q_book_test
= end_q_book_test - start_q_book_test
patient.q_time_book_test
self.patient_results_df.at[patient.id, "Queue Time Book Test"] = (
patient.q_time_book_test
)
self.patient_results_df.at[patient.id, "Time Test Booking Started"] = (
+ patient.q_time_book_test
start_q_book_test
)
= random.expovariate(
sampled_book_test_time 1.0 / g.mean_book_test_time
)
+= sampled_book_test_time
patient.time_with_receptionist
yield self.env.timeout(sampled_book_test_time)
self.patient_results_df.at[patient.id, "Departure Time"] = (
self.env.now
)
# Generator function representing callers phoning the GP surgery
def call_gp_surgery(self, caller):
# Answering call activity
= self.env.now
start_q_call self.caller_results_df.at[caller.id, "Call Start Time"] = (
start_q_call
)
with self.receptionist.request() as req:
yield req
= self.env.now
end_q_call
= end_q_call - start_q_call
caller.q_time_call
self.caller_results_df.at[caller.id, "Queue Time Call"] = (
caller.q_time_call
)
self.caller_results_df.at[caller.id, "Call Answered At"] = (
self.env.now
)
= random.expovariate(
sampled_call_time 1.0 / g.mean_call_time
)
+= sampled_call_time
caller.time_with_receptionist
yield self.env.timeout(sampled_call_time)
self.caller_results_df.at[caller.id, "Call End Time"] = (
self.env.now
)
# Method to calculate and store results over the run
def calculate_run_results(self):
self.mean_q_time_reg = self.patient_results_df["Queue Time Reg"].mean()
self.mean_q_time_gp = self.patient_results_df["Queue Time GP"].mean()
self.mean_q_time_book_test = (
self.patient_results_df["Queue Time Book Test"].mean()
)
self.mean_q_time_call = self.caller_results_df["Queue Time Call"].mean()
= sum([i.time_with_gp for i in self.patient_objects])
gp_utilisation_mins
= sum(
receptionist_utilisation_mins for i in self.patient_objects]
[i.time_with_receptionist + sum(
) for i in self.caller_objects]
[i.time_with_receptionist
)
self.gp_utilisation_prop = (
/ (g.number_of_gps * g.sim_duration)
gp_utilisation_mins
)
self.receptionist_utilisation_prop = (
/ (g.number_of_receptionists * g.sim_duration)
receptionist_utilisation_mins
)
# Method to run a single run of the simulation
def run(self):
# Start up DES generators
self.env.process(self.generator_patient_arrivals())
self.env.process(self.generator_callers())
# Run for the duration specified in g class
self.env.run(until=g.sim_duration)
# Calculate results over the run
self.calculate_run_results()
return self.caller_results_df, self.patient_results_df
Class representing a trial for our simulation
class Trial:
# Constructor
def __init__(self):
self.df_trial_results = pd.DataFrame()
self.df_trial_results["Run Number"] = [1]
self.df_trial_results["Mean Queue Time Reg"] = [0.0]
self.df_trial_results["Mean Queue Time GP"] = [0.0]
self.df_trial_results["Mean Queue Time Book Test"] = [0.0]
self.df_trial_results["Mean Queue Time Call"] = [0.0]
self.df_trial_results["GP Utilisation - Percentage"] = [0.0]
self.df_trial_results["Receptionist Utilisation - Percentage"] = [0.0]
self.df_trial_results.set_index("Run Number", inplace=True)
# Method to calculate and store means across runs in the trial
def calculate_means_over_trial(self):
self.mean_q_time_reg_trial = (
self.df_trial_results["Mean Queue Time Reg"].mean()
)self.mean_q_time_gp_trial = (
self.df_trial_results["Mean Queue Time GP"].mean()
)self.mean_q_time_book_test_trial = (
self.df_trial_results["Mean Queue Time Book Test"].mean()
)self.mean_q_time_call_trial = (
self.df_trial_results["Mean Queue Time Call"].mean()
)
# Method to run trial
def run_trial(self):
= []
caller_dfs = []
patient_dfs
for run in range(1, g.number_of_runs+1):
= Model(run)
my_model = my_model.run()
caller_df, patient_df "Run"] = run
caller_df["What"] = "Callers"
caller_df["Run"] = run
patient_df["What"] = "Patients"
patient_df[
caller_dfs.append(caller_df)
patient_dfs.append(patient_df)
self.df_trial_results.loc[run] = [my_model.mean_q_time_reg,
my_model.mean_q_time_gp,
my_model.mean_q_time_book_test,
my_model.mean_q_time_call,round(my_model.gp_utilisation_prop * 100, 2),
round(my_model.receptionist_utilisation_prop*100, 2)
]
return self.df_trial_results.round(1), pd.concat(caller_dfs), pd.concat(patient_dfs)
= Trial().run_trial()
df_trial_results, caller_results, patient_results
print(df_trial_results)
Mean Queue Time Reg Mean Queue Time GP \
Run Number
1 41.2 8.3
2 55.9 33.0
3 71.2 46.5
4 96.1 15.1
5 31.3 44.3
6 83.6 10.5
7 75.1 7.4
8 35.6 67.7
9 42.4 41.8
10 84.3 13.8
Mean Queue Time Book Test Mean Queue Time Call \
Run Number
1 39.6 32.5
2 47.7 45.7
3 77.4 69.1
4 100.0 95.5
5 31.8 23.8
6 90.0 89.0
7 60.4 75.4
8 35.9 31.1
9 48.5 47.2
10 91.4 78.7
GP Utilisation - Percentage Receptionist Utilisation - Percentage
Run Number
1 83.2 97.3
2 82.8 99.0
3 95.8 100.1
4 94.5 100.3
5 97.7 98.4
6 86.0 98.7
7 88.6 100.7
8 97.8 100.0
9 96.8 98.7
10 79.3 101.3
print(caller_results.sample(25))
Call Start Time Queue Time Call Call Answered At Call End Time \
Caller ID
3 26.985178 22.884711 49.869889 57.143681
2 8.288770 3.013388 11.302158 20.587982
50 456.639579 NaN NaN NaN
21 180.721162 113.096958 293.818120 300.277487
13 112.270641 89.695134 201.965775 205.985518
26 204.562526 136.806200 341.368725 343.768102
21 263.891131 61.632482 325.523613 325.581183
33 388.920430 NaN NaN NaN
21 300.686889 39.177955 339.864844 345.776609
1 0.000000 1.432402 1.432402 7.561549
17 127.765046 24.585563 152.350609 156.917206
3 4.708912 20.421817 25.130729 25.784647
26 220.258790 114.123136 334.381926 337.064949
49 468.227680 NaN NaN NaN
4 20.844011 6.798484 27.642496 29.089390
20 222.298899 131.923697 354.222596 359.931704
7 54.792788 8.743920 63.536709 74.323428
13 136.050764 13.288501 149.339264 150.910655
11 86.783582 64.575479 151.359061 151.692975
9 53.625186 40.634466 94.259652 102.372077
47 424.345526 NaN NaN NaN
11 98.517729 78.295469 176.813198 182.947985
19 276.359264 37.459171 313.818435 320.012249
37 348.314725 NaN NaN NaN
34 319.394850 156.977310 476.372159 478.386799
Run What
Caller ID
3 8 Callers
2 3 Callers
50 6 Callers
21 6 Callers
13 10 Callers
26 6 Callers
21 2 Callers
33 2 Callers
21 8 Callers
1 9 Callers
17 1 Callers
3 9 Callers
26 10 Callers
49 3 Callers
4 7 Callers
20 4 Callers
7 2 Callers
13 5 Callers
11 3 Callers
9 3 Callers
47 6 Callers
11 4 Callers
19 8 Callers
37 7 Callers
34 7 Callers
print(patient_results.sample(25))
Arrival Time Queue Time Reg Time Seen For Registration \
Patient ID
18 46.653668 1.105970 47.759638
150 401.123587 NaN NaN
162 400.851731 NaN NaN
90 245.858248 52.054016 297.912264
163 403.050543 NaN NaN
153 438.325943 NaN NaN
110 391.823900 62.659485 454.483386
127 382.229551 NaN NaN
87 231.091073 81.183470 312.274543
139 422.082336 NaN NaN
22 71.561838 23.745370 95.307208
53 149.214649 100.493782 249.708431
56 169.022238 7.809793 176.832031
29 114.922759 23.171650 138.094409
31 84.050046 59.141769 143.191814
65 176.816969 112.100204 288.917173
123 334.119015 125.454061 459.573076
35 135.385034 37.261925 172.646959
49 131.830443 71.621691 203.452134
46 123.425332 56.258970 179.684303
31 88.011265 100.494793 188.506058
3 6.858186 0.000000 6.858186
42 110.412522 61.679324 172.091846
166 455.648340 NaN NaN
69 218.900928 84.796802 303.697730
Queue Time GP Time Seen By GP Queue Time Book Test \
Patient ID
18 45.416487 95.650777 NaN
150 NaN NaN NaN
162 NaN NaN NaN
90 47.059007 345.891093 NaN
163 NaN NaN NaN
153 NaN NaN NaN
110 NaN NaN NaN
127 NaN NaN NaN
87 76.241267 390.885029 NaN
139 NaN NaN NaN
22 23.359609 119.641138 NaN
53 0.000000 253.438941 NaN
56 59.166358 240.021970 57.399221
29 1.554249 139.919118 30.913434
31 16.133493 159.550785 NaN
65 0.000000 292.334376 NaN
123 NaN NaN NaN
35 0.000000 172.926239 NaN
49 36.882943 240.379504 NaN
46 48.785069 233.279101 NaN
31 0.000000 189.076891 NaN
3 0.000000 8.002999 NaN
42 52.112295 224.448247 93.785410
166 NaN NaN NaN
69 21.779556 326.312644 NaN
Time Test Booking Started Departure Time Run What
Patient ID
18 NaN NaN 5 Patients
150 NaN NaN 9 Patients
162 NaN NaN 8 Patients
90 NaN NaN 9 Patients
163 NaN NaN 8 Patients
153 NaN NaN 2 Patients
110 NaN NaN 1 Patients
127 NaN NaN 10 Patients
87 NaN NaN 3 Patients
139 NaN NaN 3 Patients
22 NaN NaN 6 Patients
53 NaN NaN 6 Patients
56 297.856937 299.191484 5 Patients
29 173.079362 174.777895 1 Patients
31 NaN NaN 3 Patients
65 NaN NaN 6 Patients
123 NaN NaN 4 Patients
35 NaN NaN 1 Patients
49 NaN NaN 3 Patients
46 NaN NaN 3 Patients
31 NaN NaN 10 Patients
3 NaN NaN 2 Patients
42 325.462855 326.101203 3 Patients
166 NaN NaN 7 Patients
69 NaN NaN 7 Patients
The median wait for registration across 10 was 63.55 minutes
The median wait for booking a test was 54.45 minutes
The median wait for callers to have their call answered was 58.15 minutes
The median utilisation for a receptionist across 10 was 99.5%
The median wait for a GP across 10 was 24.05 minutes
The median utilisation for a GP across 10 was 91.55%
Let’s set up a reusable sequence of colours that can give our plotly plots a consistent feel/identity. This uses some colours from the NHS identity guidelines that should work well when placed next to each other. Link to NHS Colour Guidelines If we pass this to something with just a single colour in the plot, it will just take the first colour from the sequence (NHS Blue). If we pass it to a plot that has categories, it will assign colours to categories in the order given in this list:
= ["#005EB8", "#FFB81C", "#00A499", "#41B6E6", "#AE2573", "#006747"]
nhs_colour_sequence = px.bar(
average_waits_fig # First we need to get the dataframe into the shape needed by the plot
# We start by dropping the utilisation columns from our dataframe
# as they're on a very different scale to the wait times
df_trial_results.drop(=["GP Utilisation - Percentage",
columns"Receptionist Utilisation - Percentage"])
# We then reset the index of the plot so the run number is
# a column rather than the index of the dataframe
=False)
.reset_index(drop# Finally, we use the melt function to turn this from a 'wide'
# dataframe (where we have a column for each of the different measures)
# to a 'long' dataframe where we have one row per run/metric combination.
# After melting, our original column names will be in a column entitled
# 'variable' and our actual wait times for each stage will be in a column
# # called 'value'
# (so a row might look like "1, Mean Queue Time Reg, 87" for the 'Run Number',
# 'variable' and 'value' columns respectively)
="Run Number"),
.melt(id_vars="value", # What's on the horizontal axis - this is the number of minutes
x="Run Number", # What's on the vertical axis
y="variable", # This will create a separate plot for each variable (here, the metric)
facet_col# Give the whole plot a title
="Average Waits (Minutes) For Each Stage of the Patient Journey - by Run",
title='h', # Set this to a horizontal bar plot (default is vertical)
orientation={"value": "Average Wait (Mins)"}, # Make the label on the x axis nicer
labels# Use our NHS colour palette; only the first colour will be used as we haven't
# made use of colour as a part of the visualisation in this plot, but this does mean
# that the bars will use the standard NHS blue rather than the plotly one
=nhs_colour_sequence
color_discrete_sequence )
After we use the px.bar function to create our plot, there will be a few additional things we want to do to the plot before displaying it. There is a limit to what can be done in the original function call as there are only so many parameters - these little extra touches just make the plot as readable and polished-looking as possible! This will tidy up the subtitles of each ‘facet’ within our plot (the mini-graph relating) to each of our metrics This uses what’s called a ‘lambda’ function, which is a little temporary function that in this case iterates through the annotation text and replaces the string ‘variable=’ with an empty string, which just tidies up the headers in this case so it only contains the actual name of the variable
lambda a: a.update(text=a.text.replace("variable=", ""))) average_waits_fig.for_each_annotation(
Here we are going to update the layout to ensure that we have a label for every run number in our y axis By default, plotly tries to intelligently choose a scale - but for this, it makes more sense to include a label for every row (unless we have lots of runs, in which case we won’t apply this correction)
if g.number_of_runs < 20:
= {'dtick': 1}) average_waits_fig.update_layout(yaxis
Finally, we force plotly to display the plot in the interactive window. If we don’t use this then only the final plotly plot we create will actually be displayed
average_waits_fig.show()
= px.bar(
performance_per_run_fig # First we need to get the dataframe into the shape needed by the plot
# We start by dropping the utilisation columns from our dataframe
# as they're on a very different scale to the wait times
df_trial_results.drop(=["GP Utilisation - Percentage",
columns"Receptionist Utilisation - Percentage"])
# We then reset the index of the plot so the run number is
# a column rather than the index of the dataframe
=False)
.reset_index(drop# This time we use a lambda function (a small temporary function)
# to look at each of our column names and replace the string
# 'Mean Queue Time ' with a blank string, which we want to do here
# as we're going to use those values as our x axis labels and it will
# get cluttered and hard to read with that phrase used (and we can just make
# it clear what each value is via other labels or the title)
=lambda x: re.sub('Mean Queue Time ', '', x))
.rename(columns# Finally, we reshape the dataframe from a wide to a long format
# (see the first plot for more details on this)
="Run Number"),
.melt(id_vars# This time we're going to facet (make mini sub-plots) by run instead - we're aiming to
# end up with a mini-plot per run to look at the performance on a run level rather than
# in the previous plot where we had more ability to look at the performance against a
# single metric across multiple runs - so even though we're using the same data here,
# the focus of the plot is slightly different
="Run Number",
facet_col=10, # Ensure that if we have lots of runs, our subplots don't become too small
facet_col_wrap="variable", # the column used for our horizontal axis
x="value", # the column used for our vertical axis
y# A title for the whole plot
="Average Waits (Minutes) For Each Stage of the Patient Journey - by Run",
title# Make use of our NHS colour scheme (again, as this plot will only use a single colour, it just
# uses the first colour from the list which is the NHS blue)
=nhs_colour_sequence,
color_discrete_sequence# Finally we tidy up the labels, replacing 'variable' with a blank string (as it's very clear
# from the category labels and the other labels on the plot what is displayed there
={"variable": "",
labels"value": "Queue Time (minutes)"
})
We cycle through and tidy up the display of the subheaders for the subplots
performance_per_run_fig.for_each_annotation(lambda a: a.update(text=a.text.replace("Run Number=", "Run "))
)
This time, as we have multiple x axes in the overall plot (one per subplot) we need to use a slightly different function to ensure every label will get displayed
lambda xaxis: xaxis.update(dtick=1))
performance_per_run_fig.for_each_xaxis(# Display the plot
performance_per_run_fig.show()
= px.box(
utilisation_boxplot_fig # First we need to get the dataframe into the shape needed by the plot
# We start by only selecting the utilisation columns by passing a list of
# the column names inside another set of square brackets
"GP Utilisation - Percentage",
(df_trial_results[["Receptionist Utilisation - Percentage"]]
# once again we want the run number to be a column, not the index
=False)
.reset_index(drop# and once again we want it in long format (see the first plot for details)
="Run Number")),
.melt(id_vars="value", # Make our horizontal axis display the % utilisation of the resource in the run
x="variable", # Make the y axis the utilisation category (will be our original column names)
y="all", # Force the boxplot to actually show the individual points too, not just a summary
points="Resource Utilisation", # Add a plot title
title# Force the plot to start at 0 regardless of the lowest utilisation recorded
# and finish just past 100 so that the higher points can be seen
=[0, 105],
range_x# Again, use our NHS colour paletted - this will just use NHS blue (the first colour in the list)
=nhs_colour_sequence,
color_discrete_sequence# Tidy up the x and y axis labels
={"variable": "",
labels"value": "Resource Utilisation Across Run (%)"
} )
We don’t need to do any additional tweaks to the plot this time - we can just display it straight away
utilisation_boxplot_fig.show()
We’re going to use the same data as for our boxplot, but we’re more interested in looking at the utilisation of resources within a single run rather than the consistency of resource use of a particular resource type, which the boxplot is better at demonstrating So once again - same data, different focus!
= px.bar(
utilisation_bar_fig # First we need to get the dataframe into the shape needed by the plot
# We start by only selecting the utilisation columns by passing a list of
# the column names inside another set of square brackets
"GP Utilisation - Percentage",
(df_trial_results[["Receptionist Utilisation - Percentage"]]
# once again we want the run number to be a column, not the index
=False)
.reset_index(drop# and once again we want it in long format (see the first plot for details)
="Run Number")),
.melt(id_vars="Run Number", # The value for our horizontal plot
x="value", # What will be displayed on the vertical axis (here, utilisation %)
y# This will colour the bars by a factor
# Here, because we melted our dataframe into long format, the values of the column 'variable'
# are the names of our original columns - i.e. "GP Utilisation - Percentage" or
# "Receptionist Utilisation - Percentage". We will automatically get a legend thanks to plotly.
="variable",
color# Force the bars to display side-by-side instead of on top of each other (which wouldn't really
# make sense in this graph)
="group",
barmode# Use our NHS colour palette - this time as we have two possible values in the column we coloured
# by, it will use the first two values in the colour palette (NHS blue and NHS warm yellow)
=nhs_colour_sequence,
color_discrete_sequence="Resource Utilisation",
title={"variable": "", # Remove the legend header - it's clear enough without it
labels"value": "Resource Utilisation Across Run (%)" # tidy up our y-axis label
} )
Ensure the run label appears on the x axis for each run unless there are lots of them, in which case we’ll just leave the value of dtick as the default (which means plotly will choose a sensible value for us)
if g.number_of_runs < 20:
= {'dtick': 1}) utilisation_bar_fig.update_layout(xaxis
Show the bar plot
utilisation_bar_fig.show()
It would be good to be able to display whether callers had their call answered or not - this can give us a quick overview of whether the system has been particularly overloaded on different runs. If a large number of callers never get their call answered, this suggests we need more receptionists (as they are the ones dealing will registration, test booking and calls in this model)
Adds a column for whether the call was answered We use np.where as a bit of an ‘if’/‘case when’ statement here
"Call Answered"] = np.where(
caller_results[# First, we check a condition - is the value in the 'call answered at' column
# NA/missing?
"Call Answered At"].isna(),
caller_results[# If it is, then it means we never recorded a 'call answered at' time because a receptionist
# resource never became free for this caller - so return the string below
"Call Not Answered Before Closing Time",
# If it is not na (i.e. the method .isna() returns False), then we can be confident that the
# call was answered
"Call Answered"
)
Now let’s group by run, keep just our new ‘call answered’ column, and count how many calls per run fell into each of these categories. As the ‘value_counts()’ method returns a pandas series instead of a pandas dataframe, we need to manually turn it back into a dataframe first
= pd.DataFrame(
calls_answered_df "Run")["Call Answered"].value_counts()
caller_results.groupby(# Finally, we reset the index (as due to grouping by 'Run' that will have been the index of
# the new column we created, but for plotting and pivoting purposes it's easier if that's an
# actual column instead)
=False) ).reset_index(drop
For display purposes, it would actually be easier to read if our dataframe was in ‘wide’ format - which will mean that we have a column for ‘call answered by closing time’ and a column for ‘call not answered before closing time’ and a row per run, with the cells then containing the count of calls per run falling into each of those categories We use the ‘pivot’ function for going from long to wide format
= calls_answered_df.pivot(
calls_answered_df_wide ="Run", columns="Call Answered", values="count"
index=False) ).reset_index(drop
Finally, let’s display this dataframe
print(calls_answered_df_wide)
Call Answered Run Call Answered Call Not Answered Before Closing Time
0 1 44 3
1 2 32 10
2 3 41 10
3 4 31 12
4 5 30 4
5 6 38 14
6 7 35 10
7 8 25 16
8 9 39 16
9 10 35 17
We can now use the long version of this dataframe to create a stacked bar plot exploring the total number of calls received - and those not answered - within the plot
= px.bar(
calls_answered_fig # we can just pass in our 'call_answered_df' without further modification
calls_answered_df,="Run", # The run should be the x axis
x="count", # The number of calls falling into each category should by the y axis
y="Call Answered", # This time we colour the dataframe by whether the call was answered or not
color# Tidy up the y axis label (x axis label and legend title are already fine)
={"count": "Number of Calls"},
labels# Pass in our colour sequence - the first category alphabetically will use colour 1,
# and the second category will use colour 2. If we had more categories, it would continue to
# make its way through the list of colours we defined
=nhs_colour_sequence,
color_discrete_sequence# Add a plot title
="Number of Calls - How Many Were Answered in Opening Hours?"
title )
Ensure each column has a number on the x axis (if there aren’t too many runs)
if g.number_of_runs < 20:
= {'dtick': 1}) calls_answered_fig.update_layout(xaxis
Show the plot
calls_answered_fig.show()
Finally, let’s make a scatterplot that can help us to just check that the patterns of arrivals across the day makes sense. Are the callers and patients arriving in an intermingled fashion and do we have some of each? This plot might be of more use for debugging than actually understanding the model behaviour - although it can also be useful to demonstrate that the arrival times are not fixed across the different runs, which can help people to understand the value and functioning of the model
We start by joining the patient and caller results together
= pd.concat([
calls_and_patients # we only want a few columns from each
"Run", "Arrival Time", "What"]],
patient_results[[# It's important that the columns are in the same order and have the same names
# as we are just going to stack them on top of each other
"Run", "Call Start Time", "What"]].rename(columns={"Call Start Time": "Arrival Time"})
caller_results[[ ])
Here we are going to use something called a strip plot, which is a scatterplot (a plot with a series of dots - but with some level of randomness on one axis to ensure points at exactly the same position don’t fully overlap)
= px.strip(
arrival_fig # We pass in the dataframe we just created
calls_and_patients,# We place the points horizontally depending on the time the individual caller or patient
# arrived in the model
="Arrival Time",
x# We then use the run number on the y axis, which will give us a line of points per run
="Run",
y# We'll use the colour to distinguish between patients and callers
="What",
color# We'll use our colour palette
=nhs_colour_sequence,
color_discrete_sequence# Finally, let's add a title
="Patient Arrivals by Time",
title={"Arrival Time": "Arrival Time (Simulation Minute)"}
labels
)# Force the maximum amount of jitter (random offset) in the points
=1.0)
arrival_fig.update_traces(jitter# Display the plot
arrival_fig.show()
We can also use a similar point to give an indication of at what point our system starts to overload during each run. Instead of displaying both patients and callers, we use just the callers this time
= px.strip(
call_answered_detailed_fig # We pass in the dataframe we just created
caller_results,# We place the points horizontally depending on the time the individual caller or patient
# arrived in the model
="Call Start Time",
x# We then use the run number on the y axis, which will give us a line of points per run
="Run",
y# We'll use the colour to distinguish between patients and callers
="Call Answered",
color# This time, instead of using our palette, let's explicitly map some colours to the possible
# values
# This allows us to ensure the 'not answered' gets associated with a typically 'bad' colour
={"Call Answered": "#005EB8", # NHS blue
color_discrete_map"Call Not Answered Before Closing Time": "#DA291C"}, # NHS Red
# Finally, let's add a title
="Patient Calls - Successful Answering over Time",
title# Make it clearer what the units of the x axis are
={"Call Start Time": "Call Start Time (Simulation Minute)"},
labels
)
call_answered_detailed_fig.show()