I've been working on a workforce scheduling problem using linear programming in Python with the PuLP library. I'm trying to assign employees to different brands, ensuring certain conditions are met. Here's a detailed explanation of the problem and the code I've implemented:
Problem Statement:
- Assign a fixed number of employees to different brands.
- Ensure that an employee works on only one brand per day.
- An employee can work a maximum of 9 hours and a minimum of 5 hours consecutively.
- Need to fulfill staffing requirements for each brand.
Code:
# Imports
import pulp
from pulp import LpMinimize, LpProblem, LpVariable, lpSum, LpMaximize
from pulp.apis import PULP_CBC_CMD
import pandas as pd
# Variable definition
# Define data
number_of_weeks = 1
days_per_week = 7
total_days = number_of_weeks * days_per_week
days = list(range(1, total_days + 1))
shop_open = 10 # 10 AM
shop_close = 20 # 8 PM
hours = list(range(shop_open, shop_close))
max_working_hours = 9
min_working_hours = 5
number_of_employee = 20
# 5-day or 4-day contract type (50% each for now)
five_days_count = int(number_of_employee * 0.5)
employees = list(range(1, number_of_employee + 1))
employee_type = ["5_days" if e <= five_days_count else "4_days" for e in employees]
categories = ["PHONE", "TV"]
brands = {
"PHONE": ["PHONE1", "PHONE2"],
"TV": ["TV1", "TV2", "TV3"]
}
list_of_brands = []
for key, values in brands.items():
list_of_brands.extend(values)
staffing_requirements = {
"PHONE": {"PHONE1": 1, "PHONE2": 1},
"TV": {"TV1": 1, "TV2": 1, "TV3": 1}
}
flat_staffing_requirements = {brand: req for category in staffing_requirements for brand, req in staffing_requirements[category].items()}
# Initialize the problem
model = LpProblem(name="workforce-planning", sense=LpMinimize)
# Variables
# Work on a day for a brand on a specific hour
x = LpVariable.dicts("x", [(e, d, h, b) for e in employees for d in days for h in hours for b in list_of_brands], cat="Binary")
# Work on a day for a brand
y = LpVariable.dicts("y", [(e, d, b) for e in employees for d in days for b in list_of_brands], cat="Binary")
# Gap
g = LpVariable.dicts("g", [(e, d, h, b) for e in employees for d in days for h in hours for b in list_of_brands], cat="Binary")
# Objective: Minimize employees working per day
model += lpSum(x[e, d, h, b] for e in employees for d in days for h in hours for b in list_of_brands)
# An employee can only work on one brand of a category per day
for e in employees:
for d in days:
# For each employee and day, the sum of all brands they work for should be at most 1.
# This ensures that an employee works for only one brand a day.
model += lpSum(y[e, d, b] for b in list_of_brands) <= 1
# Connect x to y
for e in employees:
for d in days:
for b in list_of_brands:
# If y is 1, then the employee can work for that brand up to max_working_hours.
# If y is 0, then they don't work for that brand.
model += lpSum(x[e, d, h, b] for h in hours) <= max_working_hours * y[e, d, b]
# If the employee works any hour for that brand, then y must be 1.
# This ensures that if an employee is working, they are assigned to a brand.
model += lpSum(x[e, d, h, b] for h in hours) >= y[e, d, b]
for e in employees:
for d in days:
for b in list_of_brands:
for i, h in enumerate(hours):
# 1. If a sequence begins at hour h, the following hours within max_working_hours (or until the end of the day)
# must also be worked hours. This ensures consecutive hours.
max_hrs = min(len(hours) - i, max_working_hours)
for j in range(max_hrs):
if i+j < len(hours):
# This constraint enforces that if g is 1 (sequence starts at hour h),
# the following hours are set to work hours.
model += x[e, d, hours[i+j], b] >= g[e, d, h, b]
# 2. If an employee works at a certain hour h, then a work sequence must have started
# at that hour or some previous hour within the range of max_working_hours. This
# constraint ties together the concept that a worked hour is part of some sequence.
# The sum checks for all possible starting hours of the sequence leading up to hour h.
model += x[e, d, h, b] <= lpSum(g[e, d, hours[i-k], b] for k in range(min(i+1, max_working_hours)))
# Staffing requirements for each hour
for d in days:
for h in hours:
for b in list_of_brands:
model += lpSum(x[e, d, h, b] for e in employees) == flat_staffing_requirements[b]
# Solve the model
model.solve(PULP_CBC_CMD(msg=1))
# Create an empty DataFrame with the desired structure
columns = []
for d in days:
columns.extend([f"Day_{d}_Start", f"Day_{d}_End", f"Day_{d}_Brand"])
df = pd.DataFrame(index=employees, columns=columns)
# Populate the DataFrame
for e in employees:
for d in days:
assigned_brand = None
start_time = None # Resetting for each employee and day combination
end_time = None # Resetting for each employee and day combination
for h in hours:
for b in list_of_brands:
if x[e, d, h, b].varValue == 1:
assigned_brand = b
if start_time is None: # First assignment for this day for this employee
start_time = h
end_time = h + 1
else:
start_time = min(start_time, h)
end_time = max(end_time, h + 1) # h+1 because if they start at a particular hour, they work that entire hour
# Use 'REST' if no assignment is detected
df.loc[e, f"Day_{d}_Start"] = start_time if assigned_brand else "REST"
df.loc[e, f"Day_{d}_End"] = end_time if assigned_brand else "REST"
df.loc[e, f"Day_{d}_Brand"] = assigned_brand if assigned_brand else "REST"
df.head(30)
The provided code works and provides an optimal solution when I don't consider the 5 hours minimum working constraint. The execution time is 17s, which seems a bit long for the dataset size. Here's a sample output:
| Day_1_Start | Day_1_End | Day_1_Brand |
|---|---|---|
| 19 | 20 | TV1 |
| 10 | 19 | TV1 |
| ... | ... | ... |
However, when I add the constraint to ensure a minimum of 5 hours of work, the problem becomes infeasible. Here are the changes I made to the original code :
# Connect x to y
for e in employees:
for d in days:
for b in list_of_brands:
# If y is 1, then the employee can work for that brand up to max_working_hours.
# If y is 0, then they don't work for that brand.
model += lpSum(x[e, d, h, b] for h in hours) <= max_working_hours * y[e, d, b]
model += lpSum(x[e, d, h, b] for h in hours) >= min_working_hours * y[e, d, b]
# If the employee works any hour for that brand, then y must be 1.
# This ensures that if an employee is working, they are assigned to a brand.
model += lpSum(x[e, d, h, b] for h in hours) >= y[e, d, b]
for e in employees:
for d in days:
for b in list_of_brands:
for i, h in enumerate(hours[:-4]): # Exclude the last 4 hours to prevent a sequence that doesn't respect the 5-hour minimum
# If a sequence starts at hour h, then the next 4 hours should also be worked hours to ensure 5 consecutive hours.
for j in range(5): # 5 hours including the current hour
model += x[e, d, hours[i+j], b] >= g[e, d, h, b]
# 2. If an employee works at a certain hour h, then a work sequence must have started
# at that hour or some previous hour within the range of max_working_hours. This
# constraint ties together the concept that a worked hour is part of some sequence.
# The sum checks for all possible starting hours of the sequence leading up to hour h.
model += x[e, d, h, b] <= lpSum(g[e, d, hours[i-k], b] for k in range(min(i+1, max_working_hours)))
When adding the 5-hour minimum working constraint, the problem turns infeasible, and I'm unsure why. I'd appreciate any insights or corrections on the approach or code. Thank you!