上面关于背景等略
先简单说一下,数据生成,你先介绍这两个排队系统,然后我们对于文章提出的两种到达方式选择了分段到达的方式:

说一下参数:$lambda1$金额$lambda2$是两个到达率,随时间变化如图,$lambda=p \times lambda1+(1-p)\times lambda2$,$lambda$是E2到达率具体这一段看原文都有介绍。其次系统容量为:50,服务员人数:1,服务率:10 代码:
import simpy
import numpy as np
import pandas as pd
import os
# 定义时间变化的到达率函数
def lambda_1(t):
if 0 <= t < 2:
return 10
elif 2 <= t < 4:
return 7
elif 4 <= t < 6:
return 12
elif 6 <= t < 8:
return 16
elif 8 <= t < 10:
return 18
elif 10 <= t < 12:
return 13
elif 12 <= t < 14:
return 6
elif 14 <= t <= 16:
return 8
else:
return 0 # 超出定义范围的时间返回 0
def lambda_2(t):
if 0 <= t < 2:
return 2
elif 2 <= t < 4:
return 5
elif 4 <= t < 6:
return 8
elif 6 <= t < 8:
return 14
elif 8 <= t < 10:
return 12
elif 10 <= t < 12:
return 16
elif 12 <= t < 14:
return 9
elif 14 <= t <= 16:
return 6
else:
return 0 # 超出定义范围的时间返回 0
# 定义超指数(H2)到达时间分布
def H2_arrival(env, server, queue_capacity, service_rate, data, path_data, p):
while True:
if np.random.rand() < p:
rate = lambda_1(env.now)
else:
rate = lambda_2(env.now)
inter_arrival = np.random.exponential(1 / rate)
yield env.timeout(inter_arrival)
arrival_time = env.now
if len(server.queue) < queue_capacity:
req = server.request()
req.time_of_arrival = arrival_time
env.process(service(env, server, req, service_rate, arrival_time, data, path_data))
path_data.append({
'arrival_time': arrival_time,
'wait_time': None # 初始状态,等待时间未知
})
# 定义Erlang(E2)到达时间分布
def E2_arrival(env, server, queue_capacity, service_rate, data, path_data, p):
while True:
rate = p * lambda_1(env.now) + (1 - p) * lambda_2(env.now)
inter_arrival = np.random.exponential(1 / rate)
yield env.timeout(inter_arrival)
arrival_time = env.now
if len(server.queue) < queue_capacity:
req = server.request()
req.time_of_arrival = arrival_time
env.process(service(env, server, req, service_rate, arrival_time, data, path_data))
path_data.append({
'arrival_time': arrival_time,
'wait_time': None # 初始状态,等待时间未知
})
# 定义服务过程
def service(env, server, request, service_rate, arrival_time, data, path_data):
with request:
yield request
yield env.timeout(np.random.exponential(1 / service_rate))
departure_time = env.now
wait_time = departure_time - arrival_time
data.append({
'arrival_time': arrival_time,
'departure_time': departure_time,
'wait_time': wait_time,
'queue_length': len(server.queue)
})
# 更新path_data中的等待时间
for entry in path_data:
if entry['arrival_time'] == arrival_time and entry['wait_time'] is None:
entry['wait_time'] = wait_time
break
# 定义记录每个时间点的等待时间
def record_waiting_time(env, server, waiting_times):
while True:
current_time = int(env.now)
total_wait_time = 0
count = 0
for request in server.queue:
total_wait_time += env.now - request.time_of_arrival
count += 1
for request in server.users:
total_wait_time += env.now - request.time_of_arrival
count += 1
avg_wait_time = total_wait_time / count if count > 0 else 0
waiting_times[current_time] = avg_wait_time
yield env.timeout(1) # 每1个时间单位记录一次
# 仿真函数
def simulate_queue(arrival_process, p, service_rate, num_servers, queue_capacity, sim_time):
env = simpy.Environment()
server = simpy.Resource(env, capacity=num_servers)
data = []
path_data = []
waiting_times = {}
if arrival_process == 'H2':
env.process(H2_arrival(env, server, queue_capacity, service_rate, data, path_data, p))
elif arrival_process == 'E2':
env.process(E2_arrival(env, server, queue_capacity, service_rate, data, path_data, p))
env.process(record_waiting_time(env, server, waiting_times))
env.run(until=sim_time)
df = pd.DataFrame(data)
df_path = pd.DataFrame(path_data)
return df, df_path, waiting_times
# 参数设置
service_rate = 10
num_servers = 1
queue_capacity = 50
sim_time = 16
n = 10
p = 0.4
# 创建文件夹以保存结果
if not os.path.exists('./virtual_queue_H2'):
os.makedirs('./virtual_queue_H2')
if not os.path.exists('./virtual_queue_E2'):
os.makedirs('./virtual_queue_E2')
# 运行仿真并保存结果
waiting_times_H2_all = []
waiting_times_E2_all = []
for i in range(n):
df_H2, df_H2_path, waiting_times_H2 = simulate_queue('H2', p, service_rate, num_servers, queue_capacity, sim_time)
df_H2.to_csv(f'./virtual_queue_H2/virtual_queue_H2_results{i}.csv', index=False)
df_H2_path.to_csv(f'./virtual_queue_H2/virtual_queue_H2_path{i}.csv', index=False)
waiting_times_H2_all.append(waiting_times_H2)
for i in range(n):
df_E2, df_E2_path, waiting_times_E2 = simulate_queue('E2', p, service_rate, num_servers, queue_capacity, sim_time)
df_E2.to_csv(f'./virtual_queue_E2/virtual_queue_E2_results{i}.csv', index=False)
df_E2_path.to_csv(f'./virtual_queue_E2/virtual_queue_E2_path{i}.csv', index=False)
waiting_times_E2_all.append(waiting_times_E2)
# 计算t=1,2,...,15的平均等待时间
average_waiting_times_H2 = {t: np.mean([wt[t] for wt in waiting_times_H2_all if t in wt]) for t in range(1, 16)}
average_waiting_times_E2 = {t: np.mean([wt[t] for wt in waiting_times_E2_all if t in wt]) for t in range(1, 16)}
# 保存平均等待时间到文件
df_avg_waiting_times_H2 = pd.DataFrame(list(average_waiting_times_H2.items()), columns=['time', 'average_waiting_time'])
df_avg_waiting_times_E2 = pd.DataFrame(list(average_waiting_times_E2.items()), columns=['time', 'average_waiting_time'])
df_avg_waiting_times_H2.to_csv('./virtual_queue_H2/average_waiting_times_H2.csv', index=False)
df_avg_waiting_times_E2.to_csv('./virtual_queue_E2/average_waiting_times_E2.csv', index=False)
下面给出两幅图,这是对两个系统模拟之后产生的等待时间和到达时间的散点图,上面是H2下面是E2:


knn算法k的选取至关重要,具体说明。。。
我们一开始采用遍历的方法去确定最优k的位置得到:

可以看到。。。
但是,遍历这种方式去寻找最优k值会浪费大量的时间,后来我们使用贝叶斯优化去寻找最优k值
介绍贝叶斯优化。。。这里最好补充一下伪代码。
时间上确实取得了优势,在搜索范围均为(1,1000)时,采取遍历的方法用时将近10分钟,但是在使用了贝叶斯搜索之后,用时只有19.3469秒
knn以及寻找最优k代码:
import numpy as np
import pandas as pd
import os
from skopt import gp_minimize
# 设置搜索范围
timepoint_K = range(1, 400)
timepoint = list(range(1, 16))
n = 10
# 准备测试集
test_H2 = pd.read_csv('./virtual_queue_H2/virtual_queue_H2_results'+str(n-1)+'.csv')
test_E2 = pd.read_csv('./virtual_queue_E2/virtual_queue_E2_results'+str(n-1)+'.csv')
length_H2 = test_H2.shape[0]
length_E2 = test_E2.shape[0]
# 合并训练集
dfs = []
for i in range(n-1):
df = pd.read_csv('./virtual_queue_H2/virtual_queue_H2_results'+str(i)+'.csv')
dfs.append(df)
data_train_H2 = pd.concat(dfs, ignore_index=True)
data_train_H2.sort_values(by='arrival_time', inplace=True)
dfs = []
for i in range(n-1):
df = pd.read_csv('./virtual_queue_E2/virtual_queue_E2_results'+str(i)+'.csv')
dfs.append(df)
data_train_E2 = pd.concat(dfs, ignore_index=True)
data_train_E2.sort_values(by='arrival_time', inplace=True)
# 在测试集使用knn计算virtual_wait_time
def knn(data_Train, Time, k):
number = data_Train.shape[0]
data_Train.loc[:, 'time_diff'] = np.abs(data_Train['arrival_time']-Time)
data_Train_sorted = data_Train.sort_values(by='time_diff')
data_Train_near = data_Train_sorted.iloc[:k].copy()
virtual_wait_time = data_Train_near['wait_time'].mean()
return virtual_wait_time
# 目标函数
def objective_function_H2(k):
MSE = 0
for i in range(length_H2):
time = test_H2.loc[i, 'arrival_time']
virtual_wait_time = knn(data_train_H2, time, int(k[0]))
actual_wait_time = test_H2.loc[i, 'wait_time']
MSE += (virtual_wait_time - actual_wait_time) ** 2
return np.sqrt(MSE / length_H2)
def objective_function_E2(k):
MSE = 0
for i in range(length_E2):
time = test_E2.loc[i, 'arrival_time']
virtual_wait_time = knn(data_train_E2, time, int(k[0]))
actual_wait_time = test_E2.loc[i, 'wait_time']
MSE += (virtual_wait_time - actual_wait_time) ** 2
return np.sqrt(MSE / length_E2)
# 贝叶斯优化寻找最优k
space = [(1, 1000)]
result_H2 = gp_minimize(objective_function_H2, space, n_calls=20)
result_E2 = gp_minimize(objective_function_E2, space, n_calls=20)
# 输出结果
print("best_k_H2:", result_H2.x)
print("MSE_H2:", result_H2.fun)
print("best_k_E2:", result_E2.x)
print("MSE_E2:", result_E2.fun)
| n | best_k_H2 | best_k_E2 |
|---|---|---|
| 10 | 47 | 11 |
| 25 | 120 | 124 |
| 50 | 113 | 109 |
| 100 | 282 | 184 |
