|
@@ -2,13 +2,8 @@ package cn.com.yusys.manager.service;
|
|
|
|
|
|
|
|
import cn.com.yusys.manager.common.ParseInstanceStatusRegistry;
|
|
import cn.com.yusys.manager.common.ParseInstanceStatusRegistry;
|
|
|
import cn.com.yusys.manager.config.ParserConfig;
|
|
import cn.com.yusys.manager.config.ParserConfig;
|
|
|
-import cn.com.yusys.manager.model.ExecuteResponse;
|
|
|
|
|
-import cn.com.yusys.manager.model.InstanceStatus;
|
|
|
|
|
-import cn.com.yusys.manager.model.InstanceStatusResponse;
|
|
|
|
|
-import cn.com.yusys.manager.instanceManager.Impl.DockerInstanceManager;
|
|
|
|
|
-import cn.com.yusys.manager.model.Task;
|
|
|
|
|
-import cn.com.yusys.manager.model.InstanceManagementResponse;
|
|
|
|
|
-import cn.com.yusys.manager.model.InstanceConfigRequest;
|
|
|
|
|
|
|
+import cn.com.yusys.manager.instanceManager.InstanceManager;
|
|
|
|
|
+import cn.com.yusys.manager.model.*;
|
|
|
import cn.com.yusys.manager.util.ParseInstanceClient;
|
|
import cn.com.yusys.manager.util.ParseInstanceClient;
|
|
|
import cn.com.yusys.manager.common.PortPool;
|
|
import cn.com.yusys.manager.common.PortPool;
|
|
|
import lombok.RequiredArgsConstructor;
|
|
import lombok.RequiredArgsConstructor;
|
|
@@ -45,9 +40,9 @@ public class InstanceMonitorService {
|
|
|
@Resource
|
|
@Resource
|
|
|
private final ParserConfig parserConfig;
|
|
private final ParserConfig parserConfig;
|
|
|
|
|
|
|
|
- //docker实例管理器
|
|
|
|
|
|
|
+ //实例管理器
|
|
|
@Resource
|
|
@Resource
|
|
|
- private DockerInstanceManager dockerInstanceManager;
|
|
|
|
|
|
|
+ private InstanceManager instanceManager;
|
|
|
|
|
|
|
|
// 注入独立的端口池管理工具类
|
|
// 注入独立的端口池管理工具类
|
|
|
@Resource
|
|
@Resource
|
|
@@ -57,6 +52,10 @@ public class InstanceMonitorService {
|
|
|
@Resource
|
|
@Resource
|
|
|
private TaskLogService taskLogService;
|
|
private TaskLogService taskLogService;
|
|
|
|
|
|
|
|
|
|
+ // 注入任务记录服务
|
|
|
|
|
+ @Resource
|
|
|
|
|
+ private TaskRecordService taskRecordService;
|
|
|
|
|
+
|
|
|
// 最大重试次数
|
|
// 最大重试次数
|
|
|
@org.springframework.beans.factory.annotation.Value("${parser.task.max-retry:3}")
|
|
@org.springframework.beans.factory.annotation.Value("${parser.task.max-retry:3}")
|
|
|
private int maxRetry;
|
|
private int maxRetry;
|
|
@@ -69,14 +68,14 @@ public class InstanceMonitorService {
|
|
|
// 4. 使用PortPool分配端口
|
|
// 4. 使用PortPool分配端口
|
|
|
Integer port = portPool.allocatePort();
|
|
Integer port = portPool.allocatePort();
|
|
|
if(port != null){
|
|
if(port != null){
|
|
|
- String containerId = dockerInstanceManager.startParseInstance(parserConfig.IMAGE_NAME, port);
|
|
|
|
|
|
|
+ String instanceId = instanceManager.startParseInstance(port);
|
|
|
// 增加容器ID空值校验
|
|
// 增加容器ID空值校验
|
|
|
- if (containerId == null || containerId.isEmpty()) {
|
|
|
|
|
- log.error("初始化实例失败:Docker容器创建失败,端口:{}", port);
|
|
|
|
|
|
|
+ if (instanceId == null || instanceId.isEmpty()) {
|
|
|
|
|
+ log.error("初始化实例失败:进程创建失败,端口:{}", port);
|
|
|
portPool.releasePort(port); // 归还端口
|
|
portPool.releasePort(port); // 归还端口
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- InstanceStatus instanceStatus = saveInstanceStatus(containerId, port);
|
|
|
|
|
|
|
+ InstanceStatus instanceStatus = saveInstanceStatus(instanceId, port);
|
|
|
} else {
|
|
} else {
|
|
|
log.error("初始化实例失败:无可用端口");
|
|
log.error("初始化实例失败:无可用端口");
|
|
|
break;
|
|
break;
|
|
@@ -130,11 +129,11 @@ public class InstanceMonitorService {
|
|
|
activeInstancePool.remove(instanceId);
|
|
activeInstancePool.remove(instanceId);
|
|
|
// 使用PortPool释放端口
|
|
// 使用PortPool释放端口
|
|
|
portPool.releasePort(state.getPort());
|
|
portPool.releasePort(state.getPort());
|
|
|
- // 捕获Docker操作异常
|
|
|
|
|
|
|
+ // 捕获进程操作异常
|
|
|
try {
|
|
try {
|
|
|
- dockerInstanceManager.terminateInstance(state.getContainerId());
|
|
|
|
|
|
|
+ instanceManager.terminateInstance(instanceId);
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
- log.error("终止实例{}的Docker容器失败", instanceId, e);
|
|
|
|
|
|
|
+ log.error("终止实例{}的进程失败", instanceId, e);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
});
|
|
});
|
|
@@ -222,14 +221,14 @@ public class InstanceMonitorService {
|
|
|
// 使用PortPool分配端口
|
|
// 使用PortPool分配端口
|
|
|
Integer port = portPool.allocatePort();
|
|
Integer port = portPool.allocatePort();
|
|
|
if(port != null){
|
|
if(port != null){
|
|
|
- String containerId = dockerInstanceManager.startParseInstance(parserConfig.IMAGE_NAME, port);
|
|
|
|
|
- // 增加容器ID空值校验
|
|
|
|
|
- if (containerId == null || containerId.isEmpty()) {
|
|
|
|
|
- log.error("创建实例失败:Docker容器创建失败,端口:{}", port);
|
|
|
|
|
|
|
+ String instanceId = instanceManager.startParseInstance(port);
|
|
|
|
|
+ // 增加实例ID空值校验
|
|
|
|
|
+ if (instanceId == null || instanceId.isEmpty()) {
|
|
|
|
|
+ log.error("创建实例失败:进程创建失败,端口:{}", port);
|
|
|
portPool.releasePort(port); // 归还端口
|
|
portPool.releasePort(port); // 归还端口
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- InstanceStatus instanceStatus = saveInstanceStatus(containerId, port);
|
|
|
|
|
|
|
+ InstanceStatus instanceStatus = saveInstanceStatus(instanceId, port);
|
|
|
} else {
|
|
} else {
|
|
|
log.error("创建实例失败:无可用端口");
|
|
log.error("创建实例失败:无可用端口");
|
|
|
break;
|
|
break;
|
|
@@ -317,17 +316,19 @@ public class InstanceMonitorService {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//保存实例状态
|
|
//保存实例状态
|
|
|
- private InstanceStatus saveInstanceStatus(String containerId,Integer port) {
|
|
|
|
|
|
|
+ private InstanceStatus saveInstanceStatus(String instanceId, Integer port) {
|
|
|
InstanceStatus instanceStatus = new InstanceStatus();
|
|
InstanceStatus instanceStatus = new InstanceStatus();
|
|
|
instanceStatus.setIp("127.0.0.1");
|
|
instanceStatus.setIp("127.0.0.1");
|
|
|
instanceStatus.setPort(port);
|
|
instanceStatus.setPort(port);
|
|
|
instanceStatus.setLastHeartbeatTime(System.currentTimeMillis());
|
|
instanceStatus.setLastHeartbeatTime(System.currentTimeMillis());
|
|
|
instanceStatus.setStatus(0);
|
|
instanceStatus.setStatus(0);
|
|
|
|
|
+ instanceStatus.setInstanceId(instanceId);
|
|
|
|
|
+ // 获取并设置进程PID
|
|
|
|
|
+ Long pid = instanceManager.getPid(instanceId);
|
|
|
|
|
+ instanceStatus.setPid(pid);
|
|
|
|
|
|
|
|
Map<String, InstanceStatus> activeInstancePool = instancestatusRegistry.getActiveInstancePool();
|
|
Map<String, InstanceStatus> activeInstancePool = instancestatusRegistry.getActiveInstancePool();
|
|
|
-
|
|
|
|
|
- instanceStatus.setContainerId(containerId);
|
|
|
|
|
- activeInstancePool.put(containerId, instanceStatus);
|
|
|
|
|
|
|
+ activeInstancePool.put(instanceId, instanceStatus);
|
|
|
return instanceStatus;
|
|
return instanceStatus;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -343,15 +344,15 @@ public class InstanceMonitorService {
|
|
|
// 使用PortPool分配端口
|
|
// 使用PortPool分配端口
|
|
|
Integer port = portPool.allocatePort();
|
|
Integer port = portPool.allocatePort();
|
|
|
if(port != null){
|
|
if(port != null){
|
|
|
- String containerId = dockerInstanceManager.startParseInstance(parserConfig.IMAGE_NAME, port);
|
|
|
|
|
- // 增加容器ID空值校验
|
|
|
|
|
- if (containerId == null || containerId.isEmpty()) {
|
|
|
|
|
- log.error("GPU扩容实例失败:Docker容器创建失败,端口:{}", port);
|
|
|
|
|
|
|
+ String instanceId = instanceManager.startParseInstance(port);
|
|
|
|
|
+ // 增加实例ID空值校验
|
|
|
|
|
+ if (instanceId == null || instanceId.isEmpty()) {
|
|
|
|
|
+ log.error("GPU扩容实例失败:进程创建失败,端口:{}", port);
|
|
|
portPool.releasePort(port); // 归还端口
|
|
portPool.releasePort(port); // 归还端口
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- InstanceStatus instanceStatus = saveInstanceStatus(containerId, port);
|
|
|
|
|
- log.info("基于GPU负载扩容,已创建实例,容器ID:{},端口:{}", containerId, port);
|
|
|
|
|
|
|
+ InstanceStatus instanceStatus = saveInstanceStatus(instanceId, port);
|
|
|
|
|
+ log.info("基于GPU负载扩容,已创建实例,实例ID:{},端口:{}", instanceId, port);
|
|
|
} else {
|
|
} else {
|
|
|
log.warn("端口池已满,无法继续GPU扩容");
|
|
log.warn("端口池已满,无法继续GPU扩容");
|
|
|
break;
|
|
break;
|
|
@@ -387,10 +388,10 @@ public class InstanceMonitorService {
|
|
|
activeInstancePool.remove(instanceId);
|
|
activeInstancePool.remove(instanceId);
|
|
|
// 释放端口
|
|
// 释放端口
|
|
|
portPool.releasePort(instanceStatus.getPort());
|
|
portPool.releasePort(instanceStatus.getPort());
|
|
|
- // 关闭Docker容器
|
|
|
|
|
- dockerInstanceManager.terminateInstance(instanceStatus.getContainerId());
|
|
|
|
|
|
|
+ // 关闭进程
|
|
|
|
|
+ instanceManager.terminateInstance(instanceId);
|
|
|
|
|
|
|
|
- log.info("缩容实例{}成功,已关闭容器并释放端口{}", instanceId, instanceStatus.getPort());
|
|
|
|
|
|
|
+ log.info("缩容实例{}成功,已关闭进程并释放端口{}", instanceId, instanceStatus.getPort());
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
log.error("缩容实例{}失败", instanceId, e);
|
|
log.error("缩容实例{}失败", instanceId, e);
|
|
|
// 缩容失败时,将实例重新加入活跃池(避免端口丢失)
|
|
// 缩容失败时,将实例重新加入活跃池(避免端口丢失)
|
|
@@ -408,6 +409,12 @@ public class InstanceMonitorService {
|
|
|
String taskId = task.getTaskId();
|
|
String taskId = task.getTaskId();
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
|
|
|
+ TaskRecordRequest taskRecordRequest=new TaskRecordRequest();
|
|
|
|
|
+ taskRecordRequest.setTaskId(taskId);
|
|
|
|
|
+ taskRecordRequest.setStatus(1);
|
|
|
|
|
+ // 更新任务状态为等待解析
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest);
|
|
|
|
|
+
|
|
|
// 记录任务开始日志
|
|
// 记录任务开始日志
|
|
|
taskLogService.logTaskStart(taskId, task.getFilePath());
|
|
taskLogService.logTaskStart(taskId, task.getFilePath());
|
|
|
|
|
|
|
@@ -416,17 +423,33 @@ public class InstanceMonitorService {
|
|
|
if (idleInstance == null) {
|
|
if (idleInstance == null) {
|
|
|
log.debug("当前无空闲解析实例");
|
|
log.debug("当前无空闲解析实例");
|
|
|
taskLogService.logTaskFailure(taskId, "当前无空闲解析实例");
|
|
taskLogService.logTaskFailure(taskId, "当前无空闲解析实例");
|
|
|
|
|
+ // 更新任务状态为解析失败
|
|
|
|
|
+ taskRecordRequest.setStatus(4);
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest);
|
|
|
return ExecuteResponse.fail(300,"当前无空闲解析实例");
|
|
return ExecuteResponse.fail(300,"当前无空闲解析实例");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // 更新任务状态为解析中
|
|
|
|
|
+ taskRecordRequest.setStatus(2);
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest);
|
|
|
|
|
+
|
|
|
// 记录实例分配日志
|
|
// 记录实例分配日志
|
|
|
- taskLogService.logInstanceAllocation(taskId, idleInstance.getContainerId());
|
|
|
|
|
|
|
+ taskLogService.logInstanceAllocation(taskId, idleInstance.getInstanceId());
|
|
|
|
|
|
|
|
// 执行任务解析
|
|
// 执行任务解析
|
|
|
ExecuteResponse response = executeTaskWithRetry(idleInstance, task);
|
|
ExecuteResponse response = executeTaskWithRetry(idleInstance, task);
|
|
|
|
|
|
|
|
|
|
+ // 根据执行结果更新任务状态
|
|
|
|
|
+ if ( response.getCode() == 200) {
|
|
|
|
|
+ taskRecordRequest.setStatus(3);
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest); // 解析成功
|
|
|
|
|
+ } else {
|
|
|
|
|
+ taskRecordRequest.setStatus(4);
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest); // 解析失败
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
// 记录任务完成日志
|
|
// 记录任务完成日志
|
|
|
- if (response != null && response.getCode() == 200) {
|
|
|
|
|
|
|
+ if (response.getCode() == 200) {
|
|
|
taskLogService.logTaskComplete(taskId, response.getMessage());
|
|
taskLogService.logTaskComplete(taskId, response.getMessage());
|
|
|
} else {
|
|
} else {
|
|
|
taskLogService.logTaskFailure(taskId, response != null ? response.getMessage() : "未知错误");
|
|
taskLogService.logTaskFailure(taskId, response != null ? response.getMessage() : "未知错误");
|
|
@@ -437,6 +460,11 @@ public class InstanceMonitorService {
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
log.error("多模态任务解析定时任务执行失败", e);
|
|
log.error("多模态任务解析定时任务执行失败", e);
|
|
|
taskLogService.logTaskFailure(taskId, e.getMessage());
|
|
taskLogService.logTaskFailure(taskId, e.getMessage());
|
|
|
|
|
+ // 更新任务状态为解析失败
|
|
|
|
|
+ TaskRecordRequest taskRecordRequest=new TaskRecordRequest();
|
|
|
|
|
+ taskRecordRequest.setTaskId(taskId);
|
|
|
|
|
+ taskRecordRequest.setStatus(4);
|
|
|
|
|
+ taskRecordService.updateStatus(taskRecordRequest);
|
|
|
return ExecuteResponse.fail("多模态任务解析定时任务执行失败");
|
|
return ExecuteResponse.fail("多模态任务解析定时任务执行失败");
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -456,7 +484,7 @@ public class InstanceMonitorService {
|
|
|
* 执行任务并处理重试逻辑
|
|
* 执行任务并处理重试逻辑
|
|
|
*/
|
|
*/
|
|
|
private ExecuteResponse executeTaskWithRetry(InstanceStatus instance, Task task) {
|
|
private ExecuteResponse executeTaskWithRetry(InstanceStatus instance, Task task) {
|
|
|
- String instanceId = instance.getContainerId();
|
|
|
|
|
|
|
+ String instanceId = instance.getInstanceId();
|
|
|
int retryCount = 0;
|
|
int retryCount = 0;
|
|
|
// 标记实例为运行中
|
|
// 标记实例为运行中
|
|
|
instance.setStatus(1);
|
|
instance.setStatus(1);
|
|
@@ -502,11 +530,11 @@ public class InstanceMonitorService {
|
|
|
task);
|
|
task);
|
|
|
|
|
|
|
|
if (response == null || response.getCode() != 200) {
|
|
if (response == null || response.getCode() != 200) {
|
|
|
- log.warn("调用解析器返回失败,实例:{},响应:{}", instance.getContainerId(), response);
|
|
|
|
|
|
|
+ log.warn("调用解析器返回失败,实例:{},响应:{}", instance.getInstanceId(), response);
|
|
|
}
|
|
}
|
|
|
return response;
|
|
return response;
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
- log.error("调用解析器失败,实例:{}", instance.getContainerId(), e);
|
|
|
|
|
|
|
+ log.error("调用解析器失败,实例:{}", instance.getInstanceId(), e);
|
|
|
throw e;
|
|
throw e;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -533,7 +561,7 @@ public class InstanceMonitorService {
|
|
|
// 构建实例详情列表
|
|
// 构建实例详情列表
|
|
|
List<InstanceManagementResponse.InstanceDetail> instanceDetails = activeInstancePool.values().stream()
|
|
List<InstanceManagementResponse.InstanceDetail> instanceDetails = activeInstancePool.values().stream()
|
|
|
.map(status -> InstanceManagementResponse.InstanceDetail.builder()
|
|
.map(status -> InstanceManagementResponse.InstanceDetail.builder()
|
|
|
- .containerId(status.getContainerId())
|
|
|
|
|
|
|
+ .instanceId(status.getInstanceId())
|
|
|
.ip(status.getIp())
|
|
.ip(status.getIp())
|
|
|
.port(status.getPort())
|
|
.port(status.getPort())
|
|
|
.status(status.getStatus())
|
|
.status(status.getStatus())
|
|
@@ -591,4 +619,5 @@ public class InstanceMonitorService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-}
|
|
|
|
|
|
|
+}
|
|
|
|
|
+
|