# video_parser.py (5.4 KB)
import asyncio
import base64
import mimetypes
import os
import tempfile

import requests

from core.router import Parser
from models.result import ParseResult
from utils.logger import log
from utils.ffmpeg_wrapper import FFmpegWrapper
from parsers.audio_parser import AudioParser
  10. class VideoParser(Parser):
  11. """视频文件解析器"""
  12. def __init__(self):
  13. self.ffmpeg = FFmpegWrapper()
  14. self.audio_parser = AudioParser()
  15. # Qwen3-VL模型配置
  16. self.qwen_api_url = "http://10.192.72.13:7280/v1/chat/completions"
  17. async def parse(self, file_path: str) -> ParseResult:
  18. """
  19. 解析视频文件
  20. Args:
  21. file_path: 文件路径
  22. Returns:
  23. ParseResult: 解析结果
  24. """
  25. log.info(f"开始解析视频文件: {file_path}")
  26. try:
  27. # 1. 提取音频轨道
  28. with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
  29. temp_audio_path = temp_file.name
  30. self.ffmpeg.extract_audio(file_path, temp_audio_path)
  31. log.info(f"音频提取完成: {temp_audio_path}")
  32. # 2. 使用AudioParser解析音频
  33. audio_result = await self.audio_parser.parse(temp_audio_path)
  34. log.info("音频解析完成")
  35. # 3. 提取关键帧
  36. frame_results = [] # 移到外部定义
  37. with tempfile.TemporaryDirectory() as temp_dir:
  38. keyframes = self.ffmpeg.extract_keyframes(file_path, temp_dir)
  39. log.info(f"关键帧提取完成,共{len(keyframes)}张")
  40. # 4. 使用Qwen3-VL解析关键帧
  41. for i, frame_path in enumerate(keyframes):
  42. try:
  43. frame_content = self._parse_frame_with_qwen(frame_path)
  44. log.info(f"解析关键帧 {i+1} 结果长度: {len(frame_content) if frame_content else 0}")
  45. if frame_content:
  46. # 计算关键帧的时间点(秒)
  47. time_second = i * 10 # 假设每10秒提取一个关键帧
  48. frame_results.append((time_second, frame_content))
  49. log.info(f"添加关键帧 {i+1} 到结果列表")
  50. else:
  51. log.warning(f"关键帧 {i+1} 解析结果为空")
  52. except Exception as e:
  53. log.warning(f"解析关键帧 {i+1} 失败: {str(e)}")
  54. log.info(f"关键帧解析完成,frame_results长度: {len(frame_results)}")
  55. # 5. 合并结果
  56. content = []
  57. content.append("# 音频内容")
  58. content.append(audio_result.content)
  59. if frame_results:
  60. log.info("开始添加画面内容到结果")
  61. content.append("\n# 画面内容")
  62. for time_second, frame_content in frame_results:
  63. content.append(f"\n## 第{time_second}秒")
  64. content.append(frame_content)
  65. log.info(f"添加第{time_second}秒画面内容,长度: {len(frame_content)}")
  66. else:
  67. log.warning("没有画面内容可以添加")
  68. # 清理临时文件
  69. if os.path.exists(temp_audio_path):
  70. os.remove(temp_audio_path)
  71. return ParseResult(
  72. content="\n".join(content),
  73. metadata={
  74. "parser": "VideoParser",
  75. "file_size": os.path.getsize(file_path),
  76. "audio_parser": "Qwen3-ASR",
  77. "visual_parser": "Qwen3-VL",
  78. "keyframe_count": len(keyframes)
  79. },
  80. file_type="video"
  81. )
  82. except Exception as e:
  83. log.error(f"视频文件解析失败: {str(e)}")
  84. # 清理临时文件
  85. if 'temp_audio_path' in locals() and os.path.exists(temp_audio_path):
  86. os.remove(temp_audio_path)
  87. return ParseResult(
  88. content="",
  89. metadata={"error": str(e)},
  90. file_type="video"
  91. )
  92. def _parse_frame_with_qwen(self, image_path: str) -> str:
  93. """
  94. 使用Qwen3-VL模型解析图片
  95. Args:
  96. image_path: 图片路径
  97. Returns:
  98. str: 解析结果
  99. """
  100. log.info(f"使用Qwen3-VL解析图片: {image_path}")
  101. # 编码图片
  102. with open(image_path, "rb") as f:
  103. base64_image = base64.b64encode(f.read()).decode("utf-8")
  104. # 发送请求
  105. payload = {
  106. "model": "/model",
  107. "messages": [{
  108. "role": "user",
  109. "content": [
  110. {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
  111. {"type": "text", "text": "详细描述这张图片的内容,包括人物、物体、场景、文字等所有可见信息"}
  112. ]
  113. }],
  114. "max_tokens": 512
  115. }
  116. response = requests.post(self.qwen_api_url, json=payload, timeout=120)
  117. response.raise_for_status()
  118. result = response.json()
  119. return result["choices"][0]["message"]["content"]