5 months ago · 4a9c9d1114
--- a/ocr_tools/universal_doc_parser/models/adapters/base.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/base.py
@@ -2,6 +2,10 @@ from abc import ABC, abstractmethod
 
				 from typing import Dict, Any, List, Union, Optional, Tuple
			
 
				 import numpy as np
			
 
				 from PIL import Image
			
 
				+from loguru import logger
			
 
				+from pathlib import Path
			
 
				+import cv2
			
 
				+import json
			
 
				 
			
 
				 class BaseAdapter(ABC):
			
 
				     """基础适配器接口"""
			
@@ -44,6 +48,18 @@ class BasePreprocessor(BaseAdapter):
 
				 class BaseLayoutDetector(BaseAdapter):
			
 
				     """版式检测器基类"""
			
 
				     
			
 
				+    def __init__(self, config: Dict[str, Any]):
			
 
				+        """初始化版式检测器
			
 
				+        
			
 
				+        Args:
			
 
				+            config: 配置字典
			
 
				+        """
			
 
				+        super().__init__(config)
			
 
				+        # 初始化 debug 相关属性（支持从配置或运行时设置）
			
 
				+        self.debug_mode = None  # 将在 detect() 方法中从配置读取
			
 
				+        self.output_dir = None  # 将在 detect() 方法中从配置读取
			
 
				+        self.page_name = None   # 将在 detect() 方法中从配置读取
			
 
				+    
			
 
				     def detect(
			
 
				         self, 
			
 
				         image: Union[np.ndarray, Image.Image],
			
@@ -66,6 +82,58 @@ class BaseLayoutDetector(BaseAdapter):
 
				         # 调用子类实现的原始检测方法
			
 
				         layout_results = self._detect_raw(image, ocr_spans)
			
 
				         
			
 
				+        # Debug 模式：打印和可视化后处理前的检测结果
			
 
				+        # 优先从实例属性读取（如果存在），否则从配置读取
			
 
				+        # 支持两种配置方式：debug_mode 或 debug_options.enabled
			
 
				+        debug_mode = getattr(self, 'debug_mode', None)
			
 
				+        if debug_mode is None:
			
 
				+            if hasattr(self, 'config'):
			
 
				+                # 优先从 debug_mode 读取
			
 
				+                debug_mode = self.config.get('debug_mode', False)
			
 
				+                # 如果没有 debug_mode，尝试从 debug_options.enabled 读取
			
 
				+                if not debug_mode:
			
 
				+                    debug_options = self.config.get('debug_options', {})
			
 
				+                    if isinstance(debug_options, dict):
			
 
				+                        debug_mode = debug_options.get('enabled', False)
			
 
				+            else:
			
 
				+                debug_mode = False
			
 
				+        
			
 
				+        if debug_mode:
			
 
				+            logger.debug(f"🔍 Layout detection raw results (before post-processing): {len(layout_results)} elements")
			
 
				+            # logger.debug(f"Raw layout_results: {layout_results}")
			
 
				+            # 可视化 layout 结果
			
 
				+            output_dir = getattr(self, 'output_dir', None)
			
 
				+            if output_dir is None:
			
 
				+                if hasattr(self, 'config'):
			
 
				+                    # 优先从 output_dir 读取
			
 
				+                    output_dir = self.config.get('output_dir', None)
			
 
				+                    # 如果没有 output_dir，尝试从 debug_options.output_dir 读取
			
 
				+                    if output_dir is None:
			
 
				+                        debug_options = self.config.get('debug_options', {})
			
 
				+                        if isinstance(debug_options, dict):
			
 
				+                            output_dir = debug_options.get('output_dir', None)
			
 
				+                else:
			
 
				+                    output_dir = None
			
 
				+            
			
 
				+            page_name = getattr(self, 'page_name', None)
			
 
				+            if page_name is None:
			
 
				+                if hasattr(self, 'config'):
			
 
				+                    # 优先从 page_name 读取
			
 
				+                    page_name = self.config.get('page_name', None)
			
 
				+                    # 如果没有 page_name，尝试从 debug_options.prefix 读取
			
 
				+                    if page_name is None:
			
 
				+                        debug_options = self.config.get('debug_options', {})
			
 
				+                        if isinstance(debug_options, dict):
			
 
				+                            prefix = debug_options.get('prefix', '')
			
 
				+                            page_name = prefix if prefix else 'layout_detection'
			
 
				+                    if page_name is None:
			
 
				+                        page_name = 'layout_detection'
			
 
				+                else:
			
 
				+                    page_name = 'layout_detection'
			
 
				+            
			
 
				+            if output_dir:
			
 
				+                self._visualize_layout_results(image, layout_results, output_dir, page_name, suffix='raw')
			
 
				+        
			
 
				         # 自动执行后处理
			
 
				         if layout_results:
			
 
				             layout_config = self.config.get('post_process', {}) if hasattr(self, 'config') else {}
			
@@ -132,7 +200,7 @@ class BaseLayoutDetector(BaseAdapter):
 
				                 return layout_results
			
 
				         
			
 
				         # 1. 去除重叠框
			
 
				-        layout_results = self._remove_overlapping_boxes(layout_results, CoordinateUtils)
			
 
				+        layout_results_removed_overlapping = self._remove_overlapping_boxes(layout_results, CoordinateUtils)
			
 
				         
			
 
				         # 2. 将大面积文本块转换为表格（如果配置启用）
			
 
				         layout_config = config if config is not None else {}
			
@@ -143,94 +211,15 @@ class BaseLayoutDetector(BaseAdapter):
 
				             else:
			
 
				                 h, w = image.shape[:2] if len(image.shape) >= 2 else (0, 0)
			
 
				             
			
 
				-            layout_results = self._convert_large_text_to_table(
			
 
				-                layout_results,
			
 
				+            layout_results_converted_large_text = self._convert_large_text_to_table(
			
 
				+                layout_results_removed_overlapping,
			
 
				                 (h, w),
			
 
				                 min_area_ratio=layout_config.get('min_text_area_ratio', 0.25),
			
 
				                 min_width_ratio=layout_config.get('min_text_width_ratio', 0.4),
			
 
				                 min_height_ratio=layout_config.get('min_text_height_ratio', 0.3)
			
 
				             )
			
 
				         
			
 
				-        return layout_results
			
 
				-    
			
 
				-    def _remove_overlapping_boxes(
			
 
				-        self,
			
 
				-        layout_results: List[Dict[str, Any]],
			
 
				-        coordinate_utils: Any,
			
 
				-        iou_threshold: float = 0.8,
			
 
				-        overlap_ratio_threshold: float = 0.8
			
 
				-    ) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        处理重叠的布局框（参考 MinerU 的去重策略）
			
 
				-        
			
 
				-        策略：
			
 
				-        1. 高 IoU 重叠：保留置信度高的框
			
 
				-        2. 包含关系：小框被大框高度包含时，保留大框并扩展边界
			
 
				-        """
			
 
				-        if not layout_results or len(layout_results) <= 1:
			
 
				-            return layout_results
			
 
				-        
			
 
				-        # 复制列表避免修改原数据
			
 
				-        results = [item.copy() for item in layout_results]
			
 
				-        need_remove = set()
			
 
				-        
			
 
				-        for i in range(len(results)):
			
 
				-            if i in need_remove:
			
 
				-                continue
			
 
				-                
			
 
				-            for j in range(i + 1, len(results)):
			
 
				-                if j in need_remove:
			
 
				-                    continue
			
 
				-                
			
 
				-                bbox1 = results[i].get('bbox', [0, 0, 0, 0])
			
 
				-                bbox2 = results[j].get('bbox', [0, 0, 0, 0])
			
 
				-                
			
 
				-                if len(bbox1) < 4 or len(bbox2) < 4:
			
 
				-                    continue
			
 
				-                
			
 
				-                # 计算 IoU
			
 
				-                iou = coordinate_utils.calculate_iou(bbox1, bbox2)
			
 
				-                
			
 
				-                if iou > iou_threshold:
			
 
				-                    # 高度重叠，保留置信度高的
			
 
				-                    score1 = results[i].get('confidence', results[i].get('score', 0))
			
 
				-                    score2 = results[j].get('confidence', results[j].get('score', 0))
			
 
				-                    
			
 
				-                    if score1 >= score2:
			
 
				-                        need_remove.add(j)
			
 
				-                    else:
			
 
				-                        need_remove.add(i)
			
 
				-                        break  # i 被移除，跳出内层循环
			
 
				-                else:
			
 
				-                    # 检查包含关系
			
 
				-                    overlap_ratio = coordinate_utils.calculate_overlap_ratio(bbox1, bbox2)
			
 
				-                    
			
 
				-                    if overlap_ratio > overlap_ratio_threshold:
			
 
				-                        # 小框被大框高度包含
			
 
				-                        area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				-                        area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				-                        
			
 
				-                        if area1 <= area2:
			
 
				-                            small_idx, large_idx = i, j
			
 
				-                        else:
			
 
				-                            small_idx, large_idx = j, i
			
 
				-                        
			
 
				-                        # 扩展大框的边界
			
 
				-                        small_bbox = results[small_idx]['bbox']
			
 
				-                        large_bbox = results[large_idx]['bbox']
			
 
				-                        results[large_idx]['bbox'] = [
			
 
				-                            min(small_bbox[0], large_bbox[0]),
			
 
				-                            min(small_bbox[1], large_bbox[1]),
			
 
				-                            max(small_bbox[2], large_bbox[2]),
			
 
				-                            max(small_bbox[3], large_bbox[3])
			
 
				-                        ]
			
 
				-                        need_remove.add(small_idx)
			
 
				-                        
			
 
				-                        if small_idx == i:
			
 
				-                            break  # i 被移除，跳出内层循环
			
 
				-        
			
 
				-        # 返回去重后的结果
			
 
				-        return [results[i] for i in range(len(results)) if i not in need_remove]
			
 
				+        return layout_results_converted_large_text
			
 
				     
			
 
				     def _convert_large_text_to_table(
			
 
				         self,
			
@@ -324,6 +313,281 @@ class BaseLayoutDetector(BaseAdapter):
 
				             101: 'image_footnote'
			
 
				         }
			
 
				         return category_map.get(category_id, f'unknown_{category_id}')
			
 
				+    
			
 
				+    def _visualize_layout_results(
			
 
				+        self,
			
 
				+        image: Union[np.ndarray, Image.Image],
			
 
				+        layout_results: List[Dict[str, Any]],
			
 
				+        output_dir: str,
			
 
				+        page_name: str,
			
 
				+        suffix: str = 'raw'
			
 
				+    ) -> None:
			
 
				+        """
			
 
				+        可视化 layout 检测结果
			
 
				+        
			
 
				+        Args:
			
 
				+            image: 输入图像
			
 
				+            layout_results: 布局检测结果
			
 
				+            output_dir: 输出目录
			
 
				+            page_name: 页面名称
			
 
				+            suffix: 文件名后缀（如 'raw', 'postprocessed'）
			
 
				+        """
			
 
				+        if not layout_results:
			
 
				+            return
			
 
				+        
			
 
				+        try:
			
 
				+            # 转换为 numpy 数组
			
 
				+            if isinstance(image, Image.Image):
			
 
				+                vis_image = np.array(image)
			
 
				+                if len(vis_image.shape) == 3 and vis_image.shape[2] == 3:
			
 
				+                    # PIL RGB -> OpenCV BGR
			
 
				+                    vis_image = cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR)
			
 
				+            else:
			
 
				+                vis_image = image.copy()
			
 
				+                if len(vis_image.shape) == 3 and vis_image.shape[2] == 3:
			
 
				+                    # 如果是 RGB，转换为 BGR
			
 
				+                    vis_image = cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR)
			
 
				+            
			
 
				+            # 定义类别颜色映射 (BGR格式)
			
 
				+            category_colors = {
			
 
				+                'table_body': (0, 0, 255),      # 红色
			
 
				+                'table_caption': (0, 0, 200),   # 暗红色
			
 
				+                'table_footnote': (0, 0, 150),  # 更暗的红色
			
 
				+                'text': (255, 0, 0),            # 蓝色
			
 
				+                'title': (0, 255, 255),         # 黄色
			
 
				+                'header': (255, 0, 255),        # 紫色
			
 
				+                'footer': (0, 165, 255),        # 橙色
			
 
				+                'image_body': (0, 255, 0),      # 绿色
			
 
				+                'image_caption': (0, 200, 0),   # 暗绿色
			
 
				+                'image_footnote': (0, 150, 0),  # 更暗的绿色
			
 
				+                'abandon': (128, 128, 128),     # 灰色
			
 
				+            }
			
 
				+            
			
 
				+            # 绘制检测框
			
 
				+            for result in layout_results:
			
 
				+                bbox = result.get('bbox', [])
			
 
				+                if not bbox or len(bbox) < 4:
			
 
				+                    continue
			
 
				+                
			
 
				+                category = result.get('category', 'unknown')
			
 
				+                color = category_colors.get(category, (128, 128, 128))  # 默认灰色
			
 
				+                thickness = 2
			
 
				+                
			
 
				+                x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
			
 
				+                cv2.rectangle(vis_image, (x1, y1), (x2, y2), color, thickness)
			
 
				+                
			
 
				+                # 添加类别标签
			
 
				+                label = f"{category}"
			
 
				+                confidence = result.get('confidence', result.get('score', 0))
			
 
				+                if confidence:
			
 
				+                    label += f":{confidence:.2f}"
			
 
				+                
			
 
				+                # 计算文本大小
			
 
				+                font = cv2.FONT_HERSHEY_SIMPLEX
			
 
				+                font_scale = 0.4
			
 
				+                text_thickness = 1
			
 
				+                (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, text_thickness)
			
 
				+                
			
 
				+                # 在框的上方绘制文本背景
			
 
				+                text_y = max(y1 - baseline - 1, text_height + baseline)
			
 
				+                cv2.rectangle(vis_image, (x1, text_y - text_height - baseline - 2), 
			
 
				+                            (x1 + text_width, text_y), color, -1)
			
 
				+                # 绘制文本
			
 
				+                cv2.putText(vis_image, label, (x1, text_y - baseline - 1), 
			
 
				+                          font, font_scale, (255, 255, 255), text_thickness)
			
 
				+            
			
 
				+            # 保存图像
			
 
				+            debug_dir = Path(output_dir) / "debug_comparison" / "layout_detection"
			
 
				+            debug_dir.mkdir(parents=True, exist_ok=True)
			
 
				+            output_path = debug_dir / f"{page_name}_layout_{suffix}.jpg"
			
 
				+            cv2.imwrite(str(output_path), vis_image)
			
 
				+            logger.info(f"📊 Saved layout detection image ({suffix}): {output_path}")
			
 
				+            
			
 
				+            # 保存 JSON 数据
			
 
				+            json_data = {
			
 
				+                'page_name': page_name,
			
 
				+                'suffix': suffix,
			
 
				+                'count': len(layout_results),
			
 
				+                'results': [
			
 
				+                    {
			
 
				+                        'category': r.get('category'),
			
 
				+                        'bbox': r.get('bbox'),
			
 
				+                        'confidence': r.get('confidence', r.get('score', 0.0))
			
 
				+                    }
			
 
				+                    for r in layout_results
			
 
				+                ]
			
 
				+            }
			
 
				+            json_path = debug_dir / f"{page_name}_layout_{suffix}.json"
			
 
				+            with open(json_path, 'w', encoding='utf-8') as f:
			
 
				+                json.dump(json_data, f, ensure_ascii=False, indent=2)
			
 
				+            logger.info(f"📊 Saved layout detection JSON ({suffix}): {json_path}")
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f"⚠️ Failed to visualize layout results: {e}")
			
 
				+    
			
 
				+    def _remove_overlapping_boxes(
			
 
				+        self,
			
 
				+        layout_results: List[Dict[str, Any]],
			
 
				+        coordinate_utils: Any,
			
 
				+        iou_threshold: float = 0.8,
			
 
				+        overlap_ratio_threshold: float = 0.8
			
 
				+    ) -> List[Dict[str, Any]]:
			
 
				+        """
			
 
				+        改进版重叠框处理算法（基于优先级和决策规则的清晰算法）
			
 
				+        
			
 
				+        策略：
			
 
				+        1. 定义类别优先级（abandon < text/image < table_body）
			
 
				+        2. 使用统一的决策规则
			
 
				+        3. 按综合评分排序处理，优先保留大的聚合框
			
 
				+        
			
 
				+        Args:
			
 
				+            layout_results: 布局检测结果
			
 
				+            coordinate_utils: 坐标工具类
			
 
				+            iou_threshold: IoU阈值（默认0.8）
			
 
				+            overlap_ratio_threshold: 重叠比例阈值（默认0.8）
			
 
				+            
			
 
				+        Returns:
			
 
				+            去重后的布局结果
			
 
				+        """
			
 
				+        if not layout_results or len(layout_results) <= 1:
			
 
				+            return layout_results
			
 
				+        
			
 
				+        # 常量定义
			
 
				+        CATEGORY_PRIORITY = {
			
 
				+            'abandon': 0,
			
 
				+            'text': 1,
			
 
				+            'image_body': 1,
			
 
				+            'title': 2,
			
 
				+            'footer': 2,
			
 
				+            'header': 2,
			
 
				+            'table_body': 3,
			
 
				+        }
			
 
				+        AGGREGATE_LABELS = {'key-value region', 'form'}
			
 
				+        MAX_AREA = 4000000.0  # 用于面积归一化
			
 
				+        AREA_WEIGHT = 0.5
			
 
				+        CONFIDENCE_WEIGHT = 0.5
			
 
				+        AGGREGATE_BONUS = 0.1
			
 
				+        AREA_RATIO_THRESHOLD = 3.0  # 大框面积需大于小框的倍数
			
 
				+        
			
 
				+        def get_bbox_area(bbox: List[float]) -> float:
			
 
				+            """计算bbox面积"""
			
 
				+            if len(bbox) < 4:
			
 
				+                return 0.0
			
 
				+            return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
			
 
				+        
			
 
				+        def is_aggregate_type(box: Dict[str, Any]) -> bool:
			
 
				+            """检查是否是聚合类型"""
			
 
				+            original_label = box.get('raw', {}).get('original_label', '').lower()
			
 
				+            return original_label in AGGREGATE_LABELS
			
 
				+        
			
 
				+        def is_bbox_inside(inner: List[float], outer: List[float]) -> bool:
			
 
				+            """检查inner是否完全包含在outer内"""
			
 
				+            if len(inner) < 4 or len(outer) < 4:
			
 
				+                return False
			
 
				+            return (inner[0] >= outer[0] and inner[1] >= outer[1] and
			
 
				+                    inner[2] <= outer[2] and inner[3] <= outer[3])
			
 
				+        
			
 
				+        def calculate_composite_score(box: Dict[str, Any], area: float) -> float:
			
 
				+            """计算text类型的综合评分（面积+置信度）"""
			
 
				+            if box.get('category') != 'text':
			
 
				+                return box.get('confidence', box.get('score', 0))
			
 
				+            
			
 
				+            normalized_area = min(area / MAX_AREA, 1.0)
			
 
				+            area_score = (normalized_area ** 0.5) * AREA_WEIGHT
			
 
				+            confidence_score = box.get('confidence', box.get('score', 0)) * CONFIDENCE_WEIGHT
			
 
				+            bonus = AGGREGATE_BONUS if is_aggregate_type(box) else 0.0
			
 
				+            return area_score + confidence_score + bonus
			
 
				+        
			
 
				+        def should_keep_box1(box1: Dict[str, Any], box2: Dict[str, Any],
			
 
				+                             iou: float, overlap_ratio: float,
			
 
				+                             contained_1_in_2: bool, contained_2_in_1: bool) -> bool:
			
 
				+            """判断是否应该保留box1"""
			
 
				+            # 提取基本信息
			
 
				+            cat1, cat2 = box1.get('category', ''), box2.get('category', '')
			
 
				+            score1 = box1.get('confidence', box1.get('score', 0))
			
 
				+            score2 = box2.get('confidence', box2.get('score', 0))
			
 
				+            bbox1, bbox2 = box1.get('bbox', [0, 0, 0, 0]), box2.get('bbox', [0, 0, 0, 0])
			
 
				+            area1, area2 = get_bbox_area(bbox1), get_bbox_area(bbox2)
			
 
				+            is_agg1, is_agg2 = is_aggregate_type(box1), is_aggregate_type(box2)
			
 
				+            
			
 
				+            # 规则1: 类别优先级
			
 
				+            priority1 = CATEGORY_PRIORITY.get(cat1, 1)
			
 
				+            priority2 = CATEGORY_PRIORITY.get(cat2, 1)
			
 
				+            if priority1 != priority2:
			
 
				+                return priority1 > priority2
			
 
				+            
			
 
				+            # 规则2: 包含关系 + 聚合类型优先
			
 
				+            if contained_2_in_1 and is_agg1 and not is_agg2:
			
 
				+                return True
			
 
				+            if contained_1_in_2 and is_agg2 and not is_agg1:
			
 
				+                return False
			
 
				+            
			
 
				+            # 规则3: 包含关系 + 面积比例
			
 
				+            if contained_2_in_1 and area1 > area2 * AREA_RATIO_THRESHOLD:
			
 
				+                return True
			
 
				+            if contained_1_in_2 and area2 > area1 * AREA_RATIO_THRESHOLD:
			
 
				+                return False
			
 
				+            
			
 
				+            # 规则4: text类型使用综合评分
			
 
				+            if cat1 == 'text' or cat2 == 'text':
			
 
				+                comp_score1 = calculate_composite_score(box1, area1)
			
 
				+                comp_score2 = calculate_composite_score(box2, area2)
			
 
				+                if abs(comp_score1 - comp_score2) > 0.05:
			
 
				+                    return comp_score1 > comp_score2
			
 
				+            
			
 
				+            # 规则5: 置信度比较
			
 
				+            if abs(score1 - score2) > 0.1:
			
 
				+                return score1 > score2
			
 
				+            
			
 
				+            # 规则6: 面积比较
			
 
				+            return area1 >= area2
			
 
				+        
			
 
				+        # 主处理逻辑
			
 
				+        results = [item.copy() for item in layout_results]
			
 
				+        need_remove = set()
			
 
				+        
			
 
				+        # 按综合评分排序（高分优先）
			
 
				+        def get_sort_key(i: int) -> float:
			
 
				+            item = results[i]
			
 
				+            if item.get('category') == 'text':
			
 
				+                return -calculate_composite_score(item, get_bbox_area(item.get('bbox', [])))
			
 
				+            return -item.get('confidence', item.get('score', 0))
			
 
				+        
			
 
				+        sorted_indices = sorted(range(len(results)), key=get_sort_key)
			
 
				+        
			
 
				+        # 比较每对框
			
 
				+        for idx_i, i in enumerate(sorted_indices):
			
 
				+            if i in need_remove:
			
 
				+                continue
			
 
				+            
			
 
				+            for idx_j, j in enumerate(sorted_indices):
			
 
				+                if j == i or j in need_remove or idx_j >= idx_i:
			
 
				+                    continue
			
 
				+                
			
 
				+                bbox1, bbox2 = results[i].get('bbox', []), results[j].get('bbox', [])
			
 
				+                if len(bbox1) < 4 or len(bbox2) < 4:
			
 
				+                    continue
			
 
				+                
			
 
				+                # 计算重叠指标
			
 
				+                iou = coordinate_utils.calculate_iou(bbox1, bbox2)
			
 
				+                overlap_ratio = coordinate_utils.calculate_overlap_ratio(bbox1, bbox2)
			
 
				+                contained_1_in_2 = is_bbox_inside(bbox1, bbox2)
			
 
				+                contained_2_in_1 = is_bbox_inside(bbox2, bbox1)
			
 
				+                
			
 
				+                # 检查是否有显著重叠
			
 
				+                if not (iou > iou_threshold or overlap_ratio > overlap_ratio_threshold or
			
 
				+                       contained_1_in_2 or contained_2_in_1):
			
 
				+                    continue
			
 
				+                
			
 
				+                # 应用决策规则
			
 
				+                if should_keep_box1(results[i], results[j], iou, overlap_ratio,
			
 
				+                                   contained_1_in_2, contained_2_in_1):
			
 
				+                    need_remove.add(j)
			
 
				+                else:
			
 
				+                    need_remove.add(i)
			
 
				+                    break
			
 
				+        
			
 
				+        return [results[i] for i in range(len(results)) if i not in need_remove]
			
 
				 
			
 
				 class BaseVLRecognizer(BaseAdapter):
			
 
				     """VL识别器基类"""