Procházet zdrojové kódy

增强HTML表格显示功能,支持横向滚动和多种显示模式,优化表格信息展示

zhch158_admin před 2 měsíci
rodič
revize
158979280e
Změněn 1 soubor: 152 přidání a 7 odebrání
  1. 152 7
      streamlit_ocr_validator.py

+ 152 - 7
streamlit_ocr_validator.py

@@ -68,21 +68,66 @@ class StreamlitOCRValidator:
         return get_ocr_statistics(self.ocr_data, self.text_bbox_mapping, self.marked_errors)
     
     def display_html_table_as_dataframe(self, html_content: str, enable_editing: bool = False):
-        """将HTML表格解析为DataFrame显示"""
+        """将HTML表格解析为DataFrame显示 - 增强版本支持横向滚动"""
         tables = parse_html_tables(html_content)
         
         if not tables:
             st.warning("未找到可解析的表格")
-            st.markdown(html_content, unsafe_allow_html=True)
+            # 对于无法解析的HTML表格,使用自定义CSS显示
+            st.markdown("""
+            <style>
+            .scrollable-table {
+                overflow-x: auto;
+                white-space: nowrap;
+                border: 1px solid #ddd;
+                border-radius: 5px;
+                margin: 10px 0;
+            }
+            .scrollable-table table {
+                width: 100%;
+                border-collapse: collapse;
+            }
+            .scrollable-table th, .scrollable-table td {
+                border: 1px solid #ddd;
+                padding: 8px;
+                text-align: left;
+                min-width: 100px;
+            }
+            .scrollable-table th {
+                background-color: #f5f5f5;
+                font-weight: bold;
+            }
+            </style>
+            """, unsafe_allow_html=True)
+            
+            st.markdown(f'<div class="scrollable-table">{html_content}</div>', unsafe_allow_html=True)
             return
             
         for i, table in enumerate(tables):
             st.subheader(f"📊 表格 {i+1}")
             
+            # 表格信息显示
+            col_info1, col_info2, col_info3, col_info4 = st.columns(4)
+            with col_info1:
+                st.metric("行数", len(table))
+            with col_info2:
+                st.metric("列数", len(table.columns))
+            with col_info3:
+                # 检查是否有超宽表格
+                is_wide_table = len(table.columns) > 8
+                st.metric("表格类型", "超宽表格" if is_wide_table else "普通表格")
+            with col_info4:
+                # 表格操作模式选择
+                display_mode = st.selectbox(
+                    f"显示模式 (表格{i+1})",
+                    ["完整显示", "分页显示", "筛选列显示"],
+                    key=f"display_mode_{i}"
+                )
+            
             # 创建表格操作按钮
             col1, col2, col3, col4 = st.columns(4)
             with col1:
-                show_info = st.checkbox(f"显示表格信息", key=f"info_{i}")
+                show_info = st.checkbox(f"显示详细信息", key=f"info_{i}")
             with col2:
                 show_stats = st.checkbox(f"显示统计信息", key=f"stats_{i}")
             with col3:
@@ -90,16 +135,73 @@ class StreamlitOCRValidator:
             with col4:
                 enable_sort = st.checkbox(f"启用排序", key=f"sort_{i}")
             
+            # 根据显示模式处理表格
+            display_table = self._process_table_display_mode(table, i, display_mode)
+            
             # 数据过滤和排序逻辑
-            filtered_table = self._apply_table_filters_and_sorts(table, i, enable_filter, enable_sort)
+            filtered_table = self._apply_table_filters_and_sorts(display_table, i, enable_filter, enable_sort)
+            
+            # 显示表格 - 使用自定义CSS支持横向滚动
+            st.markdown("""
+            <style>
+            .dataframe-container {
+                overflow-x: auto;
+                border: 1px solid #ddd;
+                border-radius: 5px;
+                margin: 10px 0;
+            }
+            
+            /* 为超宽表格特殊样式 */
+            .wide-table-container {
+                overflow-x: auto;
+                max-height: 500px;
+                overflow-y: auto;
+                border: 2px solid #0288d1;
+                border-radius: 8px;
+                background: linear-gradient(90deg, #f8f9fa 0%, #ffffff 100%);
+            }
+            
+            .dataframe thead th {
+                position: sticky;
+                top: 0;
+                background-color: #f5f5f5 !important;
+                z-index: 10;
+                border-bottom: 2px solid #0288d1;
+            }
+            
+            .dataframe tbody td {
+                white-space: nowrap;
+                min-width: 100px;
+                max-width: 300px;
+                overflow: hidden;
+                text-overflow: ellipsis;
+            }
+            </style>
+            """, unsafe_allow_html=True)
+            
+            # 根据表格宽度选择显示容器
+            container_class = "wide-table-container" if len(table.columns) > 8 else "dataframe-container"
             
-            # 显示表格
             if enable_editing:
-                edited_table = st.data_editor(filtered_table, use_container_width=True, key=f"editor_{i}")
+                st.markdown(f'<div class="{container_class}">', unsafe_allow_html=True)
+                edited_table = st.data_editor(
+                    filtered_table, 
+                    use_container_width=True, 
+                    key=f"editor_{i}",
+                    height=400 if len(table.columns) > 8 else None
+                )
+                st.markdown('</div>', unsafe_allow_html=True)
+                
                 if not edited_table.equals(filtered_table):
                     st.success("✏️ 表格已编辑,可以导出修改后的数据")
             else:
-                st.dataframe(filtered_table, use_container_width=True)
+                st.markdown(f'<div class="{container_class}">', unsafe_allow_html=True)
+                st.dataframe(
+                    filtered_table, 
+                    use_container_width=True,
+                    height=400 if len(table.columns) > 8 else None
+                )
+                st.markdown('</div>', unsafe_allow_html=True)
             
             # 显示表格信息和统计
             self._display_table_info_and_stats(table, filtered_table, show_info, show_stats, i)
@@ -188,6 +290,49 @@ class StreamlitOCRValidator:
             key=f"download_excel_{table_index}"
         )
     
+    def _process_table_display_mode(self, table: pd.DataFrame, table_index: int, display_mode: str) -> pd.DataFrame:
+        """Return the part of ``table`` to show for ``display_mode``: one page of rows, a column subset, or the table unchanged."""
+        if display_mode == "分页显示":
+            # Paged mode: let the user choose how many rows go on a page
+            page_size = st.selectbox(
+                f"每页显示行数 (表格 {table_index+1})",
+                [10, 20, 50, 100],
+                key=f"page_size_{table_index}"
+            )
+            
+            total_pages = (len(table) - 1) // page_size + 1  # ceiling division; evaluates to 0 for an empty table
+            
+            if total_pages > 1:  # the page picker is only rendered when there is more than one page
+                page_number = st.selectbox(
+                    f"页码 (表格 {table_index+1})",
+                    range(1, total_pages + 1),
+                    key=f"page_number_{table_index}"
+                )
+                
+                start_idx = (page_number - 1) * page_size  # page numbers are 1-based, row positions 0-based
+                end_idx = start_idx + page_size
+                return table.iloc[start_idx:end_idx]
+            
+            return table
+            
+        elif display_mode == "筛选列显示":
+            # Column-filter mode: the picker is only offered for tables with more than 5 columns
+            if len(table.columns) > 5:
+                selected_columns = st.multiselect(
+                    f"选择要显示的列 (表格 {table_index+1})",
+                    table.columns.tolist(),
+                    default=table.columns.tolist()[:5],  # preselect the first 5 columns
+                    key=f"selected_columns_{table_index}"
+                )
+                
+                if selected_columns:
+                    return table[selected_columns]
+            
+            return table  # 5 or fewer columns, or nothing selected: show every column
+            
+        else:  # full display ("完整显示") or any other mode value
+            return table
+    
     # 布局方法现在委托给布局管理器
     def create_standard_layout(self, font_size: int = 12, zoom_level: float = 1.0):
         """创建标准布局"""