|
@@ -68,21 +68,66 @@ class StreamlitOCRValidator:
|
|
|
return get_ocr_statistics(self.ocr_data, self.text_bbox_mapping, self.marked_errors)
|
|
return get_ocr_statistics(self.ocr_data, self.text_bbox_mapping, self.marked_errors)
|
|
|
|
|
|
|
|
def display_html_table_as_dataframe(self, html_content: str, enable_editing: bool = False):
|
|
def display_html_table_as_dataframe(self, html_content: str, enable_editing: bool = False):
|
|
|
- """将HTML表格解析为DataFrame显示"""
|
|
|
|
|
|
|
+ """将HTML表格解析为DataFrame显示 - 增强版本支持横向滚动"""
|
|
|
tables = parse_html_tables(html_content)
|
|
tables = parse_html_tables(html_content)
|
|
|
|
|
|
|
|
if not tables:
|
|
if not tables:
|
|
|
st.warning("未找到可解析的表格")
|
|
st.warning("未找到可解析的表格")
|
|
|
- st.markdown(html_content, unsafe_allow_html=True)
|
|
|
|
|
|
|
+ # 对于无法解析的HTML表格,使用自定义CSS显示
|
|
|
|
|
+ st.markdown("""
|
|
|
|
|
+ <style>
|
|
|
|
|
+ .scrollable-table {
|
|
|
|
|
+ overflow-x: auto;
|
|
|
|
|
+ white-space: nowrap;
|
|
|
|
|
+ border: 1px solid #ddd;
|
|
|
|
|
+ border-radius: 5px;
|
|
|
|
|
+ margin: 10px 0;
|
|
|
|
|
+ }
|
|
|
|
|
+ .scrollable-table table {
|
|
|
|
|
+ width: 100%;
|
|
|
|
|
+ border-collapse: collapse;
|
|
|
|
|
+ }
|
|
|
|
|
+ .scrollable-table th, .scrollable-table td {
|
|
|
|
|
+ border: 1px solid #ddd;
|
|
|
|
|
+ padding: 8px;
|
|
|
|
|
+ text-align: left;
|
|
|
|
|
+ min-width: 100px;
|
|
|
|
|
+ }
|
|
|
|
|
+ .scrollable-table th {
|
|
|
|
|
+ background-color: #f5f5f5;
|
|
|
|
|
+ font-weight: bold;
|
|
|
|
|
+ }
|
|
|
|
|
+ </style>
|
|
|
|
|
+ """, unsafe_allow_html=True)
|
|
|
|
|
+
|
|
|
|
|
+ st.markdown(f'<div class="scrollable-table">{html_content}</div>', unsafe_allow_html=True)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
for i, table in enumerate(tables):
|
|
for i, table in enumerate(tables):
|
|
|
st.subheader(f"📊 表格 {i+1}")
|
|
st.subheader(f"📊 表格 {i+1}")
|
|
|
|
|
|
|
|
|
|
+ # 表格信息显示
|
|
|
|
|
+ col_info1, col_info2, col_info3, col_info4 = st.columns(4)
|
|
|
|
|
+ with col_info1:
|
|
|
|
|
+ st.metric("行数", len(table))
|
|
|
|
|
+ with col_info2:
|
|
|
|
|
+ st.metric("列数", len(table.columns))
|
|
|
|
|
+ with col_info3:
|
|
|
|
|
+ # 检查是否有超宽表格
|
|
|
|
|
+ is_wide_table = len(table.columns) > 8
|
|
|
|
|
+ st.metric("表格类型", "超宽表格" if is_wide_table else "普通表格")
|
|
|
|
|
+ with col_info4:
|
|
|
|
|
+ # 表格操作模式选择
|
|
|
|
|
+ display_mode = st.selectbox(
|
|
|
|
|
+ f"显示模式 (表格{i+1})",
|
|
|
|
|
+ ["完整显示", "分页显示", "筛选列显示"],
|
|
|
|
|
+ key=f"display_mode_{i}"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
# 创建表格操作按钮
|
|
# 创建表格操作按钮
|
|
|
col1, col2, col3, col4 = st.columns(4)
|
|
col1, col2, col3, col4 = st.columns(4)
|
|
|
with col1:
|
|
with col1:
|
|
|
- show_info = st.checkbox(f"显示表格信息", key=f"info_{i}")
|
|
|
|
|
|
|
+ show_info = st.checkbox(f"显示详细信息", key=f"info_{i}")
|
|
|
with col2:
|
|
with col2:
|
|
|
show_stats = st.checkbox(f"显示统计信息", key=f"stats_{i}")
|
|
show_stats = st.checkbox(f"显示统计信息", key=f"stats_{i}")
|
|
|
with col3:
|
|
with col3:
|
|
@@ -90,16 +135,73 @@ class StreamlitOCRValidator:
|
|
|
with col4:
|
|
with col4:
|
|
|
enable_sort = st.checkbox(f"启用排序", key=f"sort_{i}")
|
|
enable_sort = st.checkbox(f"启用排序", key=f"sort_{i}")
|
|
|
|
|
|
|
|
|
|
+ # 根据显示模式处理表格
|
|
|
|
|
+ display_table = self._process_table_display_mode(table, i, display_mode)
|
|
|
|
|
+
|
|
|
# 数据过滤和排序逻辑
|
|
# 数据过滤和排序逻辑
|
|
|
- filtered_table = self._apply_table_filters_and_sorts(table, i, enable_filter, enable_sort)
|
|
|
|
|
|
|
+ filtered_table = self._apply_table_filters_and_sorts(display_table, i, enable_filter, enable_sort)
|
|
|
|
|
+
|
|
|
|
|
+ # 显示表格 - 使用自定义CSS支持横向滚动
|
|
|
|
|
+ st.markdown("""
|
|
|
|
|
+ <style>
|
|
|
|
|
+ .dataframe-container {
|
|
|
|
|
+ overflow-x: auto;
|
|
|
|
|
+ border: 1px solid #ddd;
|
|
|
|
|
+ border-radius: 5px;
|
|
|
|
|
+ margin: 10px 0;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /* 为超宽表格特殊样式 */
|
|
|
|
|
+ .wide-table-container {
|
|
|
|
|
+ overflow-x: auto;
|
|
|
|
|
+ max-height: 500px;
|
|
|
|
|
+ overflow-y: auto;
|
|
|
|
|
+ border: 2px solid #0288d1;
|
|
|
|
|
+ border-radius: 8px;
|
|
|
|
|
+ background: linear-gradient(90deg, #f8f9fa 0%, #ffffff 100%);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ .dataframe thead th {
|
|
|
|
|
+ position: sticky;
|
|
|
|
|
+ top: 0;
|
|
|
|
|
+ background-color: #f5f5f5 !important;
|
|
|
|
|
+ z-index: 10;
|
|
|
|
|
+ border-bottom: 2px solid #0288d1;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ .dataframe tbody td {
|
|
|
|
|
+ white-space: nowrap;
|
|
|
|
|
+ min-width: 100px;
|
|
|
|
|
+ max-width: 300px;
|
|
|
|
|
+ overflow: hidden;
|
|
|
|
|
+ text-overflow: ellipsis;
|
|
|
|
|
+ }
|
|
|
|
|
+ </style>
|
|
|
|
|
+ """, unsafe_allow_html=True)
|
|
|
|
|
+
|
|
|
|
|
+ # 根据表格宽度选择显示容器
|
|
|
|
|
+ container_class = "wide-table-container" if len(table.columns) > 8 else "dataframe-container"
|
|
|
|
|
|
|
|
- # 显示表格
|
|
|
|
|
if enable_editing:
|
|
if enable_editing:
|
|
|
- edited_table = st.data_editor(filtered_table, use_container_width=True, key=f"editor_{i}")
|
|
|
|
|
|
|
+ st.markdown(f'<div class="{container_class}">', unsafe_allow_html=True)
|
|
|
|
|
+ edited_table = st.data_editor(
|
|
|
|
|
+ filtered_table,
|
|
|
|
|
+ use_container_width=True,
|
|
|
|
|
+ key=f"editor_{i}",
|
|
|
|
|
+ height=400 if len(table.columns) > 8 else None
|
|
|
|
|
+ )
|
|
|
|
|
+ st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
+
|
|
|
if not edited_table.equals(filtered_table):
|
|
if not edited_table.equals(filtered_table):
|
|
|
st.success("✏️ 表格已编辑,可以导出修改后的数据")
|
|
st.success("✏️ 表格已编辑,可以导出修改后的数据")
|
|
|
else:
|
|
else:
|
|
|
- st.dataframe(filtered_table, use_container_width=True)
|
|
|
|
|
|
|
+ st.markdown(f'<div class="{container_class}">', unsafe_allow_html=True)
|
|
|
|
|
+ st.dataframe(
|
|
|
|
|
+ filtered_table,
|
|
|
|
|
+ use_container_width=True,
|
|
|
|
|
+ height=400 if len(table.columns) > 8 else None
|
|
|
|
|
+ )
|
|
|
|
|
+ st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# 显示表格信息和统计
|
|
# 显示表格信息和统计
|
|
|
self._display_table_info_and_stats(table, filtered_table, show_info, show_stats, i)
|
|
self._display_table_info_and_stats(table, filtered_table, show_info, show_stats, i)
|
|
@@ -188,6 +290,49 @@ class StreamlitOCRValidator:
|
|
|
key=f"download_excel_{table_index}"
|
|
key=f"download_excel_{table_index}"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ def _process_table_display_mode(self, table: pd.DataFrame, table_index: int, display_mode: str) -> pd.DataFrame:
|
|
|
|
|
+ """根据显示模式处理表格"""
|
|
|
|
|
+ if display_mode == "分页显示":
|
|
|
|
|
+ # 分页显示
|
|
|
|
|
+ page_size = st.selectbox(
|
|
|
|
|
+ f"每页显示行数 (表格 {table_index+1})",
|
|
|
|
|
+ [10, 20, 50, 100],
|
|
|
|
|
+ key=f"page_size_{table_index}"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ total_pages = (len(table) - 1) // page_size + 1
|
|
|
|
|
+
|
|
|
|
|
+ if total_pages > 1:
|
|
|
|
|
+ page_number = st.selectbox(
|
|
|
|
|
+ f"页码 (表格 {table_index+1})",
|
|
|
|
|
+ range(1, total_pages + 1),
|
|
|
|
|
+ key=f"page_number_{table_index}"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ start_idx = (page_number - 1) * page_size
|
|
|
|
|
+ end_idx = start_idx + page_size
|
|
|
|
|
+ return table.iloc[start_idx:end_idx]
|
|
|
|
|
+
|
|
|
|
|
+ return table
|
|
|
|
|
+
|
|
|
|
|
+ elif display_mode == "筛选列显示":
|
|
|
|
|
+ # 列筛选显示
|
|
|
|
|
+ if len(table.columns) > 5:
|
|
|
|
|
+ selected_columns = st.multiselect(
|
|
|
|
|
+ f"选择要显示的列 (表格 {table_index+1})",
|
|
|
|
|
+ table.columns.tolist(),
|
|
|
|
|
+ default=table.columns.tolist()[:5], # 默认显示前5列
|
|
|
|
|
+ key=f"selected_columns_{table_index}"
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if selected_columns:
|
|
|
|
|
+ return table[selected_columns]
|
|
|
|
|
+
|
|
|
|
|
+ return table
|
|
|
|
|
+
|
|
|
|
|
+ else: # 完整显示
|
|
|
|
|
+ return table
|
|
|
|
|
+
|
|
|
# 布局方法现在委托给布局管理器
|
|
# 布局方法现在委托给布局管理器
|
|
|
def create_standard_layout(self, font_size: int = 12, zoom_level: float = 1.0):
|
|
def create_standard_layout(self, font_size: int = 12, zoom_level: float = 1.0):
|
|
|
"""创建标准布局"""
|
|
"""创建标准布局"""
|