# dataframe_diff.py
import difflib
import html
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Tuple

import numpy as np
import pandas as pd
import streamlit as st
  8. class ChangeType(Enum):
  9. UNCHANGED = "unchanged"
  10. MODIFIED = "modified"
  11. ADDED = "added"
  12. REMOVED = "removed"
  13. @dataclass
  14. class CellDiff:
  15. row_left: Optional[int]
  16. row_right: Optional[int]
  17. column: str
  18. change_type: ChangeType
  19. old_value: any = None
  20. new_value: any = None
  21. similarity: float = 0.0
  22. @dataclass
  23. class RowDiff:
  24. row_left: Optional[int]
  25. row_right: Optional[int]
  26. change_type: ChangeType
  27. similarity: float = 0.0
  28. cell_diffs: List[CellDiff] = None
  29. class DataFrameDiffAlgorithm:
  30. """类似VSCode的DataFrame差异算法"""
  31. def __init__(self, similarity_threshold: float = 0.7):
  32. self.similarity_threshold = similarity_threshold
  33. def compute_diff(self, df_left: pd.DataFrame, df_right: pd.DataFrame) -> List[RowDiff]:
  34. """计算两个DataFrame的差异,从上到下寻找最匹配的行"""
  35. # 确保列对齐
  36. all_columns = list(set(df_left.columns) | set(df_right.columns))
  37. df_left_aligned = self._align_columns(df_left, all_columns)
  38. df_right_aligned = self._align_columns(df_right, all_columns)
  39. # 计算行相似度矩阵
  40. similarity_matrix = self._compute_similarity_matrix(df_left_aligned, df_right_aligned)
  41. # 从上到下匹配行
  42. row_mappings = self._match_rows_top_down(similarity_matrix)
  43. # 生成差异结果
  44. return self._generate_diff_result(df_left_aligned, df_right_aligned, row_mappings, all_columns)
  45. def _align_columns(self, df: pd.DataFrame, all_columns: List[str]) -> pd.DataFrame:
  46. """对齐DataFrame列"""
  47. aligned_df = df.copy()
  48. for col in all_columns:
  49. if col not in aligned_df.columns:
  50. aligned_df[col] = None
  51. return aligned_df[all_columns]
  52. def _compute_similarity_matrix(self, df_left: pd.DataFrame, df_right: pd.DataFrame) -> np.ndarray:
  53. """计算行之间的相似度矩阵"""
  54. matrix = np.zeros((len(df_left), len(df_right)))
  55. for i in range(len(df_left)):
  56. for j in range(len(df_right)):
  57. matrix[i, j] = self._compute_row_similarity(
  58. df_left.iloc[i], df_right.iloc[j]
  59. )
  60. return matrix
  61. def _compute_row_similarity(self, row1: pd.Series, row2: pd.Series) -> float:
  62. """计算两行的相似度"""
  63. total_cols = len(row1)
  64. if total_cols == 0:
  65. return 1.0
  66. matches = 0
  67. for col in row1.index:
  68. val1, val2 = row1[col], row2[col]
  69. # 处理NaN值
  70. if pd.isna(val1) and pd.isna(val2):
  71. matches += 1
  72. elif pd.isna(val1) or pd.isna(val2):
  73. continue
  74. else:
  75. # 字符串相似度计算
  76. str1, str2 = str(val1), str(val2)
  77. if str1 == str2:
  78. matches += 1
  79. else:
  80. # 使用difflib计算字符串相似度
  81. similarity = difflib.SequenceMatcher(None, str1, str2).ratio()
  82. matches += similarity * 0.5 # 部分匹配给予部分分数
  83. return matches / total_cols
  84. def _match_rows_top_down(self, similarity_matrix: np.ndarray) -> Dict[int, Optional[int]]:
  85. """从上到下匹配行,优先匹配相似度高的行"""
  86. left_rows, right_rows = similarity_matrix.shape
  87. matched_right = set()
  88. row_mappings = {}
  89. # 从上到下处理左侧每一行
  90. for left_idx in range(left_rows):
  91. best_right_idx = None
  92. best_similarity = 0.0
  93. # 在未匹配的右侧行中寻找最佳匹配
  94. for right_idx in range(right_rows):
  95. if right_idx not in matched_right:
  96. similarity = similarity_matrix[left_idx, right_idx]
  97. if similarity > best_similarity and similarity >= self.similarity_threshold:
  98. best_similarity = similarity
  99. best_right_idx = right_idx
  100. if best_right_idx is not None:
  101. row_mappings[left_idx] = best_right_idx
  102. matched_right.add(best_right_idx)
  103. else:
  104. row_mappings[left_idx] = None # 左侧行被删除
  105. return row_mappings
  106. def _generate_diff_result(self, df_left: pd.DataFrame, df_right: pd.DataFrame,
  107. row_mappings: Dict[int, Optional[int]], all_columns: List[str]) -> List[RowDiff]:
  108. """生成差异结果"""
  109. result = []
  110. matched_right_rows = set(row_mappings.values()) - {None}
  111. # 处理匹配的行和删除的行
  112. for left_idx, right_idx in row_mappings.items():
  113. if right_idx is None:
  114. # 删除的行
  115. result.append(RowDiff(
  116. row_left=left_idx,
  117. row_right=None,
  118. change_type=ChangeType.REMOVED,
  119. similarity=0.0
  120. ))
  121. else:
  122. # 匹配的行 - 检查单元格差异
  123. cell_diffs = self._compare_cells(
  124. df_left.iloc[left_idx], df_right.iloc[right_idx],
  125. left_idx, right_idx, all_columns
  126. )
  127. change_type = ChangeType.UNCHANGED
  128. if any(cell.change_type != ChangeType.UNCHANGED for cell in cell_diffs):
  129. change_type = ChangeType.MODIFIED
  130. similarity = self._compute_row_similarity(df_left.iloc[left_idx], df_right.iloc[right_idx])
  131. result.append(RowDiff(
  132. row_left=left_idx,
  133. row_right=right_idx,
  134. change_type=change_type,
  135. similarity=similarity,
  136. cell_diffs=cell_diffs
  137. ))
  138. # 处理新增的行(右侧未匹配的行)
  139. for right_idx in range(len(df_right)):
  140. if right_idx not in matched_right_rows:
  141. result.append(RowDiff(
  142. row_left=None,
  143. row_right=right_idx,
  144. change_type=ChangeType.ADDED,
  145. similarity=0.0
  146. ))
  147. return result
  148. def _compare_cells(self, row_left: pd.Series, row_right: pd.Series,
  149. left_idx: int, right_idx: int, columns: List[str]) -> List[CellDiff]:
  150. """比较单元格差异"""
  151. cell_diffs = []
  152. for col in columns:
  153. val_left = row_left[col] if col in row_left.index else None
  154. val_right = row_right[col] if col in row_right.index else None
  155. # 处理NaN值
  156. if pd.isna(val_left) and pd.isna(val_right):
  157. change_type = ChangeType.UNCHANGED
  158. elif pd.isna(val_left):
  159. change_type = ChangeType.ADDED
  160. elif pd.isna(val_right):
  161. change_type = ChangeType.REMOVED
  162. elif str(val_left) == str(val_right):
  163. change_type = ChangeType.UNCHANGED
  164. else:
  165. change_type = ChangeType.MODIFIED
  166. cell_diffs.append(CellDiff(
  167. row_left=left_idx,
  168. row_right=right_idx,
  169. column=col,
  170. change_type=change_type,
  171. old_value=val_left,
  172. new_value=val_right
  173. ))
  174. return cell_diffs
  175. class VSCodeStyleDataFrameDiff:
  176. """类似VSCode样式的DataFrame差异展示"""
  177. def __init__(self):
  178. self.diff_algorithm = DataFrameDiffAlgorithm()
  179. self._inject_css()
  180. def _inject_css(self):
  181. """注入VSCode风格的CSS样式 + data_editor样式"""
  182. st.markdown("""
  183. <style>
  184. /* VSCode风格的差异显示 */
  185. .vscode-diff-container {
  186. font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
  187. font-size: 12px;
  188. border: 1px solid #3c3c3c;
  189. border-radius: 6px;
  190. overflow: hidden;
  191. }
  192. .diff-header {
  193. background-color: #2d2d30;
  194. color: #cccccc;
  195. padding: 8px 12px;
  196. font-weight: bold;
  197. border-bottom: 1px solid #3c3c3c;
  198. }
  199. .diff-content {
  200. height: 500px;
  201. overflow: auto;
  202. background-color: #1e1e1e;
  203. }
  204. .diff-table {
  205. width: 100%;
  206. border-collapse: collapse;
  207. color: #cccccc;
  208. }
  209. .diff-table th {
  210. background-color: #2d2d30;
  211. border: 1px solid #3c3c3c;
  212. padding: 6px 8px;
  213. text-align: left;
  214. position: sticky;
  215. top: 0;
  216. z-index: 10;
  217. }
  218. .diff-table td {
  219. border: 1px solid #3c3c3c;
  220. padding: 4px 8px;
  221. white-space: nowrap;
  222. position: relative;
  223. }
  224. /* 差异颜色 - VSCode风格 */
  225. .diff-added {
  226. background-color: rgba(22, 160, 133, 0.2) !important;
  227. border-left: 3px solid #16a085 !important;
  228. }
  229. .diff-removed {
  230. background-color: rgba(231, 76, 60, 0.2) !important;
  231. border-left: 3px solid #e74c3c !important;
  232. }
  233. .diff-modified {
  234. background-color: rgba(241, 196, 15, 0.2) !important;
  235. border-left: 3px solid #f1c40f !important;
  236. }
  237. .diff-unchanged {
  238. background-color: transparent;
  239. }
  240. /* 行号样式 */
  241. .line-number {
  242. background-color: #2d2d30;
  243. color: #858585;
  244. text-align: right;
  245. padding: 4px 8px;
  246. border-right: 1px solid #3c3c3c;
  247. user-select: none;
  248. min-width: 40px;
  249. }
  250. /* 悬停效果 */
  251. .diff-table tbody tr:hover {
  252. background-color: rgba(255, 255, 255, 0.05);
  253. }
  254. /* 尝试通过CSS选择器为data_editor添加样式 */
  255. div[data-testid="stDataFrame"] {
  256. height: 500px;
  257. }
  258. /* 针对特定行的data_editor样式 - 通过行索引 */
  259. div[data-testid="stDataFrame"] tbody tr:nth-child(1) {
  260. background-color: rgba(22, 160, 133, 0.1);
  261. }
  262. div[data-testid="stDataFrame"] tbody tr:nth-child(2) {
  263. background-color: rgba(241, 196, 15, 0.1);
  264. }
  265. div[data-testid="stDataFrame"] tbody tr:nth-child(5) {
  266. background-color: rgba(231, 76, 60, 0.1);
  267. }
  268. /* 为data_editor添加差异指示器 */
  269. .data-editor-wrapper {
  270. position: relative;
  271. }
  272. .diff-indicator-overlay {
  273. position: absolute;
  274. top: 0;
  275. left: 0;
  276. width: 4px;
  277. height: 100%;
  278. pointer-events: none;
  279. z-index: 1000;
  280. }
  281. .indicator-added { background-color: #16a085; }
  282. .indicator-removed { background-color: #e74c3c; }
  283. .indicator-modified { background-color: #f1c40f; }
  284. /* 滚动条样式 */
  285. .diff-content::-webkit-scrollbar,
  286. div[data-testid="stDataFrame"] ::-webkit-scrollbar {
  287. width: 12px;
  288. height: 12px;
  289. }
  290. .diff-content::-webkit-scrollbar-track,
  291. div[data-testid="stDataFrame"] ::-webkit-scrollbar-track {
  292. background: #2d2d30;
  293. }
  294. .diff-content::-webkit-scrollbar-thumb,
  295. div[data-testid="stDataFrame"] ::-webkit-scrollbar-thumb {
  296. background: #555555;
  297. border-radius: 6px;
  298. }
  299. .diff-content::-webkit-scrollbar-thumb:hover,
  300. div[data-testid="stDataFrame"] ::-webkit-scrollbar-thumb:hover {
  301. background: #777777;
  302. }
  303. </style>
  304. """, unsafe_allow_html=True)
  305. def _create_editable_diff_view(self, row_diffs: List[RowDiff]):
  306. """创建可编辑的差异视图"""
  307. st.subheader("🔍 并排对比")
  308. left_col, right_col = st.columns(2)
  309. with left_col:
  310. st.markdown("### ✏️ 可编辑版本 (左侧)")
  311. # 方案1: 使用标记版本的DataFrame
  312. self._create_marked_data_editor(st.session_state.df_edited, row_diffs)
  313. with right_col:
  314. st.markdown("### 📝 原始版本 (右侧)")
  315. self._create_diff_table(row_diffs, 'right')
  316. def _create_marked_data_editor(self, df: pd.DataFrame, row_diffs: List[RowDiff]):
  317. """创建带标记的data_editor(方案1:在数据中添加标记)"""
  318. # 创建带差异标记的DataFrame
  319. marked_df = self._create_marked_dataframe(df, row_diffs)
  320. # 创建列配置
  321. column_config = self._create_marked_column_config(marked_df, row_diffs)
  322. # 显示data_editor
  323. edited_df = st.data_editor(
  324. marked_df,
  325. height=500,
  326. use_container_width=True,
  327. num_rows="dynamic",
  328. column_config=column_config,
  329. key="marked_diff_editor",
  330. hide_index=False
  331. )
  332. # 移除标记列,恢复原始数据格式
  333. cleaned_df = self._clean_marked_dataframe(edited_df)
  334. # 检测数据变化并更新
  335. if not cleaned_df.equals(st.session_state.df_edited):
  336. st.session_state.df_edited = cleaned_df.copy()
  337. st.rerun()
  338. def _create_marked_dataframe(self, df: pd.DataFrame, row_diffs: List[RowDiff]) -> pd.DataFrame:
  339. """创建带差异标记的DataFrame"""
  340. marked_df = df.copy()
  341. # 添加差异标记列
  342. marked_df.insert(0, '🎨 差异类型', '')
  343. # 根据差异类型为每行添加标记
  344. for row_diff in row_diffs:
  345. if row_diff.row_right is not None and row_diff.row_right < len(marked_df):
  346. if row_diff.change_type == ChangeType.ADDED:
  347. marked_df.iloc[row_diff.row_right, 0] = '🟢 新增'
  348. elif row_diff.change_type == ChangeType.REMOVED:
  349. marked_df.iloc[row_diff.row_right, 0] = '🔴 删除'
  350. elif row_diff.change_type == ChangeType.MODIFIED:
  351. marked_df.iloc[row_diff.row_right, 0] = '🟡 修改'
  352. else:
  353. marked_df.iloc[row_diff.row_right, 0] = '⚪ 未变'
  354. return marked_df
  355. def _create_marked_column_config(self, marked_df: pd.DataFrame, row_diffs: List[RowDiff]) -> Dict:
  356. """为带标记的DataFrame创建列配置"""
  357. config = {}
  358. # 差异类型列配置
  359. config['🎨 差异类型'] = st.column_config.SelectboxColumn(
  360. '🎨 差异类型',
  361. help="行的差异类型",
  362. options=['⚪ 未变', '🟡 修改', '🟢 新增', '🔴 删除'],
  363. disabled=True, # 只读
  364. width="small"
  365. )
  366. # 其他列配置
  367. for col in marked_df.columns[1:]: # 跳过差异标记列
  368. if marked_df[col].dtype in ['int64', 'float64']:
  369. config[col] = st.column_config.NumberColumn(
  370. col,
  371. help=f"数值列: {col}",
  372. format="%.2f" if marked_df[col].dtype == 'float64' else "%d"
  373. )
  374. elif marked_df[col].dtype == 'bool':
  375. config[col] = st.column_config.CheckboxColumn(
  376. col,
  377. help=f"布尔列: {col}"
  378. )
  379. else:
  380. config[col] = st.column_config.TextColumn(
  381. col,
  382. help=f"文本列: {col}",
  383. max_chars=100
  384. )
  385. return config
  386. def _clean_marked_dataframe(self, marked_df: pd.DataFrame) -> pd.DataFrame:
  387. """移除标记列,恢复原始DataFrame格式"""
  388. return marked_df.drop(columns=['🎨 差异类型'])
  389. def create_diff_view(self):
  390. """创建差异对比视图"""
  391. st.title("📊 VSCode风格 DataFrame 差异对比")
  392. st.markdown("---")
  393. # 初始化数据
  394. if 'df_original' not in st.session_state:
  395. st.session_state.df_original = self._create_sample_data()
  396. if 'df_edited' not in st.session_state:
  397. st.session_state.df_edited = st.session_state.df_original.copy()
  398. # 控制面板
  399. self._create_control_panel()
  400. # 计算差异
  401. row_diffs = self.diff_algorithm.compute_diff(
  402. st.session_state.df_original,
  403. st.session_state.df_edited
  404. )
  405. # 显示统计信息
  406. self._display_diff_statistics(row_diffs)
  407. # 主要对比区域
  408. self._create_main_diff_view(row_diffs)
  409. # 详细差异列表
  410. self._create_detailed_diff_view(row_diffs)
  411. def _create_sample_data(self) -> pd.DataFrame:
  412. """创建示例数据"""
  413. return pd.DataFrame({
  414. 'ID': [1, 2, 3, 4, 5],
  415. 'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
  416. 'Age': [25, 30, 35, 40, 45],
  417. 'City': ['New York', 'London', 'Paris', 'Tokyo', 'Sydney'],
  418. 'Salary': [50000, 60000, 70000, 80000, 90000]
  419. })
  420. def _create_control_panel(self):
  421. """创建控制面板"""
  422. with st.expander("🎛️ 控制面板", expanded=True):
  423. col1, col2, col3, col4 = st.columns(4)
  424. with col1:
  425. if st.button("🔄 重置数据"):
  426. st.session_state.df_original = self._create_sample_data()
  427. st.session_state.df_edited = st.session_state.df_original.copy()
  428. st.rerun()
  429. with col2:
  430. if st.button("🎲 生成随机差异"):
  431. st.session_state.df_edited = self._create_random_diff()
  432. st.rerun()
  433. with col3:
  434. similarity_threshold = st.slider(
  435. "相似度阈值",
  436. min_value=0.1,
  437. max_value=1.0,
  438. value=0.7,
  439. step=0.1
  440. )
  441. self.diff_algorithm.similarity_threshold = similarity_threshold
  442. with col4:
  443. auto_scroll = st.checkbox("🔗 同步滚动", value=True)
  444. def _create_random_diff(self) -> pd.DataFrame:
  445. """创建随机差异用于演示"""
  446. df = st.session_state.df_original.copy()
  447. # 修改一些值
  448. df.loc[1, 'Name'] = 'Robert'
  449. df.loc[2, 'Age'] = 36
  450. df.loc[3, 'City'] = 'Berlin'
  451. # 删除一行
  452. df = df.drop(index=4)
  453. # 新增一行
  454. new_row = pd.DataFrame({
  455. 'ID': [6], 'Name': ['Frank'], 'Age': [28],
  456. 'City': ['Madrid'], 'Salary': [55000]
  457. })
  458. df = pd.concat([df, new_row], ignore_index=True)
  459. return df
  460. def _display_diff_statistics(self, row_diffs: List[RowDiff]):
  461. """显示差异统计"""
  462. stats = self._compute_diff_stats(row_diffs)
  463. col1, col2, col3, col4, col5 = st.columns(5)
  464. with col1:
  465. st.metric("总行数", stats['total_rows'])
  466. with col2:
  467. st.metric("🟢 新增", stats['added_rows'], delta=stats['added_rows'])
  468. with col3:
  469. st.metric("🔴 删除", stats['removed_rows'], delta=-stats['removed_rows'])
  470. with col4:
  471. st.metric("🟡 修改", stats['modified_rows'])
  472. with col5:
  473. st.metric("⚪ 未变", stats['unchanged_rows'])
  474. def _compute_diff_stats(self, row_diffs: List[RowDiff]) -> Dict:
  475. """计算差异统计"""
  476. stats = {
  477. 'total_rows': len(row_diffs),
  478. 'added_rows': 0,
  479. 'removed_rows': 0,
  480. 'modified_rows': 0,
  481. 'unchanged_rows': 0
  482. }
  483. for row_diff in row_diffs:
  484. if row_diff.change_type == ChangeType.ADDED:
  485. stats['added_rows'] += 1
  486. elif row_diff.change_type == ChangeType.REMOVED:
  487. stats['removed_rows'] += 1
  488. elif row_diff.change_type == ChangeType.MODIFIED:
  489. stats['modified_rows'] += 1
  490. else:
  491. stats['unchanged_rows'] += 1
  492. return stats
  493. def _create_main_diff_view(self, row_diffs: List[RowDiff]):
  494. """创建主要差异视图"""
  495. self._create_editable_diff_view(row_diffs)
  496. def _create_diff_table(self, row_diffs: List[RowDiff], side: str):
  497. """创建带差异高亮的表格"""
  498. df = st.session_state.df_original if side == 'left' else st.session_state.df_edited
  499. # 构建HTML表格
  500. html_table = self._build_diff_html_table(row_diffs, side, df)
  501. # 显示表格
  502. st.markdown(f"""
  503. <div class="vscode-diff-container">
  504. <div class="diff-header">
  505. {"原始版本" if side == 'left' else "编辑版本"}
  506. ({len(df)} 行)
  507. </div>
  508. <div class="diff-content">
  509. {html_table}
  510. </div>
  511. </div>
  512. """, unsafe_allow_html=True)
  513. def _build_diff_html_table(self, row_diffs: List[RowDiff], side: str, df: pd.DataFrame) -> str:
  514. """构建差异HTML表格"""
  515. if df.empty:
  516. return "<p>数据为空</p>"
  517. html = '<table class="diff-table">'
  518. # 表头
  519. html += '<thead><tr><th class="line-number">#</th>'
  520. for col in df.columns:
  521. html += f'<th>{col}</th>'
  522. html += '</tr></thead><tbody>'
  523. # 根据差异类型构建行
  524. for i, row_diff in enumerate(row_diffs):
  525. row_idx = row_diff.row_left if side == 'left' else row_diff.row_right
  526. if row_idx is None:
  527. continue # 该侧没有对应行
  528. if row_idx >= len(df):
  529. continue # 超出范围
  530. # 确定行样式
  531. row_class = self._get_row_css_class(row_diff.change_type)
  532. html += f'<tr class="{row_class}">'
  533. html += f'<td class="line-number">{row_idx + 1}</td>'
  534. # 构建单元格
  535. for col in df.columns:
  536. cell_class = row_class
  537. cell_value = df.iloc[row_idx][col]
  538. # 如果是修改的行,检查单元格级别的差异
  539. if row_diff.change_type == ChangeType.MODIFIED and row_diff.cell_diffs:
  540. cell_diff = next((cd for cd in row_diff.cell_diffs if cd.column == col), None)
  541. if cell_diff:
  542. cell_class = self._get_cell_css_class(cell_diff.change_type)
  543. display_value = str(cell_value) if not pd.isna(cell_value) else ""
  544. html += f'<td class="{cell_class}">{display_value}</td>'
  545. html += '</tr>'
  546. html += '</tbody></table>'
  547. return html
  548. def _get_row_css_class(self, change_type: ChangeType) -> str:
  549. """获取行的CSS类"""
  550. mapping = {
  551. ChangeType.ADDED: "diff-added",
  552. ChangeType.REMOVED: "diff-removed",
  553. ChangeType.MODIFIED: "diff-modified",
  554. ChangeType.UNCHANGED: "diff-unchanged"
  555. }
  556. return mapping.get(change_type, "diff-unchanged")
  557. def _get_cell_css_class(self, change_type: ChangeType) -> str:
  558. """获取单元格的CSS类"""
  559. return self._get_row_css_class(change_type)
  560. def _create_detailed_diff_view(self, row_diffs: List[RowDiff]):
  561. """创建详细差异视图"""
  562. st.markdown("---")
  563. st.subheader("📋 详细差异列表")
  564. # 筛选选项
  565. change_types = st.multiselect(
  566. "显示的变更类型",
  567. [ct.value for ct in ChangeType],
  568. default=[ChangeType.ADDED.value, ChangeType.REMOVED.value, ChangeType.MODIFIED.value]
  569. )
  570. filtered_diffs = [
  571. rd for rd in row_diffs
  572. if rd.change_type.value in change_types
  573. ]
  574. if not filtered_diffs:
  575. st.info("没有符合条件的差异")
  576. return
  577. # 显示差异详情
  578. for i, row_diff in enumerate(filtered_diffs):
  579. with st.expander(f"差异 {i+1}: {row_diff.change_type.value.upper()}", expanded=False):
  580. self._display_row_diff_details(row_diff)
  581. def _display_row_diff_details(self, row_diff: RowDiff):
  582. """显示行差异详情"""
  583. col1, col2 = st.columns(2)
  584. with col1:
  585. st.write("**位置信息:**")
  586. st.write(f"- 左侧行: {row_diff.row_left}")
  587. st.write(f"- 右侧行: {row_diff.row_right}")
  588. st.write(f"- 变更类型: {row_diff.change_type.value}")
  589. st.write(f"- 相似度: {row_diff.similarity:.2%}")
  590. with col2:
  591. if row_diff.cell_diffs:
  592. st.write("**单元格差异:**")
  593. cell_diff_data = []
  594. for cell_diff in row_diff.cell_diffs:
  595. if cell_diff.change_type != ChangeType.UNCHANGED:
  596. cell_diff_data.append({
  597. '列': cell_diff.column,
  598. '变更类型': cell_diff.change_type.value,
  599. '原值': str(cell_diff.old_value),
  600. '新值': str(cell_diff.new_value)
  601. })
  602. if cell_diff_data:
  603. st.dataframe(pd.DataFrame(cell_diff_data), use_container_width=True)
  604. def main():
  605. """主函数"""
  606. diff_viewer = VSCodeStyleDataFrameDiff()
  607. diff_viewer.create_diff_view()
  608. if __name__ == "__main__":
  609. main()