před 8 měsíci · 08dd3a85a8
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -123,4 +123,5 @@ jobs:
 
				       - name: Publish distribution to PyPI
			
 
				         run: |
			
 
				           pip install twine
			
 
				+          twine check dist/*
			
 
				           twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
			
--- a/projects/web_demo/web_demo/common/ext.py
+++ b/projects/web_demo/web_demo/common/ext.py
@@ -1,37 +1,51 @@
 
				 import hashlib
			
 
				 import mimetypes
			
 
				+import urllib.parse
			
 
				 
			
 
				 
			
 
				 def is_pdf(filename, file):
			
 
				     """
			
 
				-    判断文件是否为PDF格式。
			
 
				+    判断文件是否为PDF格式，支持中文名和特殊字符。
			
 
				 
			
 
				     :param filename: 文件名
			
 
				     :param file: 文件对象
			
 
				     :return: 如果文件是PDF格式，则返回True，否则返回False
			
 
				     """
			
 
				-    # 检查文件扩展名  https://arxiv.org/pdf/2405.08702 pdf链接可能存在不带扩展名的情况，先注释
			
 
				-    # if not filename.endswith('.pdf'):
			
 
				-    #     return False
			
 
				-
			
 
				-    # 检查MIME类型
			
 
				-    mime_type, _ = mimetypes.guess_type(filename)
			
 
				-    print(mime_type)
			
 
				-    if mime_type != 'application/pdf':
			
 
				-        return False
			
 
				-
			
 
				-    # 可选：读取文件的前几KB内容并检查MIME类型
			
 
				-    # 这一步是可选的，用于更严格的检查
			
 
				-    # if not mimetypes.guess_type(filename, strict=False)[0] == 'application/pdf':
			
 
				-    #     return False
			
 
				-
			
 
				-    # 检查文件内容
			
 
				-    file_start = file.read(5)
			
 
				-    file.seek(0)
			
 
				-    if not file_start.startswith(b'%PDF-'):
			
 
				-        return False
			
 
				-
			
 
				-    return True
			
 
				+    try:
			
 
				+        # 对文件名进行URL解码，处理特殊字符
			
 
				+        decoded_filename = urllib.parse.unquote(filename)
			
 
				+        
			
 
				+        # 检查MIME类型
			
 
				+        mime_type, _ = mimetypes.guess_type(decoded_filename)
			
 
				+        print(f"Detected MIME type: {mime_type}")
			
 
				+        
			
 
				+        # 某些情况下mime_type可能为None，需要特殊处理
			
 
				+        if mime_type is None:
			
 
				+            # 只检查文件内容的PDF标识
			
 
				+            file_start = file.read(5)
			
 
				+            file.seek(0)  # 重置文件指针
			
 
				+            return file_start.startswith(b'%PDF-')
			
 
				+            
			
 
				+        if mime_type != 'application/pdf':
			
 
				+            return False
			
 
				+
			
 
				+        # 检查文件内容的PDF标识
			
 
				+        file_start = file.read(5)
			
 
				+        file.seek(0)  # 重置文件指针
			
 
				+        if not file_start.startswith(b'%PDF-'):
			
 
				+            return False
			
 
				+
			
 
				+        return True
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        print(f"Error checking PDF format: {str(e)}")
			
 
				+        # 发生错误时，仍然尝试通过文件头判断
			
 
				+        try:
			
 
				+            file_start = file.read(5)
			
 
				+            file.seek(0)
			
 
				+            return file_start.startswith(b'%PDF-')
			
 
				+        except:
			
 
				+            return False
			
 
				 
			
 
				 
			
 
				 def url_is_pdf(file):
			
--- a/signatures/version1/cla.json
+++ b/signatures/version1/cla.json
@@ -143,6 +143,14 @@
 
				       "created_at": "2025-01-20T05:30:38Z",
			
 
				       "repoId": 765083837,
			
 
				       "pullRequestNo": 1578
			
 
				+    },
			
 
				+    {
			
 
				+      "name": "shniubobo",
			
 
				+      "id": 6594544,
			
 
				+      "comment_id": 2660086464,
			
 
				+      "created_at": "2025-02-14T19:15:25Z",
			
 
				+      "repoId": 765083837,
			
 
				+      "pullRequestNo": 1693
			
 
				     }
			
 
				   ]
			
 
				 }