Selaa lähdekoodia

fix(web_api): Disallow both `pdf_file` and `pdf_path`

Also add some documentation.
shniubobo 9 kuukautta sitten
vanhempi
commit
792e08cfb2
1 muutettua tiedostoa jossa 26 lisäystä ja 3 poistoa
  1. 26 3
      projects/web_api/app.py

+ 26 - 3
projects/web_api/app.py

@@ -151,10 +151,33 @@ async def pdf_parse(
     return_info: bool = False,
     return_content_list: bool = False,
 ):
+    """
+    Convert a PDF to JSON and Markdown (MD), writing the resulting MD and JSON files
+    to the specified directory.
+
+    :param pdf_file: The PDF file to be parsed. Must not be specified together with
+        `pdf_path`
+    :param pdf_path: The path to the PDF file to be parsed. Must not be specified
+        together with `pdf_file`
+    :param parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If
+        results are not satisfactory, try ocr
+    :param is_json_md_dump: Whether to write parsed data to .json and .md files. Default
+        is True. Different stages of data will be written to different .json files (3 in
+        total), md content will be saved to .md file
+    :param output_dir: Output directory for results. A folder named after the PDF file
+        will be created to store all results
+    :param return_layout: Whether to return parsed PDF layout. Defaults to False
+    :param return_info: Whether to return parsed PDF info. Defaults to False
+    :param return_content_list: Whether to return parsed PDF content list. Defaults to
+        False
+    """
     try:
-        if pdf_file is None and pdf_path is None:
-            raise HTTPException(
-                status_code=400, detail="Must provide either pdf_file or pdf_path"
+        if (pdf_file is None and pdf_path is None) or (
+            pdf_file is not None and pdf_path is not None
+        ):
+            return JSONResponse(
+                content={"error": "Must provide either pdf_file or pdf_path"},
+                status_code=400,
             )
 
         # Get PDF filename