| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
- # coding=utf8
- import sys
- import time
- from urllib.parse import quote
- from magic_pdf.dict2md.ocr_mkcontent import (
- ocr_mk_nlp_markdown,
- ocr_mk_mm_markdown,
- ocr_mk_mm_standard_format,
- ocr_mk_mm_markdown_with_para,
- )
- from magic_pdf.libs.commons import (
- read_file,
- join_path,
- parse_bucket_key,
- formatted_time,
- s3_image_save_path,
- )
- from magic_pdf.libs.drop_reason import DropReason
- from magic_pdf.libs.json_compressor import JsonCompressor
- from magic_pdf.dict2md.mkcontent import mk_nlp_markdown, mk_universal_format
- from magic_pdf.pdf_parse_by_model import parse_pdf_by_model
- from magic_pdf.filter.pdf_classify_by_type import classify
- from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan
- from loguru import logger
- from magic_pdf.pdf_parse_for_train import parse_pdf_for_train
- rom magic_pdf.train_utils.convert_to_train_format import convert_to_train_format
- from app.common.s3 import get_s3_config, get_s3_client
- from magic_pdf.pdf_parse_by_ocr import parse_pdf_by_ocr
- def exception_handler(jso: dict, e):
- logger.exception(e)
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.Exception
- jso["exception"] = f"ERROR: {e}"
- return jso
- def get_data_type(jso: dict):
- data_type = jso.get("data_type")
- if data_type is None:
- data_type = jso.get("file_type")
- return data_type
- def get_bookid(jso: dict):
- book_id = jso.get("bookid")
- if book_id is None:
- book_id = jso.get("original_file_id")
- return book_id
- def get_data_source(jso: dict):
- data_source = jso.get("data_source")
- if data_source is None:
- data_source = jso.get("file_source")
- return data_source
- def meta_scan(jso: dict, doc_layout_check=True) -> dict:
- s3_pdf_path = jso.get("file_location")
- s3_config = get_s3_config(s3_pdf_path)
- if doc_layout_check:
- if (
- "doc_layout_result" not in jso
- ): # 检测json中是存在模型数据,如果没有则需要跳过该pdf
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.MISS_DOC_LAYOUT_RESULT
- return jso
- try:
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- # 首页存在超量drawing问题
- # special_pdf_list = ['zlib/zlib_21822650']
- # if book_name in special_pdf_list:
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.SPECIAL_PDF
- # return jso
- start_time = time.time() # 记录开始时间
- logger.info(
- f"book_name is:{book_name},start_time is:{formatted_time(start_time)}",
- file=sys.stderr,
- )
- file_content = read_file(s3_pdf_path, s3_config)
- read_file_time = int(time.time() - start_time) # 计算执行时间
- start_time = time.time() # 记录开始时间
- res = pdf_meta_scan(s3_pdf_path, file_content)
- if res.get(
- "need_drop", False
- ): # 如果返回的字典里有need_drop,则提取drop_reason并跳过本次解析
- jso["need_drop"] = True
- jso["drop_reason"] = res["drop_reason"]
- else: # 正常返回
- jso["pdf_meta"] = res
- jso["content"] = ""
- jso["remark"] = ""
- jso["data_url"] = ""
- end_time = time.time() # 记录结束时间
- meta_scan_time = int(end_time - start_time) # 计算执行时间
- logger.info(
- f"book_name is:{book_name},end_time is:{formatted_time(end_time)},read_file_time is:{read_file_time},meta_scan_time is:{meta_scan_time}",
- file=sys.stderr,
- )
- jso["read_file_time"] = read_file_time
- jso["meta_scan_time"] = meta_scan_time
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def classify_by_type(jso: dict, debug_mode=False) -> dict:
- # 检测debug开关
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- return jso
- # 开始正式逻辑
- try:
- pdf_meta = jso.get("pdf_meta")
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- total_page = pdf_meta["total_page"]
- page_width = pdf_meta["page_width_pts"]
- page_height = pdf_meta["page_height_pts"]
- img_sz_list = pdf_meta["image_info_per_page"]
- img_num_list = pdf_meta["imgs_per_page"]
- text_len_list = pdf_meta["text_len_per_page"]
- text_layout_list = pdf_meta["text_layout_per_page"]
- text_language = pdf_meta["text_language"]
- # allow_language = ['zh', 'en'] # 允许的语言,目前只允许简中和英文的
- # if text_language not in allow_language: # 如果语言不在允许的语言中,则drop
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.NOT_ALLOW_LANGUAGE
- # return jso
- pdf_path = pdf_meta["pdf_path"]
- is_encrypted = pdf_meta["is_encrypted"]
- is_needs_password = pdf_meta["is_needs_password"]
- if (
- is_encrypted or is_needs_password
- ): # 加密的,需要密码的,没有页面的,都不处理
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.ENCRYPTED
- else:
- start_time = time.time() # 记录开始时间
- is_text_pdf, results = classify(
- pdf_path,
- total_page,
- page_width,
- page_height,
- img_sz_list,
- text_len_list,
- img_num_list,
- text_layout_list,
- )
- classify_time = int(time.time() - start_time) # 计算执行时间
- if is_text_pdf:
- pdf_meta["is_text_pdf"] = is_text_pdf
- jso["pdf_meta"] = pdf_meta
- jso["classify_time"] = classify_time
- # print(json.dumps(pdf_meta, ensure_ascii=False))
- allow_language = ["zh", "en"] # 允许的语言,目前只允许简中和英文的
- if (
- text_language not in allow_language
- ): # 如果语言不在允许的语言中,则drop
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.NOT_ALLOW_LANGUAGE
- return jso
- else:
- # 先不drop
- pdf_meta["is_text_pdf"] = is_text_pdf
- jso["pdf_meta"] = pdf_meta
- jso["classify_time"] = classify_time
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.NOT_IS_TEXT_PDF
- extra_info = {"classify_rules": []}
- for condition, result in results.items():
- if not result:
- extra_info["classify_rules"].append(condition)
- jso["extra_info"] = extra_info
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def save_tables_to_s3(jso: dict, debug_mode=False) -> dict:
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- logger.info(
- f"book_name is:{get_data_source(jso)}/{jso['file_id']} need drop",
- file=sys.stderr,
- )
- jso["dropped"] = True
- return jso
- try:
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- title = jso.get("title")
- url_encode_title = quote(title, safe="")
- if data_source != "scihub":
- return jso
- pdf_intermediate_dict = jso["pdf_intermediate_dict"]
- # 将 pdf_intermediate_dict 解压
- pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
- i = 0
- for page in pdf_intermediate_dict.values():
- if page.get("tables"):
- if len(page["tables"]) > 0:
- j = 0
- for table in page["tables"]:
- if debug_mode:
- image_path = join_path(
- "s3://mllm-raw-media/pdf2md_img/",
- book_name,
- table["image_path"],
- )
- else:
- image_path = join_path(
- "s3://mllm-raw-media/pdf2md_img/", table["image_path"]
- )
- if image_path.endswith(".jpg"):
- j += 1
- s3_client = get_s3_client(image_path)
- bucket_name, bucket_key = parse_bucket_key(image_path)
- # 通过s3_client获取图片到内存
- image_bytes = s3_client.get_object(
- Bucket=bucket_name, Key=bucket_key
- )["Body"].read()
- # 保存图片到新的位置
- if debug_mode:
- new_image_path = join_path(
- "s3://mllm-raw-media/pdf2md_img/table_new/",
- url_encode_title
- + "_"
- + table["image_path"].lstrip("tables/"),
- )
- else:
- new_image_path = join_path(
- "s3://mllm-raw-media/pdf2md_img/table_new/",
- url_encode_title + f"_page{i}_{j}.jpg",
- )
- logger.info(new_image_path, file=sys.stderr)
- bucket_name, bucket_key = parse_bucket_key(new_image_path)
- s3_client.put_object(
- Bucket=bucket_name, Key=bucket_key, Body=image_bytes
- )
- else:
- continue
- i += 1
- # 把无用的信息清空
- jso["doc_layout_result"] = ""
- jso["pdf_intermediate_dict"] = ""
- jso["pdf_meta"] = ""
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def drop_needdrop_pdf(jso: dict) -> dict:
- if jso.get("need_drop", False):
- logger.info(
- f"book_name is:{get_data_source(jso)}/{jso['file_id']} need drop",
- file=sys.stderr,
- )
- jso["dropped"] = True
- return jso
- def pdf_intermediate_dict_to_markdown(jso: dict, debug_mode=False) -> dict:
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- book_name = join_path(get_data_source(jso), jso["file_id"])
- logger.info(f"book_name is:{book_name} need drop", file=sys.stderr)
- jso["dropped"] = True
- return jso
- try:
- pdf_intermediate_dict = jso["pdf_intermediate_dict"]
- # 将 pdf_intermediate_dict 解压
- pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
- # markdown_content = mk_nlp_markdown(pdf_intermediate_dict)
- jso["content_list"] = mk_universal_format(pdf_intermediate_dict)
- # jso["content"] = markdown_content
- logger.info(f"book_name is:{get_data_source(jso)}/{jso['file_id']}")
- # 把无用的信息清空
- jso["doc_layout_result"] = ""
- jso["pdf_intermediate_dict"] = ""
- jso["pdf_meta"] = ""
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- # 检测debug开关
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- return jso
- # 开始正式逻辑
- s3_pdf_path = jso.get("file_location")
- s3_config = get_s3_config(s3_pdf_path)
- model_output_json_list = jso.get("doc_layout_result")
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- # 1.23.22已修复
- # if debug_mode:
- # pass
- # else:
- # if book_name == "zlib/zlib_21929367":
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.SPECIAL_PDF
- # return jso
- junk_img_bojids = jso["pdf_meta"]["junk_img_bojids"]
- # total_page = jso['pdf_meta']['total_page']
- # 增加检测 max_svgs 数量的检测逻辑,如果 max_svgs 超过3000则drop
- svgs_per_page_list = jso["pdf_meta"]["svgs_per_page"]
- max_svgs = max(svgs_per_page_list)
- if max_svgs > 3000:
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.HIGH_COMPUTATIONAL_lOAD_BY_SVGS
- # elif total_page > 1000:
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.HIGH_COMPUTATIONAL_lOAD_BY_TOTAL_PAGES
- else:
- try:
- save_path = s3_image_save_path
- image_s3_config = get_s3_config(save_path)
- start_time = time.time() # 记录开始时间
- # 先打印一下book_name和解析开始的时间
- logger.info(
- f"book_name is:{book_name},start_time is:{formatted_time(start_time)}",
- file=sys.stderr,
- )
- pdf_info_dict = parse_pdf_by_model(
- s3_pdf_path,
- s3_config,
- model_output_json_list,
- save_path,
- book_name,
- pdf_model_profile=None,
- image_s3_config=image_s3_config,
- start_page_id=start_page_id,
- junk_img_bojids=junk_img_bojids,
- debug_mode=debug_mode,
- )
- if pdf_info_dict.get(
- "need_drop", False
- ): # 如果返回的字典里有need_drop,则提取drop_reason并跳过本次解析
- jso["need_drop"] = True
- jso["drop_reason"] = pdf_info_dict["drop_reason"]
- else: # 正常返回,将 pdf_info_dict 压缩并存储
- pdf_info_dict = JsonCompressor.compress_json(pdf_info_dict)
- jso["pdf_intermediate_dict"] = pdf_info_dict
- end_time = time.time() # 记录完成时间
- parse_time = int(end_time - start_time) # 计算执行时间
- # 解析完成后打印一下book_name和耗时
- logger.info(
- f"book_name is:{book_name},end_time is:{formatted_time(end_time)},cost_time is:{parse_time}",
- file=sys.stderr,
- )
- jso["parse_time"] = parse_time
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- """
- 统一处理逻辑
- 1.先调用parse_pdf对文本类pdf进行处理
- 2.再调用ocr_dropped_parse_pdf,对之前drop的pdf进行处理
- """
- def uni_parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- jso = parse_pdf(jso, start_page_id=start_page_id, debug_mode=debug_mode)
- jso = ocr_dropped_parse_pdf(jso, start_page_id=start_page_id, debug_mode=debug_mode)
- return jso
- # 专门用来跑被drop的pdf,跑完之后需要把need_drop字段置为false
- def ocr_dropped_parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- if not jso.get("need_drop", False):
- return jso
- else:
- jso = ocr_parse_pdf_core(
- jso, start_page_id=start_page_id, debug_mode=debug_mode
- )
- jso["need_drop"] = False
- return jso
- def ocr_parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- # 检测debug开关
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- return jso
- jso = ocr_parse_pdf_core(jso, start_page_id=start_page_id, debug_mode=debug_mode)
- return jso
- def ocr_parse_pdf_core(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- s3_pdf_path = jso.get("file_location")
- s3_config = get_s3_config(s3_pdf_path)
- model_output_json_list = jso.get("doc_layout_result")
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- try:
- save_path = s3_image_save_path
- image_s3_config = get_s3_config(save_path)
- start_time = time.time() # 记录开始时间
- # 先打印一下book_name和解析开始的时间
- logger.info(
- f"book_name is:{book_name},start_time is:{formatted_time(start_time)}",
- file=sys.stderr,
- )
- pdf_info_dict = parse_pdf_by_ocr(
- s3_pdf_path,
- s3_config,
- model_output_json_list,
- save_path,
- book_name,
- pdf_model_profile=None,
- image_s3_config=image_s3_config,
- start_page_id=start_page_id,
- debug_mode=debug_mode,
- )
- pdf_info_dict = JsonCompressor.compress_json(pdf_info_dict)
- jso["pdf_intermediate_dict"] = pdf_info_dict
- end_time = time.time() # 记录完成时间
- parse_time = int(end_time - start_time) # 计算执行时间
- # 解析完成后打印一下book_name和耗时
- logger.info(
- f"book_name is:{book_name},end_time is:{formatted_time(end_time)},cost_time is:{parse_time}",
- file=sys.stderr,
- )
- jso["parse_time"] = parse_time
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def ocr_pdf_intermediate_dict_to_markdown(jso: dict, debug_mode=False) -> dict:
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- book_name = join_path(get_data_source(jso), jso["file_id"])
- logger.info(f"book_name is:{book_name} need drop", file=sys.stderr)
- jso["dropped"] = True
- return jso
- try:
- pdf_intermediate_dict = jso["pdf_intermediate_dict"]
- # 将 pdf_intermediate_dict 解压
- pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
- markdown_content = ocr_mk_mm_markdown(pdf_intermediate_dict)
- jso["content"] = markdown_content
- logger.info(
- f"book_name is:{get_data_source(jso)}/{jso['file_id']},markdown content length is {len(markdown_content)}",
- file=sys.stderr,
- )
- # 把无用的信息清空
- jso["doc_layout_result"] = ""
- jso["pdf_intermediate_dict"] = ""
- jso["pdf_meta"] = ""
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def ocr_pdf_intermediate_dict_to_markdown_with_para_for_qa(
- jso: dict, debug_mode=False
- ) -> dict:
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- book_name = join_path(get_data_source(jso), jso["file_id"])
- logger.info(f"book_name is:{book_name} need drop", file=sys.stderr)
- jso["dropped"] = True
- return jso
- try:
- pdf_intermediate_dict = jso["pdf_intermediate_dict"]
- # 将 pdf_intermediate_dict 解压
- pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
- markdown_content = ocr_mk_mm_markdown_with_para(pdf_intermediate_dict)
- jso["content_ocr"] = markdown_content
- logger.info(
- f"book_name is:{get_data_source(jso)}/{jso['file_id']},markdown content length is {len(markdown_content)}",
- file=sys.stderr,
- )
- # 把无用的信息清空
- jso["doc_layout_result"] = ""
- jso["pdf_intermediate_dict"] = ""
- jso["pdf_meta"] = ""
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def ocr_pdf_intermediate_dict_to_standard_format(jso: dict, debug_mode=False) -> dict:
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- book_name = join_path(get_data_source(jso), jso["file_id"])
- logger.info(f"book_name is:{book_name} need drop", file=sys.stderr)
- jso["dropped"] = True
- return jso
- try:
- pdf_intermediate_dict = jso["pdf_intermediate_dict"]
- # 将 pdf_intermediate_dict 解压
- pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
- standard_format = ocr_mk_mm_standard_format(pdf_intermediate_dict)
- jso["content_list"] = standard_format
- logger.info(
- f"book_name is:{get_data_source(jso)}/{jso['file_id']},content_list length is {len(standard_format)}",
- file=sys.stderr,
- )
- # 把无用的信息清空
- jso["doc_layout_result"] = ""
- jso["pdf_intermediate_dict"] = ""
- jso["pdf_meta"] = ""
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- def parse_pdf_for_model_train(jso: dict, start_page_id=0, debug_mode=False) -> dict:
- # 检测debug开关
- if debug_mode:
- pass
- else: # 如果debug没开,则检测是否有needdrop字段
- if jso.get("need_drop", False):
- return jso
- # 开始正式逻辑
- s3_pdf_path = jso.get("file_location")
- s3_config = get_s3_config(s3_pdf_path)
- model_output_json_list = jso.get("doc_layout_result")
- data_source = get_data_source(jso)
- file_id = jso.get("file_id")
- book_name = f"{data_source}/{file_id}"
- # 1.23.22已修复
- # if debug_mode:
- # pass
- # else:
- # if book_name == "zlib/zlib_21929367":
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.SPECIAL_PDF
- # return jso
- junk_img_bojids = jso["pdf_meta"]["junk_img_bojids"]
- # total_page = jso['pdf_meta']['total_page']
- # 增加检测 max_svgs 数量的检测逻辑,如果 max_svgs 超过3000则drop
- svgs_per_page_list = jso["pdf_meta"]["svgs_per_page"]
- max_svgs = max(svgs_per_page_list)
- if max_svgs > 3000:
- jso["need_drop"] = True
- jso["drop_reason"] = DropReason.HIGH_COMPUTATIONAL_lOAD_BY_SVGS
- # elif total_page > 1000:
- # jso['need_drop'] = True
- # jso['drop_reason'] = DropReason.HIGH_COMPUTATIONAL_lOAD_BY_TOTAL_PAGES
- else:
- try:
- save_path = s3_image_save_path
- image_s3_config = get_s3_config(save_path)
- start_time = time.time() # 记录开始时间
- # 先打印一下book_name和解析开始的时间
- logger.info(
- f"book_name is:{book_name},start_time is:{formatted_time(start_time)}",
- file=sys.stderr,
- )
- pdf_info_dict = parse_pdf_for_train(
- s3_pdf_path,
- s3_config,
- model_output_json_list,
- save_path,
- book_name,
- pdf_model_profile=None,
- image_s3_config=image_s3_config,
- start_page_id=start_page_id,
- junk_img_bojids=junk_img_bojids,
- debug_mode=debug_mode,
- )
- if pdf_info_dict.get(
- "need_drop", False
- ): # 如果返回的字典里有need_drop,则提取drop_reason并跳过本次解析
- jso["need_drop"] = True
- jso["drop_reason"] = pdf_info_dict["drop_reason"]
- else: # 正常返回,将 pdf_info_dict 压缩并存储
- pdf_info_dict = JsonCompressor.compress_json(pdf_info_dict)
- jso["pdf_intermediate_dict"] = pdf_info_dict
- jso["parsed_results"] = convert_to_train_format(pdf_info_dict)
- end_time = time.time() # 记录完成时间
- parse_time = int(end_time - start_time) # 计算执行时间
- # 解析完成后打印一下book_name和耗时
- logger.info(
- f"book_name is:{book_name},end_time is:{formatted_time(end_time)},cost_time is:{parse_time}",
- file=sys.stderr,
- )
- jso["parse_time"] = parse_time
- except Exception as e:
- jso = exception_handler(jso, e)
- return jso
- if __name__ == "__main__":
- pass
|