10 月之前 · 7dc9ba6389
--- a/api_examples/pipelines/test_video_classification.py
+++ b/api_examples/pipelines/test_video_classification.py
@@ -0,0 +1,30 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from paddlex import create_pipeline
			
 
				+
			
 
				+pipeline = create_pipeline(pipeline="video_classification")
			
 
				+
			
 
				+output = pipeline.predict("./test_samples/general_video_classification_001.mp4", topk=8)
			
 
				+# input_list = ["./test_samples/general_video_classification_001.mp4",
			
 
				+#                 "./test_samples/__lt03EF4ao.mkv",
			
 
				+#                 "./test_samples/__u6odV5hrI.mp4",
			
 
				+#                 "./test_samples/_8c2EG7JDOw.mp4.webm",]
			
 
				+# output = pipeline.predict(input_list, topk=5)
			
 
				+
			
 
				+for res in output:
			
 
				+    print(res)
			
 
				+    res.print()  ## Print the structured prediction output
			
 
				+    res.save_to_video("./output/")  ## Save the visualized result video
			
 
				+    res.save_to_json("./output/")  ## Save the structured prediction output as JSON
			
--- a/paddlex/configs/pipelines/video_classification.yaml
+++ b/paddlex/configs/pipelines/video_classification.yaml
@@ -0,0 +1,9 @@
 
				+pipeline_name: video_classification
			
 
				+
			
 
				+SubModules:
			
 
				+  VideoClassification:
			
 
				+    module_name: video_classification
			
 
				+    model_name: PP-TSMv2-LCNetV2_8frames_uniform
			
 
				+    model_dir: null
			
 
				+    batch_size: 1
			
 
				+    topk: 1
			
--- a/paddlex/inference/common/result/base_video_result.py
+++ b/paddlex/inference/common/result/base_video_result.py
@@ -14,7 +14,6 @@
 
				 
			
 
				 from .base_result import BaseResult
			
 
				 from .mixin import StrMixin, JsonMixin, ImgMixin, VideoMixin
			
 
				-from ...utils.io import VideoReader, VideoWriter
			
 
				 
			
 
				 
			
 
				 class BaseVideoResult(BaseResult, StrMixin, JsonMixin, VideoMixin):
			
@@ -32,8 +31,6 @@ class BaseVideoResult(BaseResult, StrMixin, JsonMixin, VideoMixin):
 
				         Raises:
			
 
				             AssertionError: If the required key (`BaseVideoResult.INPUT_IMG_KEY`) are not found in the data.
			
 
				         """
			
 
				-        self._video_reader = VideoReader(backend="decord")
			
 
				-        self._video_writer = VideoWriter(backend="opencv")
			
 
				 
			
 
				         super().__init__(data)
			
 
				         StrMixin.__init__(self)
			
--- a/paddlex/inference/common/result/mixin.py
+++ b/paddlex/inference/common/result/mixin.py
@@ -456,7 +456,7 @@ class XlsxMixin:
 
				 
			
 
				 class VideoMixin:
			
 
				     def __init__(self, backend="opencv", *args, **kwargs):
			
 
				-        self._video_writer = VideoWriter(backend=backend, *args, **kwargs)
			
 
				+        self._backend = backend
			
 
				         self._save_funcs.append(self.save_to_video)
			
 
				 
			
 
				     @abstractmethod
			
@@ -469,9 +469,8 @@ class VideoMixin:
 
				         return video
			
 
				 
			
 
				     def save_to_video(self, save_path, *args, **kwargs):
			
 
				-        if not str(save_path).lower().endswith((".mp4", ".avi", ".mkv")):
			
 
				+        video_writer = VideoWriter(backend=self._backend, *args, **kwargs)
			
 
				+        if not str(save_path).lower().endswith((".mp4", ".avi", ".mkv", ".webm")):
			
 
				             fp = Path(self["input_path"])
			
 
				             save_path = Path(save_path) / f"{fp.stem}{fp.suffix}"
			
 
				-        _save_list_data(
			
 
				-            self._video_writer.write, save_path, self.video, *args, **kwargs
			
 
				-        )
			
 
				+        _save_list_data(video_writer.write, save_path, self.video, *args, **kwargs)
			
--- a/paddlex/inference/models_new/video_classification/processors.py
+++ b/paddlex/inference/models_new/video_classification/processors.py
@@ -347,9 +347,9 @@ class VideoClasTopk:
 
				         Returns:
			
 
				             np.ndarray: The softmax-transformed data.
			
 
				         """
			
 
				-        exp_data = np.exp(data - np.max(data))
			
 
				-        softmax_data = exp_data / np.sum(exp_data)
			
 
				-        return softmax_data
			
 
				+        x_max = np.max(data, axis=-1, keepdims=True)
			
 
				+        e_x = np.exp(data - x_max)
			
 
				+        return e_x / np.sum(e_x, axis=-1, keepdims=True)
			
 
				 
			
 
				     def _parse_class_id_map(
			
 
				         self, class_ids: Optional[Sequence[Union[str, int]]]
			
@@ -384,7 +384,7 @@ class VideoClasTopk:
 
				                 - A list of arrays of scores for the top-k predictions.
			
 
				                 - A list of lists of label names for the top-k predictions.
			
 
				         """
			
 
				-        preds = self.softmax(preds)
			
 
				+        preds[0] = self.softmax(preds[0])
			
 
				         indexes = preds[0].argsort(axis=1)[:, -topk:][:, ::-1].astype("int32")
			
 
				         scores = [
			
 
				             np.around(pred[index], decimals=5) for pred, index in zip(preds[0], indexes)
			
--- a/paddlex/inference/models_new/video_classification/result.py
+++ b/paddlex/inference/models_new/video_classification/result.py
@@ -19,6 +19,7 @@ from PIL import Image, ImageDraw, ImageFont
 
				 
			
 
				 from ....utils.fonts import PINGFANG_FONT_FILE_PATH
			
 
				 from ...utils.color_map import get_colormap
			
 
				+from ...utils.io import VideoReader
			
 
				 from ...common.result import BaseVideoResult
			
 
				 
			
 
				 
			
@@ -28,7 +29,7 @@ class TopkVideoResult(BaseVideoResult):
 
				         """Draw label on image"""
			
 
				         labels = self.get("label_names", self["class_ids"])
			
 
				         label_str = f"{labels[0]} {self['scores'][0]:.2f}"
			
 
				-        video_reader = self._video_reader
			
 
				+        video_reader = VideoReader(backend="decord")
			
 
				         video = video_reader.read(self["input_path"])
			
 
				         video = list(video)
			
 
				         write_fps = video_reader.get_fps()
			
--- a/paddlex/inference/pipelines_new/__init__.py
+++ b/paddlex/inference/pipelines_new/__init__.py
@@ -25,6 +25,7 @@ from .pp_chatocr import PP_ChatOCRv3_Pipeline, PP_ChatOCRv4_Pipeline
 
				 from .image_classification import ImageClassificationPipeline
			
 
				 from .seal_recognition import SealRecognitionPipeline
			
 
				 from .table_recognition import TableRecognitionPipeline
			
 
				+from .video_classification import VideoClassificationPipeline
			
 
				 
			
 
				 
			
 
				 def get_pipeline_path(pipeline_name: str) -> str:
			
--- a/paddlex/inference/pipelines_new/video_classification/__init__.py
+++ b/paddlex/inference/pipelines_new/video_classification/__init__.py
@@ -0,0 +1,15 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from .pipeline import VideoClassificationPipeline
			
--- a/paddlex/inference/pipelines_new/video_classification/pipeline.py
+++ b/paddlex/inference/pipelines_new/video_classification/pipeline.py
@@ -0,0 +1,73 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from typing import Any, Dict, Iterator, List, Optional, Union
			
 
				+import numpy as np
			
 
				+from ...utils.pp_option import PaddlePredictorOption
			
 
				+from ..base import BasePipeline
			
 
				+
			
 
				+# [TODO] 待更新models_new到models
			
 
				+from ...models_new.video_classification.result import TopkVideoResult
			
 
				+
			
 
				+
			
 
				+class VideoClassificationPipeline(BasePipeline):
			
 
				+    """Video classification pipeline wrapping a single video classification model."""
			
 
				+
			
 
				+    entities = "video_classification"
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        config: Dict,
			
 
				+        device: Optional[str] = None,
			
 
				+        pp_option: Optional[PaddlePredictorOption] = None,
			
 
				+        use_hpip: bool = False,
			
 
				+        hpi_params: Optional[Dict[str, Any]] = None,
			
 
				+    ) -> None:
			
 
				+        """
			
 
				+        Initializes the pipeline and creates its video classification model.
			
 
				+
			
 
				+        Args:
			
 
				+            config (Dict): Pipeline configuration; `config["SubModules"]["VideoClassification"]` is required.
			
 
				+            device (Optional[str]): The device to run the prediction on. Default is None.
			
 
				+            pp_option (Optional[PaddlePredictorOption]): Options for PaddlePaddle predictor. Default is None.
			
 
				+            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
			
 
				+            hpi_params (Optional[Dict[str, Any]]): HPIP specific parameters. Default is None.
			
 
				+        """
			
 
				+        super().__init__(
			
 
				+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_params=hpi_params
			
 
				+        )
			
 
				+
			
 
				+        video_classification_model_config = config["SubModules"]["VideoClassification"]
			
 
				+        self.video_classification_model = self.create_model(
			
 
				+            video_classification_model_config
			
 
				+        )
			
 
				+
			
 
				+    def predict(
			
 
				+        self,
			
 
				+        input: Union[str, List[str], np.ndarray, List[np.ndarray]],
			
 
				+        topk: Union[int, None] = 1,
			
 
				+        **kwargs
			
 
				+    ) -> Iterator[TopkVideoResult]:
			
 
				+        """Lazily predicts video classification results for the given input.
			
 
				+
			
 
				+        Args:
			
 
				+            input (Union[str, List[str], np.ndarray, List[np.ndarray]]): The input video(s) or path(s) to the videos.
			
 
				+            topk (Union[int, None]): The number of top predictions to return. Defaults to 1.
			
 
				+            **kwargs: Accepted for interface compatibility; not forwarded to the model by this method.
			
 
				+
			
 
				+        Yields:
			
 
				+            TopkVideoResult: Top-k results, yielded one at a time as produced by the model call.
			
 
				+        """
			
 
				+
			
 
				+        yield from self.video_classification_model(input, topk=topk)