Browse Source

添加文件类型识别

chaixuhong 2 weeks ago
parent
commit
1f7a3a6747

+ 1 - 4
.gitignore

@@ -8,10 +8,7 @@ target/
 **.log
 logs/
 ### IntelliJ IDEA ###
-.idea/modules.xml
-.idea/jarRepositories.xml
-.idea/compiler.xml
-.idea/libraries/
+.idea/
 *.iws
 *.iml
 *.ipr

+ 0 - 10
.idea/.gitignore

@@ -1,10 +0,0 @@
-# 默认忽略的文件
-/shelf/
-/workspace.xml
-# 已忽略包含查询文件的默认文件夹
-/queries/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
-# 基于编辑器的 HTTP 客户端请求
-/httpRequests/

+ 0 - 13
.idea/encodings.xml

@@ -1,13 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Encoding">
-    <file url="file://$PROJECT_DIR$/schedule-consumer/src/main/java" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/schedule-consumer/src/main/resources" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/schedule-manager/src/main/java" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/schedule-manager/src/main/resources" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/schedule-producer/src/main/java" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/schedule-producer/src/main/resources" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
-    <file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
-  </component>
-</project>

+ 0 - 8
.idea/inspectionProfiles/Project_Default.xml

@@ -1,8 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <profile version="1.0">
-    <option name="myName" value="Project Default" />
-    <inspection_tool class="AutoCloseableResource" enabled="true" level="WARNING" enabled_by_default="true">
-      <option name="METHOD_MATCHER_CONFIG" value="java.util.Formatter,format,java.io.Writer,append,com.google.common.base.Preconditions,checkNotNull,org.hibernate.Session,close,java.io.PrintWriter,printf,java.io.PrintStream,printf,java.lang.foreign.Arena,ofAuto,java.lang.foreign.Arena,global,cn.com.yusys.manager.parser.DockerInstanceManager,dockerClient" />
-    </inspection_tool>
-  </profile>
-</component>

+ 0 - 14
.idea/misc.xml

@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ExternalStorageConfigurationManager" enabled="true" />
-  <component name="MavenProjectsManager">
-    <option name="originalFiles">
-      <list>
-        <option value="$PROJECT_DIR$/pom.xml" />
-      </list>
-    </option>
-  </component>
-  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="temurin-1.8" project-jdk-type="JavaSDK">
-    <output url="file://$PROJECT_DIR$/out" />
-  </component>
-</project>

+ 0 - 6
.idea/vcs.xml

@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>

+ 27 - 1
schedule-producer/pom.xml

@@ -37,6 +37,32 @@
             <groupId>org.projectlombok</groupId>
             <artifactId>lombok</artifactId>
         </dependency>
+        <dependency>
+            <groupId>com.github.librepdf</groupId>
+            <artifactId>openpdf</artifactId>
+            <version>1.3.32</version>
+        </dependency>
+        <dependency>
+            <groupId>com.drewnoakes</groupId>
+            <artifactId>metadata-extractor</artifactId>
+            <version>2.19.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org</groupId>
+            <artifactId>jaudiotagger</artifactId>
+            <version>2.0.3</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.googlecode.mp4parser</groupId>
+            <artifactId>isoparser</artifactId>
+            <version>1.1.22</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
@@ -56,4 +82,4 @@
         </plugins>
     </build>
 
-</project>
+</project>

+ 452 - 0
schedule-producer/src/main/java/cn/com/yusys/producer/util/FileMetadataUtil.java

@@ -0,0 +1,452 @@
+package cn.com.yusys.producer.util;
+
+import com.coremedia.iso.IsoFile;
+import com.coremedia.iso.boxes.MovieHeaderBox;
+import com.coremedia.iso.boxes.MovieBox;
+import com.drew.imaging.ImageMetadataReader;
+import com.drew.lang.GeoLocation;
+import com.drew.metadata.Metadata;
+import com.drew.metadata.Tag;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.exif.GpsDirectory;
+import com.lowagie.text.pdf.PdfReader;
+import org.jaudiotagger.audio.AudioFile;
+import org.jaudiotagger.audio.AudioFileIO;
+import org.jaudiotagger.audio.AudioHeader;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.awt.image.BufferedImage;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Instant;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.imageio.ImageIO;
+
+/**
+ * Extracts file metadata by combining OS-level attributes with lightweight,
+ * content-level details for PDFs, images, audio and video.
+ *
+ * <p>Content parsing is best effort: a parser failure is recorded in the result
+ * map under an {@code ...Error} key instead of being thrown. PDFs are read with
+ * OpenPDF ({@code com.lowagie}); PDFBox is deliberately not used.
+ */
+public final class FileMetadataUtil {
+
+    /** Matches the first (optionally signed) integer or decimal number in a string. */
+    private static final Pattern FIRST_NUMBER_PATTERN = Pattern.compile("([-+]?[0-9]*\\.?[0-9]+)");
+
+    private FileMetadataUtil() {
+    }
+
+    /**
+     * Extracts metadata for the given path.
+     *
+     * @param path file or directory to inspect; relative paths are resolved to absolute
+     * @return insertion-ordered map holding the OS attributes first, followed by an
+     *         optional {@code pdfMetadata} / {@code imageMetadata} /
+     *         {@code audioMetadata} / {@code videoMetadata} entry (or the matching
+     *         {@code ...Error} entry when content parsing failed)
+     * @throws IllegalArgumentException if {@code path} is null
+     * @throws IOException              if the basic file attributes cannot be read
+     */
+    public static Map<String, Object> extract(Path path) throws IOException {
+        if (path == null) {
+            throw new IllegalArgumentException("path is null");
+        }
+
+        Path absolutePath = path.toAbsolutePath();
+        Map<String, Object> metadataMap = new LinkedHashMap<>();
+
+        // 1. OS-level attributes (name, size, timestamps, permissions).
+        extractOsMetadata(absolutePath, metadataMap);
+
+        // 2. Content-level metadata (PDF / image / audio / video), best effort.
+        extractContentMetadata(absolutePath, metadataMap);
+
+        return metadataMap;
+    }
+
+    /**
+     * String convenience overload of {@link #extract(Path)}.
+     *
+     * @param path file system path
+     * @return see {@link #extract(Path)}
+     * @throws IllegalArgumentException if {@code path} is null (same contract as the
+     *                                  {@code Path} overload)
+     * @throws IOException              if the basic file attributes cannot be read
+     */
+    public static Map<String, Object> extract(String path) throws IOException {
+        if (path == null) {
+            throw new IllegalArgumentException("path is null");
+        }
+        return extract(Paths.get(path));
+    }
+
+    /**
+     * Adds the basic attributes reported by the file system to {@code metadata}.
+     *
+     * @throws IOException if the attributes or the content type cannot be read
+     */
+    private static void extractOsMetadata(Path absolutePath, Map<String, Object> metadata) throws IOException {
+        BasicFileAttributes attrs = Files.readAttributes(absolutePath, BasicFileAttributes.class);
+
+        // getFileName() is null for a root path; fall back to the full path text.
+        String fileName = absolutePath.getFileName() == null ? absolutePath.toString() : absolutePath.getFileName().toString();
+        String extension = FileTypeDetector.getExtension(fileName);
+        String mimeType = Files.probeContentType(absolutePath);
+
+        metadata.put("fileName", fileName);
+        metadata.put("extension", extension);
+        metadata.put("absolutePath", absolutePath.toString());
+        metadata.put("sizeBytes", attrs.size());
+        metadata.put("mimeType", mimeType);
+        metadata.put("category", FileTypeDetector.detectCategoryLabel(absolutePath));
+        metadata.put("createdAt", toIsoString(attrs.creationTime().toInstant()));
+        metadata.put("lastModifiedAt", toIsoString(attrs.lastModifiedTime().toInstant()));
+        metadata.put("lastAccessAt", toIsoString(attrs.lastAccessTime().toInstant()));
+        metadata.put("isDirectory", attrs.isDirectory());
+        metadata.put("isRegularFile", attrs.isRegularFile());
+        metadata.put("isReadable", Files.isReadable(absolutePath));
+        metadata.put("isWritable", Files.isWritable(absolutePath));
+    }
+
+    /**
+     * Dispatches to the content parser matching the file kind. Only readable
+     * regular files are inspected. PDF wins over every other kind; then image,
+     * audio and video are tried in that order.
+     */
+    private static void extractContentMetadata(Path absolutePath, Map<String, Object> metadataMap) {
+        if (!Files.isRegularFile(absolutePath) || !Files.isReadable(absolutePath)) {
+            return;
+        }
+
+        String mimeType = (String) metadataMap.get("mimeType");
+        String extension = (String) metadataMap.get("extension");
+
+        if (isPdf(mimeType, extension)) {
+            extractPdfMetadata(absolutePath, metadataMap);
+            return;
+        }
+
+        // Resolve the category once: each detection probes the content type again
+        // and may read file content, so repeating it per kind is wasted I/O.
+        FileTypeDetector.Category category = FileTypeDetector.detectCategory(absolutePath);
+
+        if (hasMimePrefix(mimeType, "image/") || category == FileTypeDetector.Category.IMAGE) {
+            extractImageMetadata(absolutePath, metadataMap);
+        } else if (hasMimePrefix(mimeType, "audio/") || category == FileTypeDetector.Category.AUDIO) {
+            extractAudioMetadata(absolutePath, metadataMap);
+        } else if (hasMimePrefix(mimeType, "video/") || category == FileTypeDetector.Category.VIDEO) {
+            extractVideoMetadata(absolutePath, metadataMap, mimeType, extension);
+        }
+    }
+
+    /** Returns true when the MIME type or the extension identifies a PDF. */
+    private static boolean isPdf(String mimeType, String extension) {
+        return "application/pdf".equalsIgnoreCase(mimeType) || "pdf".equalsIgnoreCase(extension);
+    }
+
+    /**
+     * Case-insensitive, locale-independent prefix test on a MIME type.
+     * {@code regionMatches} compares per character, so the result does not depend
+     * on the default locale the way {@code toLowerCase()} does.
+     */
+    private static boolean hasMimePrefix(String mimeType, String prefix) {
+        return mimeType != null && mimeType.regionMatches(true, 0, prefix, 0, prefix.length());
+    }
+
+    /** Text stored in {@code ...Error} entries; never null, even for message-less exceptions. */
+    private static String describeFailure(Exception e) {
+        String message = e.getMessage();
+        return message != null ? message : e.getClass().getSimpleName();
+    }
+
+    /**
+     * Reads the PDF info dictionary and page count into {@code pdfMetadata};
+     * failures are recorded under {@code pdfMetadataError}.
+     */
+    private static void extractPdfMetadata(Path absolutePath, Map<String, Object> metadataMap) {
+        try (InputStream stream = Files.newInputStream(absolutePath)) {
+            PdfReader reader = null;
+            try {
+                reader = new PdfReader(stream);
+                Map<String, String> info = reader.getInfo();
+                Map<String, Object> pdfInfo = new LinkedHashMap<>();
+                if (info != null) {
+                    pdfInfo.putAll(info);
+                }
+                pdfInfo.put("numberOfPages", reader.getNumberOfPages());
+                metadataMap.put("pdfMetadata", pdfInfo);
+            } finally {
+                if (reader != null) {
+                    reader.close();
+                }
+            }
+        } catch (Exception e) {
+            metadataMap.put("pdfMetadataError", "Parse skipped or failed: " + describeFailure(e));
+        }
+    }
+
+    /**
+     * Reads pixel dimensions plus EXIF capture time, camera make/model and GPS
+     * position into {@code imageMetadata}; failures are recorded under
+     * {@code imageMetadataError}.
+     *
+     * <p>An image that ImageIO cannot decode only loses its width/height; the
+     * EXIF/GPS lookup still runs, because it does not need decoded pixels.
+     */
+    private static void extractImageMetadata(Path absolutePath, Map<String, Object> metadataMap) {
+        try {
+            Map<String, Object> imageInfo = new LinkedHashMap<>();
+
+            // ImageIO.read returns null when no registered reader understands the format.
+            BufferedImage image = ImageIO.read(absolutePath.toFile());
+            if (image != null) {
+                imageInfo.put("width", image.getWidth());
+                imageInfo.put("height", image.getHeight());
+            }
+
+            Metadata metadata = ImageMetadataReader.readMetadata(absolutePath.toFile());
+            ExifSubIFDDirectory exifSub = metadata.getFirstDirectoryOfType(ExifSubIFDDirectory.class);
+            ExifIFD0Directory exifIfd0 = metadata.getFirstDirectoryOfType(ExifIFD0Directory.class);
+            GpsDirectory gps = metadata.getFirstDirectoryOfType(GpsDirectory.class);
+
+            Date originalDate = exifSub != null ? exifSub.getDateOriginal() : null;
+            if (originalDate != null) {
+                imageInfo.put("shootingTime", toIsoString(originalDate.toInstant()));
+            }
+
+            if (exifIfd0 != null) {
+                String make = exifIfd0.getString(ExifIFD0Directory.TAG_MAKE);
+                String model = exifIfd0.getString(ExifIFD0Directory.TAG_MODEL);
+                if (make != null) {
+                    imageInfo.put("cameraMake", make);
+                }
+                if (model != null) {
+                    imageInfo.put("cameraModel", model);
+                }
+            }
+
+            if (gps != null) {
+                GeoLocation geoLocation = gps.getGeoLocation();
+                if (geoLocation != null && !geoLocation.isZero()) {
+                    imageInfo.put("gpsLatitude", geoLocation.getLatitude());
+                    imageInfo.put("gpsLongitude", geoLocation.getLongitude());
+                }
+                // Read the altitude numerically. Its string form can be a fraction
+                // such as "15432/125", from which a first-number regex would take
+                // only the numerator.
+                Double altitude = gps.getDoubleObject(GpsDirectory.TAG_ALTITUDE);
+                if (altitude != null) {
+                    // EXIF GPSAltitudeRef: 0 = above sea level, 1 = below sea level.
+                    Integer altitudeRef = gps.getInteger(GpsDirectory.TAG_ALTITUDE_REF);
+                    if (altitudeRef != null && altitudeRef == 1) {
+                        altitude = -altitude;
+                    }
+                    imageInfo.put("altitudeMeters", altitude);
+                }
+            }
+
+            if (!imageInfo.isEmpty()) {
+                metadataMap.put("imageMetadata", imageInfo);
+            }
+        } catch (Exception e) {
+            metadataMap.put("imageMetadataError", "Parse skipped or failed: " + describeFailure(e));
+        }
+    }
+
+    /**
+     * Reads duration, bit rate, sample rate, channels and format from the audio
+     * header into {@code audioMetadata}; failures are recorded under
+     * {@code audioMetadataError}.
+     */
+    private static void extractAudioMetadata(Path absolutePath, Map<String, Object> metadataMap) {
+        try {
+            AudioFile audioFile = AudioFileIO.read(absolutePath.toFile());
+            AudioHeader header = audioFile.getAudioHeader();
+            if (header == null) {
+                return;
+            }
+            Map<String, Object> audioInfo = new LinkedHashMap<>();
+            int durationSeconds = header.getTrackLength();
+            audioInfo.put("durationSeconds", durationSeconds);
+            // Multiply as long so long tracks cannot overflow int.
+            audioInfo.put("durationMillis", durationSeconds * 1000L);
+            audioInfo.put("bitRate", header.getBitRate());
+            audioInfo.put("sampleRate", header.getSampleRate());
+            audioInfo.put("channels", header.getChannels());
+            audioInfo.put("format", header.getFormat());
+            metadataMap.put("audioMetadata", audioInfo);
+        } catch (Exception e) {
+            metadataMap.put("audioMetadataError", "Parse skipped or failed: " + describeFailure(e));
+        }
+    }
+
+    /** Returns true for MP4/QuickTime containers, judged by MIME type or extension. */
+    private static boolean isMp4Like(String mimeType, String extension) {
+        if ("video/mp4".equalsIgnoreCase(mimeType) || "video/quicktime".equalsIgnoreCase(mimeType)) {
+            return true;
+        }
+        return "mp4".equalsIgnoreCase(extension) || "m4v".equalsIgnoreCase(extension) || "mov".equalsIgnoreCase(extension);
+    }
+
+    /**
+     * Collects video metadata into {@code videoMetadata}. MP4-like files are read
+     * through their movie header first; the generic metadata reader then fills in
+     * whatever is still missing. Nothing is stored when neither source yields data.
+     */
+    private static void extractVideoMetadata(Path absolutePath,
+                                             Map<String, Object> metadataMap,
+                                             String mimeType,
+                                             String extension) {
+        Map<String, Object> videoInfo = new LinkedHashMap<>();
+
+        if (isMp4Like(mimeType, extension)) {
+            readMp4Header(absolutePath, videoInfo);
+        }
+        readGenericVideoMetadata(absolutePath, videoInfo);
+
+        if (!videoInfo.isEmpty()) {
+            metadataMap.put("videoMetadata", videoInfo);
+        }
+    }
+
+    /**
+     * Reads duration and creation/modification time from the MP4 movie header
+     * ({@code mvhd}); failures are recorded under {@code mp4ParseError}.
+     */
+    private static void readMp4Header(Path absolutePath, Map<String, Object> videoInfo) {
+        IsoFile isoFile = null;
+        try {
+            isoFile = new IsoFile(absolutePath.toString());
+            MovieBox movieBox = isoFile.getMovieBox();
+            MovieHeaderBox mvhd = movieBox != null ? movieBox.getMovieHeaderBox() : null;
+            if (mvhd == null) {
+                return;
+            }
+            long duration = mvhd.getDuration();
+            long timescale = mvhd.getTimescale();
+            // Duration is expressed in timescale units; skip it when the scale is unusable.
+            if (timescale > 0) {
+                double seconds = (double) duration / (double) timescale;
+                videoInfo.put("durationSeconds", seconds);
+                videoInfo.put("durationMillis", (long) (seconds * 1000.0));
+            }
+            if (mvhd.getCreationTime() != null) {
+                videoInfo.put("creationTime", toIsoString(mvhd.getCreationTime().toInstant()));
+            }
+            if (mvhd.getModificationTime() != null) {
+                videoInfo.put("modificationTime", toIsoString(mvhd.getModificationTime().toInstant()));
+            }
+        } catch (Exception e) {
+            videoInfo.put("mp4ParseError", describeFailure(e));
+        } finally {
+            if (isoFile != null) {
+                try {
+                    isoFile.close();
+                } catch (IOException ignored) {
+                    // Nothing useful can be done about a failed close of a read-only file.
+                }
+            }
+        }
+    }
+
+    /**
+     * Generic fallback via metadata-extractor: capture time, location and duration.
+     * Values already present in {@code videoInfo} are not overwritten; failures are
+     * recorded under {@code genericParseError}.
+     */
+    private static void readGenericVideoMetadata(Path absolutePath, Map<String, Object> videoInfo) {
+        try {
+            Metadata metadata = ImageMetadataReader.readMetadata(absolutePath.toFile());
+            Date creationDate = findFirstDate(metadata, "creation", "create date", "media create");
+            if (creationDate != null) {
+                String creationIso = creationDate.toInstant().toString();
+                if (!videoInfo.containsKey("creationTime")) {
+                    videoInfo.put("creationTime", creationIso);
+                }
+                if (!videoInfo.containsKey("shootingTime")) {
+                    videoInfo.put("shootingTime", creationIso);
+                }
+            }
+
+            String location = findFirstTagString(metadata, "location", "gps");
+            if (location != null) {
+                videoInfo.put("location", location);
+                Double[] latLon = parseIso6709(location);
+                if (latLon != null) {
+                    videoInfo.put("gpsLatitude", latLon[0]);
+                    videoInfo.put("gpsLongitude", latLon[1]);
+                }
+            }
+
+            Double durationSeconds = findDurationSeconds(metadata);
+            if (durationSeconds != null && !videoInfo.containsKey("durationSeconds")) {
+                videoInfo.put("durationSeconds", durationSeconds);
+                videoInfo.put("durationMillis", (long) (durationSeconds * 1000.0));
+            }
+        } catch (Exception e) {
+            videoInfo.put("genericParseError", describeFailure(e));
+        }
+    }
+
+    /** Null-safe ISO-8601 rendering of an instant. */
+    private static String toIsoString(Instant instant) {
+        return instant == null ? null : instant.toString();
+    }
+
+    /**
+     * Returns the first tag value that is a {@link Date} and whose tag name
+     * contains one of the keywords, or null when there is none.
+     *
+     * @param keywords lower-case fragments matched against the lower-cased tag name
+     */
+    private static Date findFirstDate(Metadata metadata, String... keywords) {
+        if (metadata == null) {
+            return null;
+        }
+        for (com.drew.metadata.Directory directory : metadata.getDirectories()) {
+            for (Tag tag : directory.getTags()) {
+                String tagName = tag.getTagName();
+                if (tagName == null) {
+                    continue;
+                }
+                String lower = tagName.toLowerCase(java.util.Locale.ROOT);
+                for (String key : keywords) {
+                    if (lower.contains(key)) {
+                        Object obj = directory.getObject(tag.getTagType());
+                        if (obj instanceof Date) {
+                            return (Date) obj;
+                        }
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the description of the first tag whose name contains one of the
+     * keywords, or null when no tag matches. The description itself may be null.
+     *
+     * @param keywords lower-case fragments matched against the lower-cased tag name
+     */
+    private static String findFirstTagString(Metadata metadata, String... keywords) {
+        if (metadata == null) {
+            return null;
+        }
+        for (com.drew.metadata.Directory directory : metadata.getDirectories()) {
+            for (Tag tag : directory.getTags()) {
+                String tagName = tag.getTagName();
+                if (tagName == null) {
+                    continue;
+                }
+                String lower = tagName.toLowerCase(java.util.Locale.ROOT);
+                for (String key : keywords) {
+                    if (lower.contains(key)) {
+                        return tag.getDescription();
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the value of the first tag whose name contains "duration": the raw
+     * numeric value when the tag holds a number, otherwise the first number found
+     * in its description.
+     *
+     * <p>NOTE(review): the raw numeric value is returned as-is and treated as
+     * seconds by the caller; some containers may store it in timescale units -
+     * confirm against the directories actually encountered.
+     */
+    private static Double findDurationSeconds(Metadata metadata) {
+        if (metadata == null) {
+            return null;
+        }
+        for (com.drew.metadata.Directory directory : metadata.getDirectories()) {
+            for (Tag tag : directory.getTags()) {
+                String tagName = tag.getTagName();
+                if (tagName == null || !tagName.toLowerCase(java.util.Locale.ROOT).contains("duration")) {
+                    continue;
+                }
+                Object obj = directory.getObject(tag.getTagType());
+                if (obj instanceof Number) {
+                    return ((Number) obj).doubleValue();
+                }
+                Double parsed = parseFirstNumber(tag.getDescription());
+                if (parsed != null) {
+                    return parsed;
+                }
+            }
+        }
+        return null;
+    }
+
+    /** Parses the first number contained in {@code text}; null when there is none. */
+    private static Double parseFirstNumber(String text) {
+        if (text == null) {
+            return null;
+        }
+        Matcher matcher = FIRST_NUMBER_PATTERN.matcher(text);
+        if (!matcher.find()) {
+            return null;
+        }
+        try {
+            return Double.parseDouble(matcher.group(1));
+        } catch (NumberFormatException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Parses latitude and longitude from an ISO 6709 style string such as
+     * {@code +27.5916+086.5640/} or {@code +27.5916+086.5640+8850/}.
+     *
+     * <p>Only the first two components are used; a trailing altitude, CRS
+     * identifier or solidus is ignored. Values are read as decimal degrees, so a
+     * result outside +/-90 (latitude) or +/-180 (longitude) - for example from a
+     * degrees-minutes encoding - is rejected rather than reported as a coordinate.
+     *
+     * @return {@code {latitude, longitude}}, or null when the text cannot be parsed
+     */
+    private static Double[] parseIso6709(String location) {
+        if (location == null) {
+            return null;
+        }
+        String value = location.trim();
+        if (value.isEmpty()) {
+            return null;
+        }
+
+        // Longitude starts at the first sign after the leading character (the
+        // leading character may itself be the latitude's sign).
+        int lonStart = -1;
+        for (int i = 1; i < value.length(); i++) {
+            char c = value.charAt(i);
+            if (c == '+' || c == '-') {
+                lonStart = i;
+                break;
+            }
+        }
+        if (lonStart <= 0) {
+            return null;
+        }
+
+        // Longitude ends at the first character that is neither a digit nor a
+        // decimal point: the altitude's sign, "CRS..." or the terminating '/'.
+        int lonEnd = value.length();
+        for (int i = lonStart + 1; i < value.length(); i++) {
+            char c = value.charAt(i);
+            if (c != '.' && (c < '0' || c > '9')) {
+                lonEnd = i;
+                break;
+            }
+        }
+
+        String latStr = value.substring(0, lonStart).replace("/", "");
+        String lonStr = value.substring(lonStart, lonEnd);
+        try {
+            double lat = Double.parseDouble(latStr);
+            double lon = Double.parseDouble(lonStr);
+            // Written as negated "<=" so that NaN is rejected as well.
+            if (!(Math.abs(lat) <= 90.0) || !(Math.abs(lon) <= 180.0)) {
+                return null;
+            }
+            return new Double[]{lat, lon};
+        } catch (NumberFormatException e) {
+            return null;
+        }
+    }
+}

+ 161 - 0
schedule-producer/src/main/java/cn/com/yusys/producer/util/FileTypeDetector.java

@@ -0,0 +1,161 @@
+package cn.com.yusys.producer.util;
+
+import lombok.Getter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.*;
+
+/**
+ * Classifies files into coarse categories (document, image, audio, video, other).
+ *
+ * <p>Detection first asks the platform for a MIME type and falls back to the
+ * file extension. A PDF that looks like a scan (images but no fonts) is
+ * classified as {@link Category#IMAGE}.
+ */
+public final class FileTypeDetector {
+
+    /** Coarse file category; {@code label} is its Chinese display name. */
+    @Getter
+    public enum Category {
+        DOCUMENT("文档类"),
+        IMAGE("图片类"),
+        AUDIO("音频类"),
+        VIDEO("视频类"),
+        OTHER("其他");
+
+        private final String label;
+
+        Category(String label) {
+            this.label = label;
+        }
+
+    }
+
+    private static final String PDF_EXTENSION = "pdf";
+
+    /** Upper bound on the number of leading bytes inspected by the scanned-PDF heuristic. */
+    private static final int SCAN_PROBE_BYTES = 1024 * 1024;
+
+    /** Lower-case extension (without the dot) to category; read-only after class initialization. */
+    private static final Map<String, Category> EXTENSION_MAP = buildExtensionMap();
+
+    private FileTypeDetector() {
+    }
+
+    private static Map<String, Category> buildExtensionMap() {
+        Map<String, Category> map = new HashMap<>();
+
+        register(map, Category.IMAGE,
+                "jpg", "jpeg", "png", "gif", "bmp", "webp", "tiff", "tif", "svg", "heic", "heif");
+
+        register(map, Category.AUDIO,
+                "mp3", "wav", "m4a", "flac", "aac", "ogg", "opus", "wma", "amr");
+
+        register(map, Category.VIDEO,
+                "mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "m4v", "3gp", "mpg", "mpeg");
+
+        register(map, Category.DOCUMENT,
+                "pdf", "txt", "md", "rtf", "doc", "docx", "ppt", "pptx",
+                "xls", "xlsx", "csv", "json", "xml", "yaml", "yml",
+                "html", "htm", "log", "ini", "properties");
+
+        return Collections.unmodifiableMap(map);
+    }
+
+    private static void register(Map<String, Category> target, Category category, String... extensions) {
+        for (String ext : extensions) {
+            target.put(ext, category);
+        }
+    }
+
+    /**
+     * Detects the category of a file.
+     *
+     * @param path file to classify; it does not have to exist, in which case only
+     *             the name-based checks can contribute
+     * @return the detected category, {@link Category#OTHER} when nothing matches
+     * @throws IllegalArgumentException if {@code path} is null
+     */
+    public static Category detectCategory(Path path) {
+
+        if (path == null) {
+            throw new IllegalArgumentException("path is null");
+        }
+
+        // 1. MIME-based detection.
+        Category mimeCategory = detectByMime(path);
+        if (mimeCategory != null) {
+            return mimeCategory;
+        }
+
+        // 2. Extension-based detection.
+        String ext = getExtension(path.getFileName() == null ? "" : path.getFileName().toString());
+        Category category = EXTENSION_MAP.get(ext);
+        if (category == null) {
+            return Category.OTHER;
+        }
+
+        // Apply the scanned-PDF rule here too. Otherwise the result depends on
+        // whether this platform's probeContentType happens to recognize PDFs.
+        if (PDF_EXTENSION.equals(ext) && isScannedPdfQuietly(path)) {
+            return Category.IMAGE;
+        }
+        return category;
+    }
+
+    /** String convenience overload of {@link #detectCategory(Path)}. */
+    public static Category detectCategory(String path) {
+        return detectCategory(Paths.get(path));
+    }
+
+    /** Returns the display label of the detected category. */
+    public static String detectCategoryLabel(Path path) {
+        return detectCategory(path).getLabel();
+    }
+
+    /** String convenience overload of {@link #detectCategoryLabel(Path)}. */
+    public static String detectCategoryLabel(String path) {
+        return detectCategory(path).getLabel();
+    }
+
+    /**
+     * Maps the platform-reported MIME type to a category.
+     *
+     * @return the category, or null when the MIME type is unknown, not covered,
+     *         or could not be determined
+     */
+    private static Category detectByMime(Path path) {
+
+        try {
+
+            String mime = Files.probeContentType(path);
+            if (mime == null) {
+                return null;
+            }
+
+            mime = mime.toLowerCase(Locale.ROOT);
+
+            if (mime.startsWith("image/")) {
+                return Category.IMAGE;
+            }
+
+            if (mime.startsWith("audio/")) {
+                return Category.AUDIO;
+            }
+
+            if (mime.startsWith("video/")) {
+                return Category.VIDEO;
+            }
+
+            if (mime.startsWith("text/") || mime.contains("pdf") || mime.contains("word")) {
+                if (mime.contains("pdf") && isScannedPdf(path)) {
+                    return Category.IMAGE;
+                }
+                return Category.DOCUMENT;
+            }
+
+        } catch (IOException | RuntimeException ignored) {
+            // Best effort: when the MIME type or file content cannot be read
+            // (missing file, permissions, ...), the caller falls back to the extension.
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns the lower-cased extension of a file name without the dot.
+     *
+     * @return the extension, or an empty string when the name is null, empty,
+     *         has no dot, or ends with a dot
+     */
+    public static String getExtension(String filename) {
+
+        if (filename == null || filename.isEmpty()) {
+            return "";
+        }
+
+        int idx = filename.lastIndexOf('.');
+
+        if (idx < 0 || idx == filename.length() - 1) {
+            return "";
+        }
+
+        return filename.substring(idx + 1).toLowerCase(Locale.ROOT);
+    }
+
+    /** Like {@link #isScannedPdf(Path)}, but answers false when the file cannot be read. */
+    private static boolean isScannedPdfQuietly(Path path) {
+        try {
+            return Files.isRegularFile(path) && isScannedPdf(path);
+        } catch (IOException | RuntimeException ignored) {
+            // An unreadable PDF simply keeps its extension-based category.
+            return false;
+        }
+    }
+
+    /**
+     * Heuristic: a PDF whose first {@link #SCAN_PROBE_BYTES} bytes mention
+     * {@code /Image} but not {@code /Font} is treated as a scan.
+     *
+     * <p>Only the head of the file is inspected, and only as plain text, so names
+     * stored inside compressed streams are not seen. This is a cheap
+     * approximation, not a definitive test.
+     *
+     * @throws IOException if the file cannot be read
+     */
+    private static boolean isScannedPdf(Path path) throws IOException {
+        int limit = (int) Math.min(Files.size(path), SCAN_PROBE_BYTES);
+        byte[] data = new byte[limit];
+        int filled = 0;
+        try (InputStream is = Files.newInputStream(path)) {
+            // A single read() may return fewer bytes than requested, so loop
+            // until the buffer is full or the stream ends.
+            while (filled < limit) {
+                int n = is.read(data, filled, limit - filled);
+                if (n < 0) {
+                    break;
+                }
+                filled += n;
+            }
+        }
+        String head = new String(data, 0, filled, StandardCharsets.US_ASCII);
+
+        // A font definition means the PDF carries real text.
+        boolean hasFont = head.contains("/Font");
+        // An image definition is what a scanned page consists of.
+        boolean hasImage = head.contains("/Image");
+
+        // Scan signature: images present, fonts absent.
+        return hasImage && !hasFont;
+    }
+}

+ 111 - 0
schedule-producer/src/test/java/util/FileTypeDetectorTest.java

@@ -0,0 +1,111 @@
+package util;
+
+import cn.com.yusys.producer.util.FileMetadataUtil;
+import cn.com.yusys.producer.util.FileTypeDetector;
+import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * 遍历指定目录下所有文件并检测文件类型
+ */
+public class FileTypeDetectorTest {
+
+    // 指定需要处理的根目录路径
+    private static final String TARGET_DIR_PATH = "/Users/chaizi/Work/Projects/yusys/ai-study/code/trea-demo01/duomotai/examples/";
+
+    @Test
+    public void testDetectAllFiles() {
+        // 创建目录文件对象
+        File targetDir = new File(TARGET_DIR_PATH);
+
+        // 1. 校验目录是否存在且是有效目录
+        if (!targetDir.exists()) {
+            System.err.println("错误:指定的目录不存在 -> " + TARGET_DIR_PATH);
+            return;
+        }
+        if (!targetDir.isDirectory()) {
+            System.err.println("错误:指定的路径不是目录 -> " + TARGET_DIR_PATH);
+            return;
+        }
+
+        // 2. 获取目录下所有文件(不含子目录,如需递归处理子目录可看扩展方案)
+        File[] files = targetDir.listFiles();
+        if (files == null || files.length == 0) {
+            System.out.println("提示:指定目录下没有文件 -> " + TARGET_DIR_PATH);
+            return;
+        }
+
+        // 3. 遍历所有文件并调用检测方法
+        for (File file : files) {
+            // 只处理文件,跳过子目录和以.开头的隐藏文件
+            if (file.isFile() && !file.getName().startsWith(".")) {
+                String filePath = file.getAbsolutePath();
+                try {
+                    System.out.println("开始检测文件:" + filePath);
+                    // 调用文件类型检测方法
+                    String categoryLabel = FileTypeDetector.detectCategoryLabel(filePath);
+                    Map<String, Object> extract = FileMetadataUtil.extract(filePath);
+                    extract.forEach((key, value) -> System.out.println(key + ": " + value));
+                    System.out.println("文件 [" + file.getName() + "] 检测结果:" + categoryLabel);
+                    System.out.println("----------------------------------------");
+                } catch (Exception e) {
+                    // 捕获单个文件处理异常,不影响其他文件
+                    System.err.println("处理文件失败 [" + filePath + "]:" + e.getMessage());
+                    e.printStackTrace();
+                }
+            }
+        }
+
+        System.out.println("所有文件检测完成!");
+    }
+
+    // 扩展:递归处理目录下所有文件(包括子目录)
+    @Test
+    public void testDetectAllFilesRecursively() {
+        File targetDir = new File(TARGET_DIR_PATH);
+        if (!targetDir.exists() || !targetDir.isDirectory()) {
+            System.err.println("错误:目录不存在或不是有效目录 -> " + TARGET_DIR_PATH);
+            return;
+        }
+
+        // 递归遍历目录
+        traverseDirectory(targetDir);
+        System.out.println("递归检测所有文件完成!");
+    }
+
+    /**
+     * 递归遍历目录并处理所有文件
+     * @param dir 目标目录
+     */
+    private void traverseDirectory(File dir) {
+        File[] files = dir.listFiles();
+        if (files == null) {
+            return;
+        }
+
+        for (File file : files) {
+            if (file.isDirectory()) {
+                // 跳过以.开头的隐藏目录
+                if (!file.getName().startsWith(".")) {
+                    // 递归处理子目录
+                    traverseDirectory(file);
+                }
+            } else {
+                // 处理文件,跳过以.开头的隐藏文件
+                if (!file.getName().startsWith(".")) {
+                    String filePath = file.getAbsolutePath();
+                    try {
+                        System.out.println("开始检测文件:" + filePath);
+                        String categoryLabel = FileTypeDetector.detectCategoryLabel(filePath);
+                        System.out.println("文件 [" + file.getName() + "] 检测结果:" + categoryLabel);
+                        System.out.println("----------------------------------------");
+                    } catch (Exception e) {
+                        System.err.println("处理文件失败 [" + filePath + "]:" + e.getMessage());
+                    }
+                }
+            }
+        }
+    }
+}