
Merge pull request #203 from SunAhong1993/syf_docs

add dataset path check
Jason committed 5 years ago
Commit df84bc6b78

+ 2 - 1
paddlex/cv/datasets/dataset.py

@@ -46,7 +46,7 @@ def is_valid(sample):
                 return False
             elif isinstance(s, np.ndarray) and s.size == 0:
                 return False
-            elif isinstance(s, collections.Sequence) and len(s) == 0:
+            elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
                 return False
     return True
 
@@ -55,6 +55,7 @@ def get_encoding(path):
     f = open(path, 'rb')
     data = f.read()
     file_encoding = chardet.detect(data).get('encoding')
+    f.close()
     return file_encoding
 
 

+ 3 - 0
paddlex/cv/datasets/easydata_cls.py

@@ -18,6 +18,7 @@ import random
 import copy
 import json
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .imagenet import ImageNet
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):
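
Each dataset loader now runs both paths from the file list through `path_normalization` (added to paddlex/utils/utils.py later in this diff), so lists generated on Windows with backslash separators still resolve on Linux and vice versa. A small illustration of the intent; the file-list line and `data_dir` below are made up:

```python
# Illustration only: how a file-list line with mixed separators is handled.
import os.path as osp

from paddlex.utils import path_normalization  # helper added in this PR

data_dir = "/data/easydata"
line = "JPEGImages\\0001.jpg Annotations\\0001.json"

img_file, json_file = [osp.join(data_dir, x) for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)    # on Linux/macOS: "\" segments rejoined with "/"
json_file = path_normalization(json_file)  # on Windows: "/" segments rejoined with "\"
print(img_file)  # /data/easydata/JPEGImages/0001.jpg on Linux
```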

+ 3 - 0
paddlex/cv/datasets/easydata_det.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .voc import VOCDetection
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):

+ 3 - 0
paddlex/cv/datasets/easydata_seg.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic
@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):

+ 2 - 0
paddlex/cv/datasets/imagenet.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -66,6 +67,7 @@ class ImageNet(Dataset):
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
                 if not is_pic(items[0]):
                     continue
                 full_path = osp.join(data_dir, items[0])

+ 3 - 1
paddlex/cv/datasets/seg_dataset.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic
@@ -61,10 +62,11 @@ class SegDataset(Dataset):
                 for line in f:
                     item = line.strip()
                     self.labels.append(item)
-
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
+                items[1] = path_normalization(items[1])
                 if not is_pic(items[0]):
                     continue
                 full_path_im = osp.join(data_dir, items[0])

+ 8 - 2
paddlex/cv/datasets/voc.py

@@ -22,6 +22,7 @@ import numpy as np
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
                     break
                 img_file, xml_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                xml_file = path_normalization(xml_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(xml_file):
@@ -106,8 +109,11 @@ class VOCDetection(Dataset):
                     ct = int(tree.find('id').text)
                     im_id = np.array([int(tree.find('id').text)])
                 pattern = re.compile('<object>', re.IGNORECASE)
-                obj_tag = pattern.findall(
-                    str(ET.tostringlist(tree.getroot())))[0][1:-1]
+                obj_match = pattern.findall(
+                    str(ET.tostringlist(tree.getroot())))
+                if len(obj_match) == 0:
+                    continue
+                obj_tag = obj_match[0][1:-1]
                 objs = tree.findall(obj_tag)
                 pattern = re.compile('<size>', re.IGNORECASE)
                 size_tag = pattern.findall(
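
The other voc.py change guards against annotation files that contain no `<object>` element at all: `pattern.findall(...)[0]` used to raise IndexError there, whereas such samples are now skipped. A sketch of the guard in isolation (the XML path is hypothetical):

```python
# Sketch of the empty-<object> guard; "annotation.xml" is a hypothetical path.
import re
import xml.etree.ElementTree as ET

tree = ET.parse("annotation.xml")
pattern = re.compile('<object>', re.IGNORECASE)
obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
    # Indexing obj_match[0] here used to raise IndexError; skip such files instead.
    print("no <object> tag found, skipping this annotation")
else:
    obj_tag = obj_match[0][1:-1]  # strip '<' and '>' to get the actual tag name
    objs = tree.findall(obj_tag)
```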

+ 4 - 0
paddlex/tools/x2coco.py

@@ -22,6 +22,7 @@ import shutil
 import numpy as np
 import PIL.ImageDraw
 from .base import MyEncoder, is_pic, get_encoding
+from paddlex.utils import path_normalization
         
         
 class X2COCO(object):
@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
         image["height"] = json_info["imageHeight"]
         image["width"] = json_info["imageWidth"]
         image["id"] = image_id + 1
+        json_info["imagePath"] = path_normalization(json_info["imagePath"])
         image["file_name"] = osp.split(json_info["imagePath"])[-1]
         return image
     
@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
         image["height"] = img.shape[0]
         image["width"] = img.shape[1]
         image["id"] = image_id + 1
+        img_path = path_normalization(img_path)
         image["file_name"] = osp.split(img_path)[-1]
         return image
     
@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO):
         image["height"] = json_info["size"]["height"]
         image["width"] = json_info["size"]["width"]
         image["id"] = image_id + 1
+        json_info["path"] = path_normalization(json_info["path"])
         image["file_name"] = osp.split(json_info["path"])[-1]
         return image
     

+ 1 - 0
paddlex/utils/__init__.py

@@ -17,6 +17,7 @@ from . import logging
 from . import utils
 from . import save
 from .utils import seconds_to_hms
+from .utils import path_normalization
 from .download import download
 from .download import decompress
 from .download import download_and_decompress

+ 14 - 5
paddlex/utils/utils.py

@@ -20,6 +20,7 @@ import numpy as np
 import six
 import yaml
 import math
+import platform
 from . import logging
 
 
@@ -49,18 +50,26 @@ def get_environ_info():
                 info['num'] = fluid.core.get_cuda_device_count()
     return info
 
+def path_normalization(path):
+    win_sep = "\\"
+    other_sep = "/"
+    if platform.system() == "Windows":
+        path = win_sep.join(path.split(other_sep))
+    else:
+        path = other_sep.join(path.split(win_sep))
+    return path
 
 def parse_param_file(param_file, return_shape=True):
     from paddle.fluid.proto.framework_pb2 import VarType
     f = open(param_file, 'rb')
-    version = np.fromstring(f.read(4), dtype='int32')
-    lod_level = np.fromstring(f.read(8), dtype='int64')
+    version = np.frombuffer(f.read(4), dtype='int32')
+    lod_level = np.frombuffer(f.read(8), dtype='int64')
     for i in range(int(lod_level)):
-        _size = np.fromstring(f.read(8), dtype='int64')
+        _size = np.frombuffer(f.read(8), dtype='int64')
         _ = f.read(_size)
-    version = np.fromstring(f.read(4), dtype='int32')
+    version = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc = VarType.TensorDesc()
-    tensor_desc_size = np.fromstring(f.read(4), dtype='int32')
+    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
     tensor_shape = tuple(tensor_desc.dims)
     if return_shape:
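
Two notes on the utils.py hunk: `path_normalization` simply rewrites path separators toward the host platform's convention, and `np.fromstring` on raw bytes is deprecated, so `parse_param_file` now reads the headers with `np.frombuffer`, which interprets the bytes without an extra copy. A quick behavioural check of the new helper (the sample path and assertions are illustrative, not part of the PR):

```python
# Illustrative check of path_normalization's behaviour on either platform.
import platform

from paddlex.utils import path_normalization

p = path_normalization("JPEGImages\\subdir/0001.jpg")
if platform.system() == "Windows":
    assert "/" not in p   # "/" segments re-joined with "\"
else:
    assert "\\" not in p  # "\" segments re-joined with "/"
print(p)
```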