Parcourir la source

fix: match multiple captions

icecraft il y a 8 mois
Parent
commit
15cd97ff17
1 fichier modifié avec 36 ajouts et 0 suppressions
  1. 36 0
      magic_pdf/model/magic_model.py

+ 36 - 0
magic_pdf/model/magic_model.py

@@ -553,6 +553,42 @@ class MagicModel:
                 }
             )
 
+        for i in range(len(objects)):
+            j = i + OBJ_IDX_OFFSET
+            if j in seen_idx:
+                continue
+            seen_idx.add(j)
+            nearest_dis, nearest_sub_idx = float('inf'), -1
+            for k in range(len(subjects)):
+                dis = bbox_distance(objects[i]['bbox'], subjects[k]['bbox'])
+                if dis < nearest_dis:
+                    nearest_dis = dis
+                    nearest_sub_idx = k
+
+            for k in range(len(subjects)):
+                if k != nearest_sub_idx: continue
+                if k in seen_sub_idx:
+                    for kk in range(len(ret)):
+                        if ret[kk]['sub_idx'] == k:
+                            ret[kk]['obj_bboxes'].append({'score': objects[i]['score'], 'bbox': objects[i]['bbox']})
+                            break
+                else:
+                    ret.append(
+                        {
+                            'sub_bbox': {
+                                'bbox': subjects[k]['bbox'],
+                                'score': subjects[k]['score'],
+                            },
+                            'obj_bboxes': [
+                                {'score': objects[i]['score'], 'bbox': objects[i]['bbox']}
+                            ],
+                            'sub_idx': k,
+                        }
+                    )
+                seen_sub_idx.add(k)
+                seen_idx.add(k)
+
+
         for i in range(len(subjects)):
             if i in seen_sub_idx:
                 continue