mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-04-29 12:10:24 +00:00
Some checks are pending
CodeQL Advanced / Analyze (actions) (push) Waiting to run
CodeQL Advanced / Analyze (javascript-typescript) (push) Waiting to run
CodeQL Advanced / Analyze (python) (push) Waiting to run
Pre-commit / pre-commit (push) Waiting to run
Test / Run Python Tests (push) Waiting to run
Co-authored-by: Douglas <douglas.ym.lai@gmail.com>
79 lines
3.4 KiB
Python
79 lines
3.4 KiB
Python
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
|
|
from dataclasses import dataclass
|
|
import json
|
|
import sys
|
|
|
|
|
|
|
|
|
|
@dataclass
class RectAndField:
    """One bounding box paired with the form field it was taken from.

    Two of these are created per form field by `get_bounding_box_messages`:
    one for the field's label box and one for its entry box.
    """

    # Four numbers; indices 0 and 2 are used as the horizontal bounds and
    # indices 1 and 3 as the vertical bounds (height is rect[3] - rect[1]).
    rect: list[float]
    # Which box of the field this is: "label" or "entry".
    rect_type: str
    # The originating form-field dict, kept so messages can quote its
    # description and so boxes of the same field can be recognised.
    field: dict
|
|
|
|
|
|
# Once this many messages have been collected the check stops early. The
# leading "Read N fields" line counts toward the limit.
_MAX_MESSAGES = 20
# Font size assumed for an entry whose `entry_text` does not give one.
_DEFAULT_FONT_SIZE = 14
_ABORT_MESSAGE = "Aborting further checks; fix bounding boxes and try again"


def _rects_intersect(r1, r2) -> bool:
    """Return True when two boxes overlap; boxes that only touch do not."""
    disjoint_horizontal = r1[0] >= r2[2] or r1[2] <= r2[0]
    disjoint_vertical = r1[1] >= r2[3] or r1[3] <= r2[1]
    return not (disjoint_horizontal or disjoint_vertical)


def get_bounding_box_messages(fields_json_stream) -> list[str]:
    """Check the label and entry bounding boxes in a fields JSON document.

    Args:
        fields_json_stream: Readable file-like object handed to `json.load`.
            The parsed document must contain a "form_fields" list whose items
            provide "label_bounding_box", "entry_bounding_box" and
            "page_number". "description" is read when a failure is reported.
            "entry_text" is optional and may carry a "font_size".

    Returns:
        The messages in the order they were produced: a "Read N fields" line,
        then one "FAILURE: ..." line for every pair of boxes on the same page
        that overlap and for every entry box that is shorter than its font
        size. If no failure was found, a final "SUCCESS: ..." line is added.
        As soon as the list holds 20 messages an abort notice is appended and
        the list is returned without checking further.

    Raises:
        KeyError: If a required key is missing from the document.
        json.JSONDecodeError: If the stream does not contain valid JSON.
    """
    fields = json.load(fields_json_stream)
    form_fields = fields["form_fields"]
    messages = [f"Read {len(form_fields)} fields"]

    # Flatten every field into its two boxes, label first, then entry.
    rects_and_fields = []
    for form_field in form_fields:
        rects_and_fields.append(
            RectAndField(form_field["label_bounding_box"], "label", form_field)
        )
        rects_and_fields.append(
            RectAndField(form_field["entry_bounding_box"], "entry", form_field)
        )

    def limit_reached() -> bool:
        # Called right after each failure is recorded; adds the abort notice
        # exactly once, at the moment the limit is hit.
        if len(messages) < _MAX_MESSAGES:
            return False
        messages.append(_ABORT_MESSAGE)
        return True

    has_error = False
    for i, ri in enumerate(rects_and_fields):
        # Compare against later boxes only, so each pair is visited once.
        for j in range(i + 1, len(rects_and_fields)):
            rj = rects_and_fields[j]
            if ri.field["page_number"] != rj.field["page_number"]:
                continue
            if not _rects_intersect(ri.rect, rj.rect):
                continue

            has_error = True
            if ri.field is rj.field:
                # Same field object: its own label and entry boxes collide.
                messages.append(
                    "FAILURE: intersection between label and entry bounding boxes "
                    f"for `{ri.field['description']}` ({ri.rect}, {rj.rect})"
                )
            else:
                messages.append(
                    f"FAILURE: intersection between {ri.rect_type} bounding box "
                    f"for `{ri.field['description']}` ({ri.rect}) "
                    f"and {rj.rect_type} bounding box "
                    f"for `{rj.field['description']}` ({rj.rect})"
                )
            if limit_reached():
                return messages

        # Entry boxes that will receive text must be at least as tall as the
        # font used to fill them in.
        if ri.rect_type == "entry" and "entry_text" in ri.field:
            font_size = ri.field["entry_text"].get("font_size", _DEFAULT_FONT_SIZE)
            entry_height = ri.rect[3] - ri.rect[1]
            if entry_height < font_size:
                has_error = True
                messages.append(
                    f"FAILURE: entry bounding box height ({entry_height}) "
                    f"for `{ri.field['description']}` is too short for the text content "
                    f"(font size: {font_size}). "
                    "Increase the box height or decrease the font size."
                )
                if limit_reached():
                    return messages

    if not has_error:
        messages.append("SUCCESS: All bounding boxes are valid")
    return messages
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the path of the
    # fields JSON file, and prints every resulting message on its own line.
    if len(sys.argv) != 2:
        print("Usage: check_bounding_boxes.py [fields.json]")
        sys.exit(1)
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, so non-ASCII text in the file is read the same everywhere.
    with open(sys.argv[1], encoding="utf-8") as f:
        messages = get_bounding_box_messages(f)
    for msg in messages:
        print(msg)
|