agent-zero/plugins/_office/helpers/pptx_writer.py
Alessandro 3466160e4c Harden Office canvas sync and PPTX output
Sync document_artifact results into an already-open Office canvas without auto-opening a closed canvas.

Generate PPTX artifacts through the Office plugin writer so PowerPoint decks open in Impress with visible multi-slide content.

Add focused regression coverage for canvas sync behavior and PPTX slide creation.
2026-05-02 15:28:43 +02:00

225 lines
8.8 KiB
Python

from __future__ import annotations
import io
import json
import re
import zipfile
from typing import Any
from xml.sax.saxutils import escape
# XML namespace URIs interpolated into the hand-written OOXML parts of this module.
# DrawingML main namespace; bound to the ``a:`` prefix in slide XML.
A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"
# PresentationML main namespace; bound to the ``p:`` prefix.
P_NS = "http://schemas.openxmlformats.org/presentationml/2006/main"
# Office document relationships namespace; bound to the ``r:`` prefix.
R_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
# Package content-types namespace; default namespace of ``[Content_Types].xml``.
CT_NS = "http://schemas.openxmlformats.org/package/2006/content-types"
# Package relationships namespace; default namespace of the ``.rels`` parts.
REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
def pptx_from_text(title: str, content: str) -> bytes:
    """Build a PPTX file from a document title and free-form text.

    The text is first converted into slide specs by ``slides_from_text`` and
    the resulting list is handed to ``pptx_from_slides``.

    Args:
        title: Document title, forwarded to ``slides_from_text``.
        content: Slide text, forwarded to ``slides_from_text``.

    Returns:
        The bytes produced by ``pptx_from_slides``.
    """
    slide_specs = slides_from_text(title, content)
    return pptx_from_slides(slide_specs)
def slides_from_text(title: str, content: str) -> list[dict[str, Any]]:
    """Turn a title plus text content into a list of slide specs.

    Args:
        title: Document title; blank or missing values become "Presentation".
        content: Text handed to ``normalize_slides``.

    Returns:
        * A single empty slide carrying the document title when the content
          yields no slides.
        * A single slide titled with the document title, whose first bullet
          is the parsed slide's title, when the content yields exactly one
          slide, contains no "---", and that slide's title differs
          (case-insensitively) from the document title.
        * Otherwise the slides exactly as ``normalize_slides`` returned them.
    """
    parsed = normalize_slides(content)
    deck_title = str(title or "Presentation").strip() or "Presentation"
    if not parsed:
        return [{"title": deck_title, "bullets": []}]

    has_separator = "---" in str(content or "")
    if has_separator or len(parsed) != 1:
        return parsed

    only_slide = parsed[0]
    heading = str(only_slide.get("title") or "").strip()
    if not heading or heading.casefold() == deck_title.casefold():
        return parsed

    # Demote the parsed heading to the first bullet under the document title.
    folded = [heading]
    folded.extend(only_slide.get("bullets", []))
    return [{"title": deck_title, "bullets": folded}]
def normalize_slides(value: Any) -> list[dict[str, Any]]:
    """Coerce loosely structured slide input into a list of slide specs.

    Each returned item is a dict with a ``"title"`` string and a ``"bullets"``
    list.

    Handling by input type:
        * ``None`` or a blank string: returns ``[]``.
        * A string starting with ``[`` or ``{``: parsed as JSON and the
          decoded value is normalized recursively. If it is not valid JSON
          (for example ``"[Draft] Roadmap"``) it is treated as plain text.
        * Any other string: split into chunks on lines made only of three or
          more dashes; in each chunk the first non-empty cleaned line is the
          title and the remaining lines are bullets.
        * A dict: one slide built by ``_slide_from_mapping``.
        * A list: dict items go through ``_slide_from_mapping``, string items
          are normalized recursively, list/tuple items become one slide
          (first entry is the title), anything else becomes a slide titled
          ``str(item)``.
        * Anything else: a single slide titled ``str(value)``.

    Args:
        value: Slide input in any of the shapes listed above.

    Returns:
        The normalized slide specs, possibly empty.
    """
    if value is None:
        return []

    if isinstance(value, str):
        stripped = value.strip()
        if not stripped:
            return []
        if stripped.startswith(("[", "{")):
            try:
                decoded = json.loads(stripped)
            except ValueError:
                # Only looks like JSON (json.JSONDecodeError is a ValueError);
                # fall through and parse it as ordinary slide text.
                pass
            else:
                return normalize_slides(decoded)
        slides = []
        for chunk in re.split(r"(?m)^\s*---+\s*$", stripped):
            lines = [_clean_slide_line(line) for line in chunk.splitlines() if line.strip()]
            lines = [line for line in lines if line]
            if lines:
                slides.append({"title": lines[0], "bullets": lines[1:]})
        return slides

    if isinstance(value, dict):
        return [_slide_from_mapping(value)]

    if isinstance(value, list):
        slides = []
        for item in value:
            if isinstance(item, dict):
                slides.append(_slide_from_mapping(item))
            elif isinstance(item, str):
                slides.extend(normalize_slides(item))
            elif isinstance(item, (list, tuple)):
                lines = [_clean_slide_line(part) for part in item if str(part).strip()]
                # Same post-clean filter as the text branch, so a slide never
                # ends up with an empty title or empty bullets.
                lines = [line for line in lines if line]
                if lines:
                    slides.append({"title": lines[0], "bullets": lines[1:]})
            else:
                slides.append({"title": str(item), "bullets": []})
        return slides

    return [{"title": str(value), "bullets": []}]
def pptx_from_slides(slides: list[dict[str, Any]]) -> bytes:
    """Serialize slide specs to PPTX bytes.

    The input is passed through ``normalize_slides``; when that yields
    nothing, a single empty slide titled "Presentation" is used instead.

    Args:
        slides: Slide specs (anything ``normalize_slides`` accepts).

    Returns:
        PPTX file content from the python-pptx based writer, or from the
        hand-written OOXML writer when the former raises.
    """
    deck = normalize_slides(slides) or [{"title": "Presentation", "bullets": []}]
    try:
        payload = _pptx_from_slides_with_python_pptx(deck)
    except Exception:
        # Deliberate best-effort: any failure in the python-pptx writer
        # (which imports ``pptx`` lazily inside its body) falls back to the
        # dependency-free OOXML writer.
        payload = _pptx_from_slides_ooxml(deck)
    return payload
def _slide_from_mapping(value: dict[str, Any]) -> dict[str, Any]:
    """Build one slide spec from a mapping.

    The title comes from ``"title"``, then ``"heading"``, then the literal
    "Slide". Bullets come from ``"bullets"``; when that key is missing or
    ``None`` they are derived from ``"body"`` or ``"content"`` instead.

    Bullet sources are interpreted as follows:
        * A string is split into lines; blank lines are skipped.
        * A list or tuple under ``"body"``/``"content"`` contributes one
          bullet per item; any other body value is stringified and split
          into lines.
        * Any iterable under ``"bullets"`` contributes one bullet per item.
        * A non-iterable ``"bullets"`` value (e.g. a number) becomes a single
          bullet instead of raising ``TypeError``.

    Args:
        value: Mapping describing a slide.

    Returns:
        A dict with a non-empty ``"title"`` and a ``"bullets"`` list that
        contains no empty strings.
    """
    title = _clean_slide_line(value.get("title") or value.get("heading") or "Slide")

    raw = value.get("bullets")
    if raw is None:
        raw = value.get("body") or value.get("content") or ""
        if not isinstance(raw, (list, tuple)):
            # Scalar bodies are stringified and split into lines below.
            raw = str(raw)

    if isinstance(raw, str):
        entries = [line for line in raw.splitlines() if line.strip()]
    else:
        try:
            entries = list(raw)
        except TypeError:
            # A lone scalar such as ``"bullets": 3`` is a single bullet.
            entries = [raw]

    bullets = [_clean_slide_line(entry) for entry in entries]
    return {"title": title or "Slide", "bullets": [bullet for bullet in bullets if bullet]}
def _clean_slide_line(value: Any) -> str:
    """Return ``value`` as a single trimmed line without list/heading markers.

    After stringifying and trimming, one leading Markdown heading marker
    (one to six ``#`` followed by whitespace) is removed, then one leading
    bullet marker (``-``, ``*`` or ``•``) or numbered marker (digits followed
    by ``.`` or ``)``), each followed by whitespace.

    Args:
        value: Any value. ``None`` and empty non-numeric values (``""``,
            ``[]``, ``{}``) produce ``""``. Numbers are stringified, so ``0``
            yields ``"0"`` rather than being dropped.

    Returns:
        The cleaned text, possibly empty.
    """
    # Numeric zero is falsy but is real content; only None and empty
    # non-numeric values collapse to an empty line.
    if value is None or (not value and not isinstance(value, (int, float))):
        return ""
    line = str(value).strip()
    line = re.sub(r"^\s{0,3}#{1,6}\s+", "", line)
    line = re.sub(r"^\s*(?:[-*•]|\d+[.)])\s+", "", line)
    return line.strip()
def _pptx_from_slides_with_python_pptx(slides: list[dict[str, Any]]) -> bytes:
    """Render slide specs with the python-pptx library.

    ``pptx`` is imported inside the function, so a missing installation
    surfaces as an exception raised from this call.

    Args:
        slides: Slide specs; each provides ``"title"`` (defaults to "Slide")
            and ``"bullets"`` (defaults to none).

    Returns:
        The saved presentation as bytes.
    """
    from pptx import Presentation  # type: ignore
    from pptx.util import Inches  # type: ignore

    deck = Presentation()
    for spec in slides:
        layouts = deck.slide_layouts
        # Use the second layout when the template has one, else the first.
        layout_index = 1 if len(layouts) > 1 else 0
        page = deck.slides.add_slide(layouts[layout_index])

        heading = str(spec.get("title") or "Slide")
        points = [str(entry) for entry in spec.get("bullets") or []]

        title_shape = page.shapes.title
        if title_shape:
            title_shape.text = heading
        else:
            # No title shape on this layout: draw the heading in a text box.
            heading_box = page.shapes.add_textbox(Inches(0.6), Inches(0.35), Inches(8.8), Inches(0.8))
            heading_box.text_frame.text = heading

        # NOTE(review): python-pptx documents placeholder lookup as keyed by
        # the placeholder idx rather than by position; confirm the len()
        # guard is sufficient for templates other than the default one.
        placeholders = page.placeholders
        body = placeholders[1] if len(placeholders) > 1 else None
        if body is None:
            body = page.shapes.add_textbox(Inches(0.85), Inches(1.45), Inches(8.35), Inches(4.55))

        frame = body.text_frame
        frame.clear()
        if not points:
            frame.text = ""
            continue
        for position, point in enumerate(points):
            # The first bullet reuses the frame's first paragraph.
            para = frame.add_paragraph() if position else frame.paragraphs[0]
            para.text = point
            para.level = 0

    out = io.BytesIO()
    deck.save(out)
    return out.getvalue()
def _pptx_from_slides_ooxml(slides: list[dict[str, Any]]) -> bytes:
    """Assemble a PPTX package by hand, without third-party libraries.

    The package contains the content-types part, the root relationships
    part pointing at ``ppt/presentation.xml``, the presentation part with its
    relationships, and one ``ppt/slides/slideN.xml`` part per slide spec
    (numbered from 1). The parts are zipped by ``_zip_map``.

    Args:
        slides: Slide specs to serialize.

    Returns:
        The zipped package as bytes.
    """
    slide_count = len(slides)
    root_rels = (
        '<?xml version="1.0" encoding="UTF-8"?>'
        f'<Relationships xmlns="{REL_NS}">'
        '<Relationship Id="rId1" '
        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
        'Target="ppt/presentation.xml"/>'
        "</Relationships>"
    )
    parts: dict[str, str | bytes] = {
        "[Content_Types].xml": _pptx_content_types(slide_count),
        "_rels/.rels": root_rels,
        "ppt/_rels/presentation.xml.rels": _pptx_presentation_rels(slide_count),
        "ppt/presentation.xml": _pptx_presentation_xml(slide_count),
    }
    parts.update(
        {
            f"ppt/slides/slide{number}.xml": _pptx_slide_xml(spec)
            for number, spec in enumerate(slides, start=1)
        }
    )
    return _zip_map(parts)
def _pptx_content_types(count: int) -> str:
    """Return the ``[Content_Types].xml`` document for ``count`` slides.

    The document declares defaults for the ``rels`` and ``xml`` extensions,
    an override for ``/ppt/presentation.xml``, and one override per slide
    part ``/ppt/slides/slide1.xml`` .. ``/ppt/slides/slide{count}.xml``.

    Args:
        count: Number of slide parts to declare.

    Returns:
        The XML document as a string.
    """
    presentation_override = (
        '<Override PartName="/ppt/presentation.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"/>'
    )
    slide_overrides = [
        f'<Override PartName="/ppt/slides/slide{number}.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.presentationml.slide+xml"/>'
        for number in range(1, count + 1)
    ]
    pieces = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<Types xmlns="{CT_NS}">',
        '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>',
        '<Default Extension="xml" ContentType="application/xml"/>',
        presentation_override,
        *slide_overrides,
        "</Types>",
    ]
    return "".join(pieces)
def _pptx_presentation_rels(count: int) -> str:
    """Return the relationships part for ``ppt/presentation.xml``.

    Emits one slide relationship per slide: ``rId{n}`` targets
    ``slides/slide{n}.xml`` for ``n`` from 1 to ``count``.

    Args:
        count: Number of slides.

    Returns:
        The XML document as a string.
    """
    entries = "".join(
        f'<Relationship Id="rId{number}" '
        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" '
        f'Target="slides/slide{number}.xml"/>'
        for number in range(1, count + 1)
    )
    header = '<?xml version="1.0" encoding="UTF-8"?>'
    opening = f'<Relationships xmlns="{REL_NS}">'
    return header + opening + entries + "</Relationships>"
def _pptx_presentation_xml(count: int) -> str:
    """Return the ``ppt/presentation.xml`` document for ``count`` slides.

    The slide id list holds one ``p:sldId`` per slide; slide ``n`` gets the
    id ``255 + n`` and the relationship id ``rId{n}``. The slide size is
    fixed at ``cx="9144000" cy="5143500"``.

    Args:
        count: Number of slides.

    Returns:
        The XML document as a string.
    """
    entries = []
    for number in range(1, count + 1):
        entries.append(f'<p:sldId id="{255 + number}" r:id="rId{number}"/>')
    id_list = "".join(entries)
    pieces = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<p:presentation xmlns:p="{P_NS}" xmlns:r="{R_NS}">',
        f"<p:sldIdLst>{id_list}</p:sldIdLst>",
        '<p:sldSz cx="9144000" cy="5143500"/>',
        "</p:presentation>",
    ]
    return "".join(pieces)
# Matches every code point that XML 1.0 does not allow in a document:
# C0 controls other than tab/LF/CR, surrogates, and U+FFFE/U+FFFF.
_XML_ILLEGAL_CHARS = re.compile(
    r"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]"
)


def _pptx_slide_xml(slide: dict[str, Any]) -> str:
    """Return the XML for one slide part.

    The slide holds a single shape named "Content" whose text body has one
    paragraph for the title (defaults to "Slide") followed by one paragraph
    per bullet.

    Characters that XML 1.0 forbids are removed from the text before it is
    entity-escaped; left in place they would make the part malformed, and
    lone surrogates would also fail the later UTF-8 encoding.

    Args:
        slide: Slide spec with optional ``"title"`` and ``"bullets"``.

    Returns:
        The XML document as a string.
    """
    title = str(slide.get("title") or "Slide")
    bullets = [str(item) for item in slide.get("bullets") or []]
    paragraphs = [title, *bullets]
    text = "".join(
        f"<a:p><a:r><a:t>{escape(_XML_ILLEGAL_CHARS.sub('', item))}</a:t></a:r></a:p>"
        for item in paragraphs
    )
    return (
        '<?xml version="1.0" encoding="UTF-8"?>'
        f'<p:sld xmlns:a="{A_NS}" xmlns:p="{P_NS}">'
        "<p:cSld><p:spTree>"
        '<p:nvGrpSpPr><p:cNvPr id="1" name=""/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr>'
        "<p:grpSpPr/>"
        '<p:sp><p:nvSpPr><p:cNvPr id="2" name="Content"/><p:cNvSpPr/><p:nvPr/></p:nvSpPr>'
        f"<p:txBody><a:bodyPr/><a:lstStyle/>{text}</p:txBody>"
        "</p:sp>"
        "</p:spTree></p:cSld>"
        "</p:sld>"
    )
def _zip_map(files_map: dict[str, str | bytes]) -> bytes:
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w", compression=zipfile.ZIP_DEFLATED) as archive:
for name, value in files_map.items():
archive.writestr(name, value.encode("utf-8") if isinstance(value, str) else value)
return buffer.getvalue()