
def render_pdf(d, path):
    """Build a sectioned report PDF from the record ``d`` and write it to ``path``.

    The flowable list contains two explicit ``PageBreak`` markers: one after the
    abstract/keywords and one after the method/datasets sections. The abstract
    (which quotes the headline test metric) is therefore placed on an earlier
    page than the results table that repeats it.

    Args:
        d: Mapping indexed with the keys ``title``, ``authors`` (iterable of
            2-item pairs rendered as ``"first (second)"``), ``method``,
            ``task``, ``primary_benchmark``, ``test_acc``, ``val_acc``,
            ``metric_name``, ``prior_best``, ``beats_sota``, ``params_m``,
            ``datasets`` (iterable of strings), ``optimizer``, ``lr``,
            ``batch``, ``epochs``, ``baseline_name``, ``baseline_val``,
            ``baseline_test``, ``limitations`` (iterable of strings) and
            ``funding_note``.
        path: Output filename handed to ``SimpleDocTemplate``.

    Returns:
        None. The PDF is produced as a side effect of ``build``.
    """
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.platypus import (SimpleDocTemplate, Paragraph, Spacer,
                                    Table, TableStyle, PageBreak)

    # --- paragraph styles -------------------------------------------------
    sheet = getSampleStyleSheet()
    title_style = ParagraphStyle(
        "H1", parent=sheet["Title"], fontSize=16, leading=20, spaceAfter=6)
    byline_style = ParagraphStyle(
        "AUTH", parent=sheet["Normal"], fontSize=9.5,
        textColor=colors.grey, spaceAfter=10)
    heading_style = ParagraphStyle(
        "H2", parent=sheet["Heading2"], fontSize=12, spaceBefore=8, spaceAfter=4)
    body_style = ParagraphStyle(
        "BODY", parent=sheet["Normal"], fontSize=10, leading=14, spaceAfter=6)

    # NOTE(review): values from ``d`` are passed to Paragraph unescaped;
    # presumably they never contain markup characters such as "<" -- confirm.
    def heading(text):
        """Section heading paragraph."""
        return Paragraph(text, heading_style)

    def body(text):
        """Body-text paragraph."""
        return Paragraph(text, body_style)

    def striped_table(rows, widths, header_hex, stripe_hex, extra=()):
        """Table with a coloured header row, grid lines and striped data rows.

        ``extra`` commands are spliced in right after the GRID command so the
        overall command order is the same as a hand-written style list.
        """
        table = Table(rows, colWidths=widths)
        commands = [
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor(header_hex)),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTSIZE", (0, 0), (-1, -1), 9.5),
            ("GRID", (0, 0), (-1, -1), 0.4, colors.grey),
            *extra,
            ("ROWBACKGROUNDS", (0, 1), (-1, -1),
             [colors.white, colors.HexColor(stripe_hex)]),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 4),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
        ]
        table.setStyle(TableStyle(commands))
        return table

    # --- derived text -----------------------------------------------------
    if d["beats_sota"]:
        sota_phrase = f"surpassing the previous best of {d['prior_best']}"
    else:
        sota_phrase = f"approaching but not exceeding the previous best of {d['prior_best']}"
    byline = ", ".join(f"{first} ({second})" for first, second in d["authors"])

    # --- title, abstract, keywords (up to the first page break) -----------
    story = [
        Paragraph(d["title"], title_style),
        Paragraph(byline, byline_style),
        heading("Abstract"),
        body(
            f"We introduce {d['method']}, a model for {d['task']}. On the {d['primary_benchmark']} "
            f"benchmark, {d['method']} attains {d['test_acc']} {d['metric_name']} on the held-out "
            f"test set, {sota_phrase}. Our {d['params_m']}M-parameter model is evaluated across "
            f"{len(d['datasets'])} datasets ({', '.join(d['datasets'])}). "
            f"Extensive ablations confirm the contribution of each component."),
        heading("Keywords"),
        body(f"{d['task']}; representation learning; {d['primary_benchmark']}"),
        PageBreak(),
    ]

    # --- method, training configuration, datasets -------------------------
    story.append(heading("1 Method and Training Details"))
    story.append(body(
        f"{d['method']} is trained end-to-end with the {d['optimizer']} optimizer. "
        f"We tune on a validation split and report final numbers on the test split. "
        f"The full training configuration is summarized in Table 1."))

    config_rows = [["Hyperparameter", "Value"], ["Optimizer", d["optimizer"]]]
    for label, key in (("Learning rate", "lr"), ("Batch size", "batch"), ("Epochs", "epochs")):
        config_rows.append([label, str(d[key])])
    config_rows.append(["Parameters", f"{d['params_m']}M"])
    config_table = striped_table(
        config_rows, [2.4 * inch, 2.0 * inch], "#2b3a67", "#eef1f8")

    story.extend([
        Spacer(1, 4), config_table, Spacer(1, 6),
        body("Table 1. Training configuration."),
        heading("2 Datasets"),
        body(
            f"We evaluate on {', '.join(d['datasets'])}. {d['primary_benchmark']} is our "
            f"primary benchmark; the remaining datasets are used for generalization "
            f"studies."),
        PageBreak(),
    ])

    # --- results, limitations, funding ------------------------------------
    story.append(heading("3 Results"))
    result_rows = [
        ["Method", f"Val. {d['metric_name']}", f"Test {d['metric_name']}"],
        [f"{d['baseline_name']} (baseline)", str(d["baseline_val"]), str(d["baseline_test"])],
        [f"{d['method']} (ours)", str(d["val_acc"]), str(d["test_acc"])],
    ]
    # Row index 2 (the "(ours)" row) is always the bold one, whatever the
    # numbers are. NOTE(review): the caption says "Best test result in bold";
    # confirm the data always makes that true.
    results_table = striped_table(
        result_rows, [2.6 * inch, 1.7 * inch, 1.7 * inch], "#7a2e2e", "#f7eeee",
        extra=[("FONTNAME", (0, 2), (-1, 2), "Helvetica-Bold")])

    story.extend([
        Spacer(1, 4), results_table, Spacer(1, 6),
        body(f"Table 2. Results on {d['primary_benchmark']}. "
             f"Best test result in bold."),
        heading("4 Limitations"),
    ])
    story.extend(body("• " + lim) for lim in d["limitations"])
    story.extend([
        heading("5 Funding and Code Availability"),
        body(d["funding_note"]),
    ])

    document = SimpleDocTemplate(
        path, pagesize=LETTER,
        topMargin=0.8 * inch, bottomMargin=0.8 * inch,
        leftMargin=0.9 * inch, rightMargin=0.9 * inch)
    document.build(story)
# Render one PDF per entry of DOCS and collect (record, ground truth, path)
# triples in CORPUS.
print("STEP 3/7 · Generating synthetic report PDFs…")
CORPUS = []
for i, d in enumerate(DOCS):
    # Write under /content when that directory exists, otherwise into the
    # current working directory.
    if os.path.isdir("/content"):
        path = f"/content/report_{i}.pdf"
    else:
        path = f"report_{i}.pdf"
    render_pdf(d, path)
    CORPUS.append((d, ground_truth(d), path))
    print(f" ✓ {os.path.basename(path)} — {d['method']}")
print()

# Optional preview of the first page of the first generated PDF. This is
# best-effort: any failure (missing optional packages, empty CORPUS, rendering
# error) is reported on stdout and otherwise ignored.
if SHOW_FIRST_PAGE:
    try:
        import pypdfium2 as pdfium
        import matplotlib.pyplot as plt

        pg = pdfium.PdfDocument(CORPUS[0][2])[0]
        img = pg.render(scale=2.0).to_pil()
        plt.figure(figsize=(6.4, 8.3))
        plt.imshow(img)
        plt.axis("off")
        plt.title("What lift reads — page 1 of report_0.pdf", fontsize=10)
        plt.show()
    except Exception as e:
        print(" (page preview skipped:", e, ")\n")





