fix(13): stamp page numbers on rendered images, fix signature block pattern in prompt

- Stamp a red "PAGE N" label in the top-left corner of each rendered image so GPT-4o attributes fields to the correct page
- Prompt: add the "blank underline with the label printed below it" signature block pattern (common in real estate docs)
- Prompt: add an explicit rule — place the field on the blank underline, not on the "(Label)" text below it

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Chandler Copeland
2026-03-21 17:52:31 -06:00
parent 8ac5acb486
commit c80133ea58
2 changed files with 16 additions and 7 deletions

View File

@@ -42,6 +42,16 @@ export async function extractPdfText(filePath: string): Promise<PageImage[]> {
// @ts-ignore — @napi-rs/canvas context is compatible at runtime but types diverge
await page.render({ canvasContext: ctx, viewport }).promise;
// Stamp a visible page number in the top-left corner so GPT-4o can correlate
// each image to the correct page number when multiple images are sent in one prompt.
const label = `PAGE ${pageNum}`;
const fontSize = Math.round(viewport.height * 0.025);
ctx.fillStyle = 'rgba(220,30,30,0.85)';
ctx.fillRect(0, 0, fontSize * (label.length * 0.65), fontSize * 1.5);
ctx.fillStyle = '#ffffff';
ctx.font = `bold ${fontSize}px sans-serif`;
ctx.fillText(label, 4, fontSize * 1.15);
const jpegBuffer = canvas.toBuffer('image/jpeg' as never, 85);
pages.push({

View File

@@ -125,16 +125,15 @@ export async function classifyFieldsWithAI(
WHAT TO PLACE FIELDS ON (only these):
- Visible blank underlines: ____________ (a horizontal line with nothing on it)
- Labeled blank lines: "Name: ______", "Address: ______", "Price: $______"
- Signature lines labeled "(Seller's Signature)", "(Buyer's Signature)", "(Agent)", etc.
- SIGNATURE BLOCK PATTERN (common in real estate docs): a blank underline on one line, with a label like "(Seller's Signature)", "(Date)", "(Address/Phone)" printed BELOW it on the next line. Place the field ON the blank underline line, not on the label line.
- Date underlines labeled "(Date)" or similar
- Initials boxes: small "[ ]" or "____" next to "Initials" labels, usually at page bottom margins
WHAT NOT TO PLACE FIELDS ON — STRICT:
- ANY paragraph body text, even if it contains an address, name, or value inline
- Document title, headings, section numbers
- Printed values that are already filled in (e.g. a pre-printed address in the document body)
- Descriptive or instructional text
- If the text is part of a sentence or clause, do NOT place a field on it
- ANY paragraph body text or sentence, even if it contains a name or address inline
- Document title, headings, section numbers, legal boilerplate paragraphs
- The label text itself — e.g. "(Seller's Signature)" is a label, NOT a blank; place the field on the line ABOVE it
- Printed values already filled in
FIELD TYPES:
- "client-signature" → buyer or seller/client signature lines
@@ -162,7 +161,7 @@ PREFILL:
content: [
{
type: 'text',
text: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nAnalyze every page below. Return ALL blanks and form fields you can see — one field per blank line/box. Pages are in order starting from page 1.`,
text: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nAnalyze every page image below. Each image has a red PAGE N label in the top-left corner — use that number as the "page" value for fields on that image. Return ALL blank lines and form field areas you can see. One field per blank.`,
},
...imageBlocks,
] as ContentBlock[],