diff --git a/teressa-copeland-homes/package-lock.json b/teressa-copeland-homes/package-lock.json index c4b4b69..20f2eac 100644 --- a/teressa-copeland-homes/package-lock.json +++ b/teressa-copeland-homes/package-lock.json @@ -11,6 +11,7 @@ "@cantoo/pdf-lib": "^2.6.3", "@dnd-kit/core": "^6.3.1", "@dnd-kit/utilities": "^3.2.2", + "@napi-rs/canvas": "^0.1.97", "@react-email/components": "^1.0.10", "@react-email/render": "^2.0.4", "@vercel/blob": "^2.3.1", @@ -3307,7 +3308,6 @@ "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.97.tgz", "integrity": "sha512-8cFniXvrIEnVwuNSRCW9wirRZbHvrD3JVujdS2P5n5xiJZNZMOZcfOvJ1pb66c7jXMKHHglJEDVJGbm8XWFcXQ==", "license": "MIT", - "optional": true, "workspaces": [ "e2e/*" ], diff --git a/teressa-copeland-homes/package.json b/teressa-copeland-homes/package.json index d0e4ea5..498a428 100644 --- a/teressa-copeland-homes/package.json +++ b/teressa-copeland-homes/package.json @@ -18,6 +18,7 @@ "@cantoo/pdf-lib": "^2.6.3", "@dnd-kit/core": "^6.3.1", "@dnd-kit/utilities": "^3.2.2", + "@napi-rs/canvas": "^0.1.97", "@react-email/components": "^1.0.10", "@react-email/render": "^2.0.4", "@vercel/blob": "^2.3.1", diff --git a/teressa-copeland-homes/src/lib/ai/extract-text.ts b/teressa-copeland-homes/src/lib/ai/extract-text.ts index bb2e225..6a0b72e 100644 --- a/teressa-copeland-homes/src/lib/ai/extract-text.ts +++ b/teressa-copeland-homes/src/lib/ai/extract-text.ts @@ -4,6 +4,7 @@ // @ts-ignore — legacy .mjs build; types re-exported from main pdfjs-dist declaration import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist/legacy/build/pdf.mjs'; +import { createCanvas } from '@napi-rs/canvas'; import { readFile } from 'node:fs/promises'; import { join } from 'node:path'; @@ -12,73 +13,43 @@ import { join } from 'node:path'; // Empty string is falsy → PDFWorker.workerSrc getter throws before the import runs. GlobalWorkerOptions.workerSrc = `file://${join(process.cwd(), 'node_modules/pdfjs-dist/legacy/build/pdf.worker.mjs')}`; -/** A single line of text, grouped by approximate Y position. */ -export interface TextLine { - yPct: number; // % from page TOP (0 = top, 100 = bottom) - xPct: number; // % from page LEFT of the first item on this line - text: string; // all items on this line joined +/** A rendered page image, ready to send to GPT-4o vision. */ +export interface PageImage { + page: number; // 1-indexed + width: number; // original PDF width in points (scale 1.0) + height: number; // original PDF height in points (scale 1.0) + base64: string; // JPEG base64 of the rendered page (no data: prefix) } -/** Per-page structured data for AI consumption. */ -export interface PageText { - page: number; // 1-indexed - width: number; // page width in PDF points - height: number; // page height in PDF points - lines: TextLine[]; // text grouped into lines, sorted top-to-bottom -} +// Legacy type alias kept for callers that still reference PageText +export type PageText = PageImage; -export async function extractPdfText(filePath: string): Promise { +const RENDER_SCALE = 1.5; // 72dpi × 1.5 = 108dpi — good for vision without huge payloads + +export async function extractPdfText(filePath: string): Promise { const data = new Uint8Array(await readFile(filePath)); const pdf = await getDocument({ data }).promise; - const pages: PageText[] = []; + const pages: PageImage[] = []; for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) { const page = await pdf.getPage(pageNum); - const viewport = page.getViewport({ scale: 1.0 }); - const textContent = await page.getTextContent(); + const viewport = page.getViewport({ scale: RENDER_SCALE }); - const W = viewport.width; - const H = viewport.height; + // Create an @napi-rs/canvas and render the PDF page into it + const canvas = createCanvas(Math.round(viewport.width), Math.round(viewport.height)); + const ctx = canvas.getContext('2d'); - // Collect raw items with positions - const rawItems: { text: string; x: number; yFromTop: number }[] = []; - for (const item of textContent.items) { - if (typeof item !== 'object' || item === null || !('str' in item)) continue; - const i = item as { str: string; transform: number[] }; - if (!i.str.trim()) continue; - const x = i.transform[4]; - const yFromTop = H - i.transform[5]; // PDF y is from bottom; flip to screen coords - rawItems.push({ text: i.str, x, yFromTop }); - } + // @ts-ignore — @napi-rs/canvas context is compatible at runtime but types diverge + await page.render({ canvasContext: ctx, viewport }).promise; - // Group items into lines by rounding yFromTop to nearest 4pt bucket - const lineMap = new Map(); - for (const item of rawItems) { - const bucket = Math.round(item.yFromTop / 4) * 4; - const existing = lineMap.get(bucket); - if (existing) { - existing.items.push(item); - existing.minX = Math.min(existing.minX, item.x); - } else { - lineMap.set(bucket, { items: [item], minX: item.x }); - } - } + const jpegBuffer = canvas.toBuffer('image/jpeg' as never, 85); - // Sort lines top-to-bottom, join items left-to-right - const lines: TextLine[] = Array.from(lineMap.entries()) - .sort(([a], [b]) => a - b) - .map(([yBucket, { items, minX }]) => { - const sorted = items.sort((a, b) => a.x - b.x); - return { - yPct: Math.round((yBucket / H) * 1000) / 10, // 1 decimal place - xPct: Math.round((minX / W) * 1000) / 10, - text: sorted.map((i) => i.text).join(' '), - }; - }) - // Cap at 120 lines per page to stay within context limits - .slice(0, 120); - - pages.push({ page: pageNum, width: W, height: H, lines }); + pages.push({ + page: pageNum, + width: page.getViewport({ scale: 1.0 }).width, + height: page.getViewport({ scale: 1.0 }).height, + base64: jpegBuffer.toString('base64'), + }); } return pages; diff --git a/teressa-copeland-homes/src/lib/ai/field-placement.ts b/teressa-copeland-homes/src/lib/ai/field-placement.ts index 729a97f..83045f4 100644 --- a/teressa-copeland-homes/src/lib/ai/field-placement.ts +++ b/teressa-copeland-homes/src/lib/ai/field-placement.ts @@ -99,47 +99,64 @@ export async function classifyFieldsWithAI( const clientName = client?.name ?? 'Unknown'; const propertyAddress = client?.propertyAddress ?? 'Unknown'; - // Build structured page summary — each line includes yPct/xPct so the AI has spatial context - const pagesSummary = pageTexts.map((p) => { - const linesSummary = p.lines - .map((l) => ` y=${l.yPct}% x=${l.xPct}%: ${l.text}`) - .join('\n'); - return `=== Page ${p.page} (${p.width}x${p.height}pt) ===\n${linesSummary}`; - }).join('\n\n'); + // Build vision messages — one image_url block per page + type ContentBlock = + | { type: 'text'; text: string } + | { type: 'image_url'; image_url: { url: string; detail: 'high' } }; + + const imageBlocks: ContentBlock[] = pageTexts.map((p) => ({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${p.base64}`, + detail: 'high', + }, + })); const response = await openai.chat.completions.create({ model: 'gpt-4o', messages: [ { role: 'system', - content: `You are a real estate document form field extractor. You receive structured text from PDF pages where each line includes its Y position (% from top) and X position (% from left). + content: `You are a real estate document form field extractor. You will receive images of PDF pages. Your job is to identify every location that needs to be filled in. -Your job: identify every location that requires a FIELD to be filled in. +WHAT TO PLACE FIELDS ON: +- Blank underlines: ____________ +- Labeled blanks: "Name: ______", "Address: ______", "Price: $______" +- Signature lines with labels like "(Seller's Signature)", "(Buyer's Signature)", "(Agent)" +- Date lines labeled "(Date)" or with a date underline +- Initials boxes: "[ ]" or "_____ Initials" or small boxes at page bottoms/margins -FIELD PLACEMENT RULES: -1. Only place fields at actual form field locations — blank lines (___), labeled input areas, signature blocks, date lines, and initials boxes. -2. NEVER place fields inside paragraph body text, headings, or descriptive content. -3. Look for these patterns as indicators of form fields: - - Lines of underscores: "_______" or "___________" - - Labels followed by blank space: "Date: ___", "Name: ___", "Address: ___" - - Signature lines labeled: "(Seller's Signature)", "(Buyer's Signature)", "(Agent Signature)", "Seller", "Buyer" - - Initials indicators: "Initials", "[ ]", "(Initials)", "_________ Initials" - - Date lines: "(Date)", "Date ___", "___ / ___ / ___" -4. For EVERY such blank or label you find, add a field — even if you have nothing to prefill. Leave prefillValue as "" if you don't know the value. -5. Match field types: - - "client-signature" → buyer/client signature lines - - "agent-signature" → agent/listing agent signature lines - - "initials" → initials boxes or short initial blanks - - "agent-initials" → agent-specific initials - - "date" → date fields - - "text" → any other fill-in-the-blank (names, addresses, prices, etc.) -6. Place the field AT the blank/label's yPct. Use the xPct from that line for xPct. -7. Do NOT place checkbox fields. -8. For text fields where the value matches the client name or property address, set prefillValue. Otherwise use "".`, +WHAT NOT TO PLACE FIELDS ON: +- Paragraph body text, instructions, legal boilerplate +- Headings and section titles + +FIELD TYPES: +- "client-signature" → buyer or seller/client signature lines +- "agent-signature" → agent or listing agent signature lines +- "initials" → buyer/seller initials boxes +- "agent-initials" → agent initials boxes +- "date" → any date field +- "text" → all other blanks (names, addresses, prices, terms, etc.) + +POSITIONING: +- xPct and yPct are percentages from the TOP-LEFT of that specific page image +- Place the field AT the blank line, not above or below it +- For a line like "Buyer's Signature __________ Date _______", place a client-signature at the signature blank's x/y and a date field at the date blank's x/y — they are separate fields on the same line +- Do NOT place checkbox fields + +PREFILL: +- For text fields: if the blank is clearly for client name ("${clientName}") or property address ("${propertyAddress}"), set prefillValue to that value +- All other fields: prefillValue = ""`, }, { role: 'user', - content: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nDocument pages (each line shows position and text):\n\n${pagesSummary}`, + content: [ + { + type: 'text', + text: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nAnalyze every page below. Return ALL blanks and form fields you can see — one field per blank line/box. Pages are in order starting from page 1.`, + }, + ...imageBlocks, + ] as ContentBlock[], }, ], response_format: {