diff --git a/teressa-copeland-homes/src/lib/ai/field-placement.ts b/teressa-copeland-homes/src/lib/ai/field-placement.ts index 5f389ce..c4ea417 100644 --- a/teressa-copeland-homes/src/lib/ai/field-placement.ts +++ b/teressa-copeland-homes/src/lib/ai/field-placement.ts @@ -1,63 +1,14 @@ // server-only — never import from client components -// This module calls the OpenAI API (OPENAI_API_KEY env var required) and is Node.js only. +// Classifies blank fields detected by extract-text.ts using gpt-4.1 (text-only prompt). +// Coordinates are already exact from the PDF text layer — AI only determines field type and prefill value. import OpenAI from 'openai'; -import type { PageText } from './extract-text'; +import type { BlankField } from './extract-text'; import type { SignatureFieldData, SignatureFieldType } from '@/lib/db/schema'; -export interface AiFieldCoords { - page: number; - fieldType: SignatureFieldType; - xPct: number; // % from left, top-left origin (AI output) - yPct: number; // % from top, top-left origin (AI output) - widthPct: number; - heightPct: number; - prefillValue: string; -} - -/** - * Convert AI percentage coordinates (top-left origin) to PDF user-space points (bottom-left origin). - * - * pageWidth/pageHeight in PDF points (from page.getViewport({ scale: 1.0 })). 
- * - * Formula mirrors FieldPlacer.tsx handleDragEnd (lines 289-291): - * pdfX = (clampedX / renderedW) * pageInfo.originalWidth - * pdfY = ((renderedH - (clampedY + fieldHpx)) / renderedH) * pageInfo.originalHeight - * - * Translated to percentage inputs: - * pdfX = (xPct / 100) * pageWidth - * screenY = (yPct / 100) * pageHeight (top-left origin from AI) - * fieldH = (heightPct / 100) * pageHeight - * pdfY = pageHeight - screenY - fieldH (bottom edge in PDF space) - */ -export function aiCoordsToPagePdfSpace( - coords: AiFieldCoords, - pageWidth: number, - pageHeight: number, -): { x: number; y: number; width: number; height: number } { - const fieldWidth = (coords.widthPct / 100) * pageWidth; - const fieldHeight = (coords.heightPct / 100) * pageHeight; - const screenX = (coords.xPct / 100) * pageWidth; - const screenY = (coords.yPct / 100) * pageHeight; // screen Y from top - - const x = screenX; - // Nudge yPct down by 0.5% so the field sits on the underline rather than floating above it. - // AI tends to report the top of the blank area; we want the field aligned to the line itself. - const nudgedScreenY = screenY + pageHeight * 0.005; - // PDF y = distance from BOTTOM. screenY is from top, so flip: - // pdfY = pageHeight - screenY - fieldHeight (bottom edge of field) - // Clamp to [0, pageHeight - fieldHeight] so AI coords near page edges - // don't produce negative y values that render outside the canvas. - const rawY = pageHeight - nudgedScreenY - fieldHeight; - const y = Math.max(0, Math.min(rawY, pageHeight - fieldHeight)); - - return { x, y, width: fieldWidth, height: fieldHeight }; -} - -// Manual JSON schema for GPT-4o-mini structured output. -// NOTE: Do NOT use zodResponseFormat — it is broken with Zod v4 (confirmed issues #1540, #1602, #1709). -// With strict: true, ALL properties must be in required and ALL objects must have additionalProperties: false. -const FIELD_PLACEMENT_SCHEMA = { +// Manual JSON schema for gpt-4.1 structured output. 
+// NOTE: Do NOT use zodResponseFormat — broken with Zod v4 (issues #1540, #1602, #1709). +const CLASSIFICATION_SCHEMA = { type: 'object', properties: { fields: { @@ -65,15 +16,11 @@ const FIELD_PLACEMENT_SCHEMA = { items: { type: 'object', properties: { - page: { type: 'integer' }, - fieldType: { type: 'string', enum: ['text', 'initials', 'date', 'client-signature', 'agent-signature', 'agent-initials'] }, - xPct: { type: 'number' }, - yPct: { type: 'number' }, // % from page TOP (AI top-left origin) - widthPct: { type: 'number' }, - heightPct: { type: 'number' }, - prefillValue: { type: 'string' }, // only for text fields; empty string if none + index: { type: 'integer' }, + fieldType: { type: 'string', enum: ['text', 'initials', 'date', 'client-signature', 'agent-signature', 'agent-initials', 'checkbox'] }, + prefillValue: { type: 'string' }, }, - required: ['page', 'fieldType', 'xPct', 'yPct', 'widthPct', 'heightPct', 'prefillValue'], + required: ['index', 'fieldType', 'prefillValue'], additionalProperties: false, }, }, @@ -82,146 +29,251 @@ const FIELD_PLACEMENT_SCHEMA = { additionalProperties: false, } as const; +// Field height in PDF points by type (used to position the box over the underline) +const FIELD_HEIGHTS: Record<string, number> = { + 'client-signature': 20, + 'agent-signature': 20, + 'initials': 14, + 'agent-initials': 14, + 'date': 12, + 'text': 12, + 'checkbox': 14, +}; + +// Width clamping — use the exact measured blank width but stay within these bounds +const SIZE_LIMITS: Record<string, { minW: number; maxW: number }> = { + 'client-signature': { minW: 80, maxW: 260 }, + 'agent-signature': { minW: 80, maxW: 260 }, + 'initials': { minW: 20, maxW: 80 }, + 'agent-initials': { minW: 20, maxW: 80 }, + 'date': { minW: 50, maxW: 130 }, + 'text': { minW: 30, maxW: 280 }, + 'checkbox': { minW: 14, maxW: 20 }, +}; + /** - * Call GPT-4o-mini to classify and place fields from extracted PDF text.
- * - * Returns: - * - fields: SignatureFieldData[] — ready to write to DB; coordinates converted to PDF user-space - * - textFillData: Record<string, string> — keyed by field.id (UUID); only text fields with non-empty prefillValue + * Classify blank fields detected from the PDF text layer. + * AI receives a compact text description of each blank and its surrounding context. + * Returns SignatureFieldData[] with exact coordinates from pdfjs + AI-assigned types. */ export async function classifyFieldsWithAI( - pageTexts: PageText[], + blanks: BlankField[], client: { name: string | null; propertyAddress: string | null } | null, ): Promise<{ fields: SignatureFieldData[]; textFillData: Record<string, string> }> { if (!process.env.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY not configured'); } - const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); + if (blanks.length === 0) { + return { fields: [], textFillData: {} }; + } - const clientName = client?.name ?? 'Unknown'; + const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); + const clientName = client?.name ?? 'Unknown'; const propertyAddress = client?.propertyAddress ??
'Unknown'; - // Build vision messages — one image_url block per page - type ContentBlock = - | { type: 'text'; text: string } - | { type: 'image_url'; image_url: { url: string; detail: 'high' } }; - - const imageBlocks: ContentBlock[] = pageTexts.map((p) => ({ - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${p.base64}`, - detail: 'high', - }, - })); + // Build a compact text description of every blank for the AI + const blankDescriptions = blanks.map((b, i) => { + const parts = [`[${i}] page${b.page}`]; + if (b.rowTotal !== undefined && b.rowTotal > 1) parts.push(`row=${b.rowIndex}/${b.rowTotal}`); + if (b.contextBefore) parts.push(`before="${b.contextBefore}"`); + if (b.contextAfter) parts.push(`after="${b.contextAfter}"`); + if (b.contextAbove) parts.push(`above="${b.contextAbove}"`); + if (b.contextBelow) parts.push(`below="${b.contextBelow}"`); + return parts.join(' '); + }).join('\n'); const response = await openai.chat.completions.create({ - model: 'gpt-4o', + model: 'gpt-4.1', messages: [ { role: 'system', - content: `You are a real estate document form field extractor. You will receive images of PDF pages. Your job is to identify every location that needs to be filled in. + content: `You are classifying blank lines in a Utah real estate document. +Each blank is a sequence of underscores (or a bracket box) found in the PDF. You are given the text immediately before and after it on the same line, plus the lines above and below. -WHAT TO PLACE FIELDS ON (only these): -- Visible blank underlines: ____________ (a horizontal line with nothing on it) -- Labeled blank lines: "Name: ______", "Address: ______", "Price: $______" -- SIGNATURE BLOCK PATTERN (common in real estate docs): a blank underline on one line, with a label like "(Seller's Signature)", "(Date)", "(Address/Phone)" printed BELOW it on the next line. Place the field ON the blank underline line, not on the label line. 
-- Date underlines labeled "(Date)" or similar -- Initials boxes: small "[ ]" or "____" next to "Initials" labels, usually at page bottom margins +Client name: ${clientName} +Property address: ${propertyAddress} -WHAT NOT TO PLACE FIELDS ON — STRICT: -- ANY paragraph body text or sentence, even if it contains a name or address inline -- Document title, headings, section numbers, legal boilerplate paragraphs -- The label text itself — e.g. "(Seller's Signature)" is a label, NOT a blank; place the field on the line ABOVE it -- Printed values already filled in +━━━ FIELD TYPE RULES ━━━ -FIELD TYPES: -- "client-signature" → buyer or seller/client signature lines -- "agent-signature" → agent or listing agent signature lines -- "initials" → buyer/seller initials boxes -- "agent-initials" → agent initials boxes -- "date" → any date field -- "text" → all other blanks (names, addresses, prices, terms, etc.) +"text" — any fill-in blank for written information: names, company, address, price, MLS#, term, + percentage, phone, or any other data field. + → Use for ALL inline blanks surrounded by sentence text. + → Use for Address/Phone blanks even when they appear on the same row as signature blanks. -POSITIONING AND SIZING: -- xPct and yPct are percentages from the TOP-LEFT of that specific page image -- Place the field AT the blank underline — align it to sit on top of the line -- For a row like "Signature __________ Date _______", create TWO separate fields: one for the signature blank and one for the date blank, each at their own x position -- widthPct: match the visual width of the underline — short blanks get small widths (5-15%), long lines wider (20-30%) -- heightPct: THIN — use 1.2% for text/date/initials, 1.8% for signatures. Fields must not overlap the text above the blank. -- yPct: point to the underline itself. The field sits ON the blank line. If you place it too high it will cover the printed text above. -- Do NOT place checkbox fields +"date" — a date blank. 
+ → *** HIGHEST PRIORITY RULE: if the last word(s) in contextBefore are "Date" or "date", + classify as "date". This overrides ALL other rules including "initials". *** + → Also use when contextBefore ends with a date-like label ("day of", month/year). + → A footer like "Seller's Initials [ ] Date ___" has TWO blanks: the bracket = "initials", + the underscore AFTER "Date" = "date". Never let "Seller's Initials" earlier in the line + override the immediate "Date" label right before the underscore blank. -PREFILL: -- For text fields: if the blank is clearly for client name ("${clientName}") or property address ("${propertyAddress}"), set prefillValue to that value -- All other fields: prefillValue = ""`, +"client-signature" — ONLY for a blank on its OWN DEDICATED LINE where a buyer or seller physically signs. + The line will have NO sentence text before or after the blank — just a long underline. + The label "(Seller's Signature)", "(Buyer's Signature)", or similar appears on the line BELOW the blank + (in contextBelow) or immediately beside it. + → NEVER use for inline blanks inside a sentence. + → NEVER use for Address/Phone blanks even when they are on the same row as a signature. + → When a signature block has TWO rows of blanks (two sellers), BOTH rows get "client-signature" + for their signature blank. Look for "(Seller's Signature)" in contextAbove OR contextBelow. + +"agent-signature" — ONLY for a blank on its OWN DEDICATED LINE for the listing agent or broker. + Look for "(Agent's Signature)", "(Broker)", "Seller's Agent", or "by:" in contextAbove/contextBelow. + → NEVER use for inline blanks inside a sentence. + +"initials" — a short blank or bracket box labeled "Seller's Initials" or "Buyer's Initials". + These appear at the bottom margin of each page. 
+ → "Seller's Initials" → "initials" (this is the CLIENT/seller, not the agent) + → "Buyer's Initials" → "initials" + → The blank immediately after the word "Date" on the same line → "date", not "initials" + +"agent-initials" — ONLY for blanks explicitly labeled for the AGENT's initials. + +"checkbox" — a small tick-box [ ] embedded inside sentence text. + These appear as bracket items mid-sentence: "[ ] ARE [ ] ARE NOT", "[ ] IS [ ] IS NOT", + "check applicable box: [ ] County Records [ ] Appraisal", etc. + → contextBefore and contextAfter BOTH contain real sentence words (not blank labels). + → These are NOT form-fill blanks — they are selection checkboxes and must be ignored. + → ALWAYS classify inline bracket blanks surrounded by sentence text as "checkbox". + +━━━ SIGNATURE BLOCK ROWS ━━━ +Real estate signature blocks have multiple blanks on the same line, e.g.: + [sig blank] [address/phone blank] [date blank] + (Seller's Signature) (Address/Phone) (Date) + +When a blank is part of a multi-blank row its description includes row=N/T, +where N is its 1-indexed position and T is the total blanks on that line. + row=1/3 = first of three row=2/3 = middle row=3/3 = last of three + +Use row position + contextBelow to classify each blank: +- row=1/T (first) + contextBelow has "(Seller's Signature)" or "(Buyer's Signature)" → "client-signature" +- row=1/T (first) + contextBelow has agent/broker label → "agent-signature" +- row=N/T where 1 < N < T (middle) → "text" (Address/Phone) +- row=T/T (last) + contextBelow contains "(Date)" → "date" +- row=1/1 (only blank on row) → use contextBefore / contextBelow label rules below + +━━━ QUICK DECISION TREE ━━━ +1. contextBefore ends with "Date" or "date"? → "date" + *** This is the ONLY way a blank becomes "date" via contextBefore. *** + +2. contextBefore ends with "Initials" (e.g. "Seller's Initials", "Buyer's Initials")? → "initials" + *** This fires BEFORE the text/checkbox rules. 
Do NOT reclassify as "text" just because + contextAfter also contains the word "Date". The Date label is AFTER this blank — it labels + the NEXT blank, not this one. *** + +3. Blank is a bracket [ ] with real sentence words both before AND after (not "Initials")? → "checkbox" +4. Blank is inside a sentence (real label words both before AND after)? → "text" +5. contextBefore ends with "Address" or "Phone" or "Address/Phone"? → "text" +6. row=T/T (last on row, T>1) AND contextBelow contains "(Date)"? → "date" +7. row=1/T (first on row, T>1) AND contextBelow contains "(Seller's Signature)" or "(Buyer's Signature)"? → "client-signature" +8. row=1/T (first on row, T>1) AND contextBelow contains agent/broker label? → "agent-signature" +9. row=N/T where 1 < N < T (middle on row)? → "text" +10. Everything else? → "text" + +━━━ FOOTER INITIALS/DATE PATTERN ━━━ +Page footers contain: "Seller's Initials [initials-blank] Date [date-blank]" +Two blanks, two rules: +- contextBefore ends with "Initials" → "initials" ← rule 2 above already handles this +- contextBefore ends with "Date" → "date" ← rule 1 above already handles this +The word "Date" in contextAfter does NOT make a blank a date field. +The word "Date" in contextBefore DOES make a blank a date field. + +━━━ PREFILL ━━━ +- Blank clearly for client/buyer/seller NAME (not address) → prefillValue = "${clientName}" +- Blank clearly for PROPERTY address (listing address, not a signature block Address/Phone field) → prefillValue = "${propertyAddress}" +- "Address/Phone" blanks in signature blocks are for the signer's personal contact info → prefillValue = "" +- Everything else → prefillValue = "" + +Return a classification for EVERY blank index. Do not skip any.`, }, { role: 'user', - content: [ - { - type: 'text', - text: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nAnalyze every page image below. 
Each image has a red PAGE N label in the top-left corner — use that number as the "page" value for fields on that image. Return ALL blank lines and form field areas you can see. One field per blank.`, - }, - ...imageBlocks, - ] as ContentBlock[], + content: blankDescriptions, }, ], response_format: { type: 'json_schema', json_schema: { - name: 'field_placement', + name: 'field_classification', strict: true, - schema: FIELD_PLACEMENT_SCHEMA, + schema: CLASSIFICATION_SCHEMA, }, }, }); - const raw = JSON.parse(response.choices[0].message.content!) as { fields: AiFieldCoords[] }; + const raw = JSON.parse(response.choices[0].message.content!) as { + fields: Array<{ index: number; fieldType: string; prefillValue: string }>; + }; + // Deterministic post-processing: override AI classifications that are + // unambiguously wrong based on structural context, regardless of AI output. + for (const result of raw.fields) { + const blank = blanks[result.index]; + if (!blank) continue; + const before = blank.contextBefore.trim(); + const lastWord = before.split(/\s+/).pop()?.toLowerCase() ?? ''; + const lastTwo = before.split(/\s+/).slice(-2).join(' ').toLowerCase(); + const rowIdx = blank.rowIndex ?? 1; + const rowTot = blank.rowTotal ?? 1; + + // Rule A — footer: "Date ___" → always date + if (lastWord === 'date') { + result.fieldType = 'date'; + + // Rule B — footer: "Seller's/Buyer's Initials [ ]" → always initials + } else if (lastWord === 'initials' || lastTwo === "seller's initials" || lastTwo === "buyer's initials") { + result.fieldType = 'initials'; + + // Rule C — signature rows: only the FIRST blank on a multi-blank row can be a + // signature. Middle and last blanks are address/phone (text) or date — never signatures. 
+ } else if (rowTot > 1 && rowIdx > 1 && + (result.fieldType === 'client-signature' || result.fieldType === 'agent-signature')) { + // Last blank on the row → date if contextBelow labels it "(Date)", else text + if (rowIdx === rowTot && blank.contextBelow.includes('(Date)')) { + result.fieldType = 'date'; + } else { + result.fieldType = 'text'; + } + + // Rule D — addr/phone blank separated onto its own detected line (rowTotal=2, rowIndex=1): + // When y-position drift splits a 3-blank signature row, the sig blank ends up row=1/1 + // and addr+date end up as row=1/2 and row=2/2. The row=1/2 blank has contextBelow + // containing "(Address/Phone)" and "(Date)" but no sig blank before it — it's the + // address/phone field, not a signature. + } else if (rowTot === 2 && rowIdx === 1 && + (result.fieldType === 'client-signature' || result.fieldType === 'agent-signature') && + blank.contextBelow.includes('(Address/Phone)') && + blank.contextBelow.includes('(Date)') && + !blank.contextBelow.includes('(Seller') && + !blank.contextBelow.includes('(Buyer')) { + result.fieldType = 'text'; + } + } - // Convert AI coords to PDF user-space and build SignatureFieldData[] const fields: SignatureFieldData[] = []; const textFillData: Record<string, string> = {}; - for (const aiField of raw.fields) { - // Never place checkboxes — positions depend on user input and can't be AI-determined - if (aiField.fieldType === 'checkbox') continue; + for (const result of raw.fields) { + if (result.fieldType === 'checkbox') continue; + const fieldType = result.fieldType as SignatureFieldType; + const blank = blanks[result.index]; + if (!blank) continue; - const pageInfo = pageTexts.find((p) => p.page === aiField.page); - const pageWidth = pageInfo?.width ?? 612; // fallback: US Letter - const pageHeight = pageInfo?.height ?? 792; + const height = FIELD_HEIGHTS[fieldType] ?? 12; + const lim = SIZE_LIMITS[fieldType] ??
SIZE_LIMITS['text']; + const width = Math.max(lim.minW, Math.min(blank.width, lim.maxW)); - const { x, y, width: rawW, height: rawH } = aiCoordsToPagePdfSpace(aiField, pageWidth, pageHeight); - - // Use AI-estimated size, clamped to type-appropriate min/max - const sizeLimits: Record<string, { minW: number; maxW: number; minH: number; maxH: number }> = { - 'client-signature': { minW: 100, maxW: 250, minH: 16, maxH: 26 }, - 'agent-signature': { minW: 100, maxW: 250, minH: 16, maxH: 26 }, - 'initials': { minW: 30, maxW: 70, minH: 12, maxH: 18 }, - 'agent-initials': { minW: 30, maxW: 70, minH: 12, maxH: 18 }, - 'date': { minW: 55, maxW: 120, minH: 12, maxH: 16 }, - 'text': { minW: 40, maxW: 260, minH: 12, maxH: 16 }, - 'checkbox': { minW: 14, maxW: 20, minH: 14, maxH: 20 }, - }; - const lim = sizeLimits[aiField.fieldType] ?? sizeLimits['text']; - const width = Math.max(lim.minW, Math.min(rawW, lim.maxW)); - const height = Math.max(lim.minH, Math.min(rawH, lim.maxH)); + // Position the field box so it sits ON the underscore line. + // blank.y is the text baseline (PDF bottom-left origin). Underscores descend just below + // the baseline, so anchor the field bottom 2pt below baseline and extend upward by height. + // This keeps the box tight to the underline without climbing into the text above. + const y = Math.max(0, blank.y - 2); const id = crypto.randomUUID(); + fields.push({ id, page: blank.page, x: blank.x, y, width, height, type: fieldType }); - fields.push({ - id, - page: aiField.page, - x, - y, - width, - height, - type: aiField.fieldType, - }); - - // Build textFillData for text fields with a non-empty prefill value (keyed by UUID) - if (aiField.fieldType === 'text' && aiField.prefillValue) { - textFillData[id] = aiField.prefillValue; + if (fieldType === 'text' && result.prefillValue) { + textFillData[id] = result.prefillValue; } }