diff --git a/teressa-copeland-homes/package-lock.json b/teressa-copeland-homes/package-lock.json index aa79deb..c4b4b69 100644 --- a/teressa-copeland-homes/package-lock.json +++ b/teressa-copeland-homes/package-lock.json @@ -21,6 +21,7 @@ "next": "16.2.0", "next-auth": "5.0.0-beta.30", "nodemailer": "^7.0.13", + "openai": "^6.32.0", "postgres": "^3.4.8", "react": "19.2.4", "react-dom": "19.2.4", @@ -11345,6 +11346,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/openai": { + "version": "6.32.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.32.0.tgz", + "integrity": "sha512-j3k+BjydAf8yQlcOI7WUQMQTbbF5GEIMAE2iZYCOzwwB3S2pCheaWYp+XZRNAch4jWVc52PMDGRRjutao3lLCg==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", diff --git a/teressa-copeland-homes/package.json b/teressa-copeland-homes/package.json index 36242ce..d0e4ea5 100644 --- a/teressa-copeland-homes/package.json +++ b/teressa-copeland-homes/package.json @@ -28,6 +28,7 @@ "next": "16.2.0", "next-auth": "5.0.0-beta.30", "nodemailer": "^7.0.13", + "openai": "^6.32.0", "postgres": "^3.4.8", "react": "19.2.4", "react-dom": "19.2.4", diff --git a/teressa-copeland-homes/src/lib/ai/extract-text.ts b/teressa-copeland-homes/src/lib/ai/extract-text.ts new file mode 100644 index 0000000..6167522 --- /dev/null +++ b/teressa-copeland-homes/src/lib/ai/extract-text.ts @@ -0,0 +1,43 @@ +// server-only — never import from client components +// This module uses pdfjs-dist legacy build in Node.js fake-worker mode (no browser worker). +// The client components (PdfViewer.tsx, PreviewModal.tsx) set workerSrc independently. 
// @ts-ignore — legacy .mjs build; types re-exported from main pdfjs-dist declaration
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist/legacy/build/pdf.mjs';
import { readFile } from 'node:fs/promises';

// Empty string = no worker thread (fake/synchronous worker) — required for Node.js server context.
// Do NOT use: new URL('pdfjs-dist/build/pdf.worker.min.mjs', import.meta.url) — that is browser-only.
GlobalWorkerOptions.workerSrc = '';

/** Maximum number of characters of extracted text kept per page (keeps the AI prompt small). */
const MAX_PAGE_TEXT_CHARS = 2000;

/** Text and geometry extracted from a single PDF page. */
export interface PageText {
  page: number; // 1-indexed
  text: string; // all text items joined with spaces, capped at 2000 chars
  width: number; // page width in PDF points (72 DPI)
  height: number; // page height in PDF points (72 DPI)
}

/** Narrows a pdf.js text-content entry to one that carries a string `str` payload. */
function hasStr(item: unknown): item is { str: string } {
  return (
    typeof item === 'object' &&
    item !== null &&
    'str' in item &&
    typeof (item as { str?: unknown }).str === 'string'
  );
}

/**
 * Read a PDF from disk and return the text and page size of every page.
 *
 * Pages are processed sequentially in document order. Each page's text items are joined
 * with single spaces and truncated to {@link MAX_PAGE_TEXT_CHARS} characters.
 *
 * @param filePath - Path of the PDF file to read.
 * @returns One {@link PageText} entry per page, ordered by page number (1-indexed).
 * @throws If the file cannot be read or pdf.js fails to load or parse the document.
 */
export async function extractPdfText(filePath: string): Promise<PageText[]> {
  const data = new Uint8Array(await readFile(filePath));
  const loadingTask = getDocument({ data });

  try {
    const pdf = await loadingTask.promise;
    const pages: PageText[] = [];

    for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      try {
        const viewport = page.getViewport({ scale: 1.0 });
        const textContent = await page.getTextContent();

        // Entries without a `str` property (e.g. marked-content markers) carry no text.
        const parts: string[] = [];
        for (const item of textContent.items as readonly unknown[]) {
          if (hasStr(item)) {
            parts.push(item.str);
          }
        }

        // Cap text per page at 2000 chars to stay within GPT-4o-mini context limits
        const text = parts.join(' ').slice(0, MAX_PAGE_TEXT_CHARS);

        pages.push({
          page: pageNum,
          width: viewport.width,
          height: viewport.height,
          text,
        });
      } finally {
        // Release per-page resources as soon as the page has been read.
        page.cleanup();
      }
    }

    return pages;
  } finally {
    // Always tear down the document/worker, even when loading or extraction fails,
    // so repeated server-side calls do not accumulate parsed documents in memory.
    await loadingTask.destroy();
  }
}

// ---------------------------------------------------------------------------
// diff --git a/teressa-copeland-homes/src/lib/ai/field-placement.ts b/teressa-copeland-homes/src/lib/ai/field-placement.ts
// new file mode 100644
// index 0000000..df65324
// --- /dev/null
// +++ b/teressa-copeland-homes/src/lib/ai/field-placement.ts
// @@ -0,0 +1,168 @@
// ---------------------------------------------------------------------------
// server-only — never import from client components
// This module calls the OpenAI API (OPENAI_API_KEY env var required) and is Node.js only.

import OpenAI from 'openai';
import type { PageText } from './extract-text';
import type { SignatureFieldData, SignatureFieldType } from '@/lib/db/schema';

/** A single field placement as returned by the AI, in page-relative percentages. */
export interface AiFieldCoords {
  page: number;
  fieldType: SignatureFieldType;
  xPct: number; // % from left, top-left origin (AI output)
  yPct: number; // % from top, top-left origin (AI output)
  widthPct: number;
  heightPct: number;
  prefillValue: string;
}

/**
 * Convert AI percentage coordinates (top-left origin) to PDF user-space points (bottom-left origin).
 *
 * pageWidth/pageHeight in PDF points (from page.getViewport({ scale: 1.0 })).
 *
 * Formula mirrors FieldPlacer.tsx handleDragEnd (lines 289-291):
 *   pdfX = (clampedX / renderedW) * pageInfo.originalWidth
 *   pdfY = ((renderedH - (clampedY + fieldHpx)) / renderedH) * pageInfo.originalHeight
 *
 * Translated to percentage inputs:
 *   pdfX    = (xPct / 100) * pageWidth
 *   screenY = (yPct / 100) * pageHeight   (top-left origin from AI)
 *   fieldH  = (heightPct / 100) * pageHeight
 *   pdfY    = pageHeight - screenY - fieldH   (bottom edge in PDF space)
 *
 * Only the four geometry properties of `coords` are read. No clamping is applied: inputs
 * outside 0-100 produce coordinates outside the page.
 *
 * @param coords - Percentage position and size of the field, measured from the page's top-left.
 * @param pageWidth - Page width in PDF points.
 * @param pageHeight - Page height in PDF points.
 * @returns The field's left edge (`x`), bottom edge (`y`), `width` and `height` in PDF points.
 */
export function aiCoordsToPagePdfSpace(
  coords: Pick<AiFieldCoords, 'xPct' | 'yPct' | 'widthPct' | 'heightPct'>,
  pageWidth: number,
  pageHeight: number,
): { x: number; y: number; width: number; height: number } {
  const width = (coords.widthPct / 100) * pageWidth;
  const height = (coords.heightPct / 100) * pageHeight;
  const x = (coords.xPct / 100) * pageWidth;
  const distanceFromTop = (coords.yPct / 100) * pageHeight;

  // PDF y is measured from the BOTTOM of the page and refers to the field's bottom edge,
  // so flip the top-based offset and subtract the field height.
  const y = pageHeight - distanceFromTop - height;

  return { x, y, width, height };
}

// Manual JSON schema for GPT-4o-mini structured output.
// NOTE: Do NOT use zodResponseFormat — it is broken with Zod v4 (confirmed issues #1540, #1602, #1709).
// With strict: true, ALL properties must be in required and ALL objects must have additionalProperties: false.
const FIELD_PLACEMENT_SCHEMA = {
  type: 'object',
  properties: {
    fields: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          page: { type: 'integer' },
          fieldType: {
            type: 'string',
            enum: ['text', 'checkbox', 'initials', 'date', 'client-signature', 'agent-signature', 'agent-initials'],
          },
          xPct: { type: 'number' },
          yPct: { type: 'number' }, // % from page TOP (AI top-left origin)
          widthPct: { type: 'number' },
          heightPct: { type: 'number' },
          prefillValue: { type: 'string' }, // only for text fields; empty string if none
        },
        required: ['page', 'fieldType', 'xPct', 'yPct', 'widthPct', 'heightPct', 'prefillValue'],
        additionalProperties: false,
      },
    },
  },
  required: ['fields'],
  additionalProperties: false,
} as const;

// Standard field sizes in PDF points — consistent with FieldPlacer defaults.
const CHECKBOX_SIZE_PT = 24; // checkbox = 24x24
const DEFAULT_FIELD_WIDTH_PT = 144; // all other fields = 144x36
const DEFAULT_FIELD_HEIGHT_PT = 36;

// Page size assumed when the AI references a page that was not extracted (US Letter).
const FALLBACK_PAGE_WIDTH_PT = 612;
const FALLBACK_PAGE_HEIGHT_PT = 792;

const FIELD_PLACEMENT_SYSTEM_PROMPT = `You are a real estate document form field extractor.
Given extracted text from a PDF page (with context about page number and dimensions),
identify where signature, text, checkbox, initials, and date fields should be placed.
Return fields as percentage positions (0-100) from the TOP-LEFT of the page.
Use these field types: text (for typed values), checkbox, initials, date, client-signature, agent-signature, agent-initials.
For text fields that match the client profile, set prefillValue to the known value. Otherwise use empty string.`;

/** Clamp `value` into `[0, max]`; a negative `max` is treated as 0. */
function clampToRange(value: number, max: number): number {
  return Math.min(Math.max(value, 0), Math.max(max, 0));
}

/** Parse the model's JSON reply and return its `fields` array, throwing a descriptive error if malformed. */
function parseFieldPlacement(content: string): AiFieldCoords[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`OpenAI field placement response was not valid JSON: ${reason}`);
  }

  const fields = (parsed as { fields?: unknown } | null)?.fields;
  if (!Array.isArray(fields)) {
    throw new Error('OpenAI field placement response is missing the "fields" array');
  }
  // Element shape is enforced by the strict JSON schema sent with the request.
  return fields as AiFieldCoords[];
}

/**
 * Call GPT-4o-mini to classify and place fields from extracted PDF text.
 *
 * Field sizes returned by the AI are ignored in favour of the standard sizes; the AI size is
 * only used to locate the field's bottom edge. The resulting position is clamped so the
 * standard-size field stays inside the page.
 *
 * @param pageTexts - Per-page text and dimensions (see `extractPdfText`). When empty, no API
 *   call is made and an empty result is returned.
 * @param client - Known client details used to prefill text fields; `null`/missing values are
 *   sent to the model as "Unknown".
 * @returns
 *   - fields: SignatureFieldData[] — ready to write to DB; coordinates converted to PDF user-space
 *   - textFillData: Record<string, string> — keyed by field.id (UUID); only text fields with non-empty prefillValue
 * @throws If OPENAI_API_KEY is not set, the API call fails, the model refuses or is truncated,
 *   or the reply is not the expected JSON.
 */
export async function classifyFieldsWithAI(
  pageTexts: PageText[],
  client: { name: string | null; propertyAddress: string | null } | null,
): Promise<{ fields: SignatureFieldData[]; textFillData: Record<string, string> }> {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY not configured');
  }

  // Nothing to classify — skip the API round-trip instead of asking the model about an empty document.
  if (pageTexts.length === 0) {
    return { fields: [], textFillData: {} };
  }

  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

  const clientName = client?.name ?? 'Unknown';
  const propertyAddress = client?.propertyAddress ?? 'Unknown';

  // Build pages summary — text already capped at 2000 chars per page in extractPdfText
  const pagesSummary = pageTexts
    .map((p) => `Page ${p.page} (${p.width}x${p.height}pt):\n${p.text}`)
    .join('\n\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      {
        role: 'system',
        content: FIELD_PLACEMENT_SYSTEM_PROMPT,
      },
      {
        role: 'user',
        content: `Client name: ${clientName}\nProperty address: ${propertyAddress}\n\nPDF pages:\n${pagesSummary}`,
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'field_placement',
        strict: true,
        schema: FIELD_PLACEMENT_SCHEMA,
      },
    },
  });

  // Surface every "no usable JSON" outcome as an explicit error rather than a
  // TypeError/SyntaxError from a non-null assertion or JSON.parse.
  const choice = response.choices[0];
  if (!choice) {
    throw new Error('OpenAI returned no choices for field placement');
  }
  if (choice.message.refusal) {
    throw new Error(`OpenAI refused the field placement request: ${choice.message.refusal}`);
  }
  if (choice.finish_reason === 'length') {
    throw new Error('OpenAI field placement response was truncated (token limit reached)');
  }
  if (!choice.message.content) {
    throw new Error('OpenAI returned an empty field placement response');
  }

  const aiFields = parseFieldPlacement(choice.message.content);

  // Index pages once; the first entry wins for a duplicated page number (same as Array.find).
  const pagesByNumber = new Map<number, PageText>();
  for (const pageText of pageTexts) {
    if (!pagesByNumber.has(pageText.page)) {
      pagesByNumber.set(pageText.page, pageText);
    }
  }

  // Convert AI coords to PDF user-space and build SignatureFieldData[]
  const fields: SignatureFieldData[] = [];
  const textFillData: Record<string, string> = {};

  for (const aiField of aiFields) {
    const pageInfo = pagesByNumber.get(aiField.page);
    const pageWidth = pageInfo?.width ?? FALLBACK_PAGE_WIDTH_PT;
    const pageHeight = pageInfo?.height ?? FALLBACK_PAGE_HEIGHT_PT;

    // Use standard sizes regardless of AI width/height — consistent with FieldPlacer defaults
    const isCheckbox = aiField.fieldType === 'checkbox';
    const width = isCheckbox ? CHECKBOX_SIZE_PT : DEFAULT_FIELD_WIDTH_PT;
    const height = isCheckbox ? CHECKBOX_SIZE_PT : DEFAULT_FIELD_HEIGHT_PT;

    const placed = aiCoordsToPagePdfSpace(aiField, pageWidth, pageHeight);

    // The model may return positions at or beyond the page edge; keep the whole
    // standard-size field on the page.
    const x = clampToRange(placed.x, pageWidth - width);
    const y = clampToRange(placed.y, pageHeight - height);

    const id = crypto.randomUUID();

    fields.push({
      id,
      page: aiField.page,
      x,
      y,
      width,
      height,
      type: aiField.fieldType,
    });

    // Build textFillData for text fields with a non-empty prefill value (keyed by UUID)
    if (aiField.fieldType === 'text' && aiField.prefillValue) {
      textFillData[id] = aiField.prefillValue;
    }
  }

  return { fields, textFillData };
}