fix(13): use AI-estimated field sizes with type bounds, stricter no-inline-text rule
- Replace fixed 144x36 with AI widthPct/heightPct clamped to per-type min/max (signatures 100-250x20-40pt, initials 36-80x16-28pt, date 60-130x14-24pt, text 60-280x14-24pt)
- Prompt: explicit 'no inline body text' rule — if text is part of a sentence, skip it
- Prompt: widthPct should match visual underline width, heightPct kept thin (~2-2.5%)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -119,16 +119,19 @@ export async function classifyFieldsWithAI(
|
|||||||
role: 'system',
|
role: 'system',
|
||||||
content: `You are a real estate document form field extractor. You will receive images of PDF pages. Your job is to identify every location that needs to be filled in.
|
content: `You are a real estate document form field extractor. You will receive images of PDF pages. Your job is to identify every location that needs to be filled in.
|
||||||
|
|
||||||
WHAT TO PLACE FIELDS ON:
|
WHAT TO PLACE FIELDS ON (only these):
|
||||||
- Blank underlines: ____________
|
- Visible blank underlines: ____________ (a horizontal line with nothing on it)
|
||||||
- Labeled blanks: "Name: ______", "Address: ______", "Price: $______"
|
- Labeled blank lines: "Name: ______", "Address: ______", "Price: $______"
|
||||||
- Signature lines with labels like "(Seller's Signature)", "(Buyer's Signature)", "(Agent)"
|
- Signature lines labeled "(Seller's Signature)", "(Buyer's Signature)", "(Agent)", etc.
|
||||||
- Date lines labeled "(Date)" or with a date underline
|
- Date underlines labeled "(Date)" or similar
|
||||||
- Initials boxes: "[ ]" or "_____ Initials" or small boxes at page bottoms/margins
|
- Initials boxes: small "[ ]" or "____" next to "Initials" labels, usually at page bottom margins
|
||||||
|
|
||||||
WHAT NOT TO PLACE FIELDS ON:
|
WHAT NOT TO PLACE FIELDS ON — STRICT:
|
||||||
- Paragraph body text, instructions, legal boilerplate
|
- ANY paragraph body text, even if it contains an address, name, or value inline
|
||||||
- Headings and section titles
|
- Document title, headings, section numbers
|
||||||
|
- Printed values that are already filled in (e.g. a pre-printed address in the document body)
|
||||||
|
- Descriptive or instructional text
|
||||||
|
- If the text is part of a sentence or clause, do NOT place a field on it
|
||||||
|
|
||||||
FIELD TYPES:
|
FIELD TYPES:
|
||||||
- "client-signature" → buyer or seller/client signature lines
|
- "client-signature" → buyer or seller/client signature lines
|
||||||
@@ -138,10 +141,12 @@ FIELD TYPES:
|
|||||||
- "date" → any date field
|
- "date" → any date field
|
||||||
- "text" → all other blanks (names, addresses, prices, terms, etc.)
|
- "text" → all other blanks (names, addresses, prices, terms, etc.)
|
||||||
|
|
||||||
POSITIONING:
|
POSITIONING AND SIZING:
|
||||||
- xPct and yPct are percentages from the TOP-LEFT of that specific page image
|
- xPct and yPct are percentages from the TOP-LEFT of that specific page image
|
||||||
- Place the field AT the blank line, not above or below it
|
- Place the field AT the blank underline — align it to sit on top of the line
|
||||||
- For a line like "Buyer's Signature __________ Date _______", place a client-signature at the signature blank's x/y and a date field at the date blank's x/y — they are separate fields on the same line
|
- For a row like "Signature __________ Date _______", create TWO separate fields: one for the signature blank and one for the date blank, each at their own x position
|
||||||
|
- widthPct: match the visual width of the underline — short blanks get small widths, long signature lines get wider
|
||||||
|
- heightPct: keep fields thin — signature/text ~2.5%, initials/date ~2%
|
||||||
- Do NOT place checkbox fields
|
- Do NOT place checkbox fields
|
||||||
|
|
||||||
PREFILL:
|
PREFILL:
|
||||||
@@ -183,10 +188,21 @@ PREFILL:
|
|||||||
const pageWidth = pageInfo?.width ?? 612; // fallback: US Letter
|
const pageWidth = pageInfo?.width ?? 612; // fallback: US Letter
|
||||||
const pageHeight = pageInfo?.height ?? 792;
|
const pageHeight = pageInfo?.height ?? 792;
|
||||||
|
|
||||||
const { x, y } = aiCoordsToPagePdfSpace(aiField, pageWidth, pageHeight);
|
const { x, y, width: rawW, height: rawH } = aiCoordsToPagePdfSpace(aiField, pageWidth, pageHeight);
|
||||||
|
|
||||||
const width = 144; // pts: 2 inches
|
// Use AI-estimated size, clamped to type-appropriate min/max
|
||||||
const height = 36; // pts: 0.5 inches
|
const sizeLimits: Record<SignatureFieldType, { minW: number; maxW: number; minH: number; maxH: number }> = {
|
||||||
|
'client-signature': { minW: 100, maxW: 250, minH: 20, maxH: 40 },
|
||||||
|
'agent-signature': { minW: 100, maxW: 250, minH: 20, maxH: 40 },
|
||||||
|
'initials': { minW: 36, maxW: 80, minH: 16, maxH: 28 },
|
||||||
|
'agent-initials': { minW: 36, maxW: 80, minH: 16, maxH: 28 },
|
||||||
|
'date': { minW: 60, maxW: 130, minH: 14, maxH: 24 },
|
||||||
|
'text': { minW: 60, maxW: 280, minH: 14, maxH: 24 },
|
||||||
|
'checkbox': { minW: 16, maxW: 24, minH: 16, maxH: 24 },
|
||||||
|
};
|
||||||
|
const lim = sizeLimits[aiField.fieldType] ?? sizeLimits['text'];
|
||||||
|
const width = Math.max(lim.minW, Math.min(rawW, lim.maxW));
|
||||||
|
const height = Math.max(lim.minH, Math.min(rawH, lim.maxH));
|
||||||
|
|
||||||
const id = crypto.randomUUID();
|
const id = crypto.randomUUID();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user