wip: skyslope scraper — fix name extraction via body text parsing, preview+download flow ready

This commit is contained in:
Chandler Copeland
2026-03-19 23:06:17 -06:00
parent 1983f2c8cd
commit ac5b98fe33
21 changed files with 497 additions and 156 deletions

View File

@@ -0,0 +1,47 @@
import { chromium } from 'playwright';
import { config } from 'dotenv';
import path from 'path';
// Load environment variables from .env.local in the current working directory.
config({ path: path.resolve(process.cwd(), '.env.local') });

/**
 * Read a previously saved Playwright storage state from disk.
 *
 * Best-effort: a missing file is expected on first run and yields null
 * silently; any other failure (unreadable file, invalid JSON) is reported
 * as a warning and also yields null.
 *
 * @param {string} sessionPath - Absolute path of the JSON session file.
 * @returns {Promise<object|null>} Parsed storage state, or null if unavailable.
 */
async function loadSavedState(sessionPath) {
  const { readFile } = await import('node:fs/promises');
  try {
    return JSON.parse(await readFile(sessionPath, 'utf8'));
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      console.warn(`Ignoring unusable session file ${sessionPath}: ${err?.message ?? err}`);
    }
    return null;
  }
}

/**
 * Debug helper: open the SkySlope SSO link, navigate to the forms library
 * page, dump a summary of the buttons and row-like elements found there,
 * then click one known form name and capture a screenshot plus the button
 * texts present afterwards.
 *
 * The browser is always closed, even when a step throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const sessionPath = path.resolve(process.cwd(), 'scripts/.ure-session.json');
  const screenshotPath = 'scripts/debug-after-click.png';
  const savedState = await loadSavedState(sessionPath);

  const browser = await chromium.launch({ headless: false });
  try {
    const context = await browser.newContext({ storageState: savedState ?? undefined, acceptDownloads: true });
    const page = await context.newPage();

    // Arm the listener BEFORE navigating so a tab opened during the SSO
    // navigation is not missed; resolves to null if none appears in 10s.
    const newPagePromise = context.waitForEvent('page', { timeout: 10_000 }).catch(() => null);
    await page.goto('https://www.utahrealestate.com/sso/connect/client/skyslope', { waitUntil: 'domcontentloaded' });
    const newPage = await newPagePromise;

    // Continue in the newly opened tab if there is one, else in the original.
    const activePage = newPage ?? page;
    await activePage.waitForLoadState('domcontentloaded');
    await activePage.waitForTimeout(3000);
    await activePage.goto('https://forms.skyslope.com/browse-libraries', { waitUntil: 'domcontentloaded' });
    await activePage.waitForTimeout(5000);

    // Count all buttons, capture the text of the first 10, and describe the
    // first 3 row-like elements. This callback runs inside the page.
    const result = await activePage.evaluate(() => {
      const allBtns = Array.from(document.querySelectorAll('button'));
      const btnTexts = allBtns.slice(0, 10).map((b) => `"${b.textContent?.trim()}"`);
      const rows = Array.from(document.querySelectorAll('tr, [class*="row"], [class*="item"]'))
        .slice(0, 3)
        .map((el) => {
          // getAttribute works for SVG elements too, where className is an
          // SVGAnimatedString object rather than a string.
          const cls = (el.getAttribute('class') ?? '').slice(0, 50);
          const text = el.textContent?.replace(/\s+/g, ' ').trim().slice(0, 60);
          return `<${el.tagName} class="${cls}"> "${text}"`;
        });
      return { totalBtns: allBtns.length, btnTexts, rows };
    });
    console.log(JSON.stringify(result, null, 2));

    // Try clicking the first matching form name and take a screenshot.
    const firstFormName = activePage.locator('text="Acknowledgement of Third Party Approval Addendum to REPC - UAR"').first();
    // isVisible() returns immediately and ignores its timeout option, so
    // wait explicitly for up to 3s for the element to become visible.
    const formNameVisible = await firstFormName
      .waitFor({ state: 'visible', timeout: 3000 })
      .then(() => true, () => false);
    if (formNameVisible) {
      console.log('Clicking first form name...');
      await firstFormName.click();
      await activePage.waitForTimeout(3000);
      await activePage.screenshot({ path: screenshotPath });
      console.log(`Screenshot saved: ${screenshotPath}`);
      // List the buttons present after the click (first 15 only).
      const btns = await activePage.locator('button').allTextContents();
      console.log('Buttons after click:', btns.slice(0, 15));
    }
  } finally {
    // Always release the browser process, even if a step above failed.
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});