Update scrape-within-domain-only.js

Removed the "Next" button click handler, as it is not needed.
This commit is contained in:
Kиро.Kрика 2024-08-14 23:04:48 +03:00 committed by GitHub
parent aada7776f8
commit a062ce11ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,7 +3,6 @@ const cheerio = require('cheerio');
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const crypto = require('crypto'); // For hashing filenames const crypto = require('crypto'); // For hashing filenames
const url = require('url');
const websiteUrl = 'https://www.tu-sofia.bg/'; const websiteUrl = 'https://www.tu-sofia.bg/';
const outputDir = './output'; const outputDir = './output';
@ -126,13 +125,7 @@ const ignoredDomainsRegex = /facebook\.com|linkedin\.com|youtube\.com|focus-news
} }
}); });
queue.push(...newLinks); queue.push(...newLinks);
// Click on the "Next" button if present
try {
await page.click('selector-for-next-button');
} catch (clickError) {
console.log(`Next button not found on ${currentPageUrl} or error clicking it: ${clickError.message}`);
}
} catch (error) { } catch (error) {
console.log(`Error loading or processing ${currentPageUrl}: ${error.message}`); console.log(`Error loading or processing ${currentPageUrl}: ${error.message}`);
continue; // Skip to the next URL continue; // Skip to the next URL