Update scrape-within-domain-only.js
Removed the "Next" button click handler, as it is not needed.
This commit is contained in:
parent
aada7776f8
commit
a062ce11ab
@ -3,7 +3,6 @@ const cheerio = require('cheerio');
|
|||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const crypto = require('crypto'); // For hashing filenames
|
const crypto = require('crypto'); // For hashing filenames
|
||||||
const url = require('url');
|
|
||||||
|
|
||||||
const websiteUrl = 'https://www.tu-sofia.bg/';
|
const websiteUrl = 'https://www.tu-sofia.bg/';
|
||||||
const outputDir = './output';
|
const outputDir = './output';
|
||||||
@ -127,12 +126,6 @@ const ignoredDomainsRegex = /facebook\.com|linkedin\.com|youtube\.com|focus-news
|
|||||||
});
|
});
|
||||||
queue.push(...newLinks);
|
queue.push(...newLinks);
|
||||||
|
|
||||||
// Click on the "Next" button if present
|
|
||||||
try {
|
|
||||||
await page.click('selector-for-next-button');
|
|
||||||
} catch (clickError) {
|
|
||||||
console.log(`Next button not found on ${currentPageUrl} or error clicking it: ${clickError.message}`);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(`Error loading or processing ${currentPageUrl}: ${error.message}`);
|
console.log(`Error loading or processing ${currentPageUrl}: ${error.message}`);
|
||||||
continue; // Skip to the next URL
|
continue; // Skip to the next URL
|
||||||
|
Loading…
Reference in New Issue
Block a user