From a062ce11abf4fab3d3f43632ef91138d0b4a52ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=D0=B8=D1=80=D0=BE=2EK=D1=80=D0=B8=D0=BA=D0=B0?= <95271587+Goshko812@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:04:48 +0300 Subject: [PATCH] Update scrape-within-domain-only.js Removed the "Next" button click handler, as it is not needed, and dropped the `url` import. --- scrape-within-domain-only.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scrape-within-domain-only.js b/scrape-within-domain-only.js index 6426c8b..5bb28dd 100644 --- a/scrape-within-domain-only.js +++ b/scrape-within-domain-only.js @@ -3,7 +3,6 @@ const cheerio = require('cheerio'); const fs = require('fs'); const path = require('path'); const crypto = require('crypto'); // For hashing filenames -const url = require('url'); const websiteUrl = 'https://www.tu-sofia.bg/'; const outputDir = './output'; @@ -126,13 +125,7 @@ const ignoredDomainsRegex = /facebook\.com|linkedin\.com|youtube\.com|focus-news } }); queue.push(...newLinks); - - // Click on the "Next" button if present - try { - await page.click('selector-for-next-button'); - } catch (clickError) { - console.log(`Next button not found on ${currentPageUrl} or error clicking it: ${clickError.message}`); - } + } catch (error) { console.log(`Error loading or processing ${currentPageUrl}: ${error.message}`); continue; // Skip to the next URL