From 4c877e346fc30b65fba44e917411cf7ef710b393 Mon Sep 17 00:00:00 2001
From: puckoprutt
Date: Tue, 18 Feb 2025 18:33:35 +0100
Subject: [PATCH] added a max spider

---
Note: the post-image blob ids on the "index" lines below predate the review
edits made to this patch (docstrings, guarded parsing) and are stale.

 dagens_lunch/items.py       | 11 +++++++++++
 dagens_lunch/spiders/max.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 dagens_lunch/spiders/max.py

diff --git a/dagens_lunch/items.py b/dagens_lunch/items.py
index 157ef62..089a9fc 100644
--- a/dagens_lunch/items.py
+++ b/dagens_lunch/items.py
@@ -42,3 +42,14 @@ class DagensLunchMalmens(scrapy.Item):
     friday = scrapy.Field()
     scraped_by = scrapy.Field()
     scraped_at = scrapy.Field()
+
+class DagensLunchMax(scrapy.Item):
+    """Weekday lunch entries scraped for Max, plus scrape metadata."""
+    place = scrapy.Field()
+    monday = scrapy.Field()
+    tuesday = scrapy.Field()
+    wednesday = scrapy.Field()
+    thursday = scrapy.Field()
+    friday = scrapy.Field()
+    scraped_by = scrapy.Field()
+    scraped_at = scrapy.Field()
diff --git a/dagens_lunch/spiders/max.py b/dagens_lunch/spiders/max.py
new file mode 100644
index 0000000..8382e1c
--- /dev/null
+++ b/dagens_lunch/spiders/max.py
@@ -0,0 +1,49 @@
+from datetime import datetime
+import scrapy
+from ..items import DagensLunchMax
+
+class MaxSpider(scrapy.Spider):
+    """Scrape Max's "dagens lunch" page into one Monday-Friday item."""
+
+    name = "max"
+    allowed_domains = ["max.se"]
+    start_urls = ["https://max.se/maten/meny/maltider/dagens-lunch/"]
+
+    def parse(self, response):
+        """Build a DagensLunchMax item from the lunch page.
+
+        Every non-blank text node in the product-variation list is reduced
+        to its last space-separated word; the first five of those become
+        the monday..friday fields.
+
+        Returns None (after logging an error) instead of an item when the
+        page yields fewer than five entries.
+        """
+        lista = response.xpath(
+            "//div/div[2]/div[contains(@class, 'o-product-info')]"
+            "/ul[contains(@class, 'o-product-info__variations')]"
+        )
+        # Strip each text node and drop the blank ones so stray whitespace
+        # can neither leak into a value nor shift the weekday alignment.
+        texts = (raw.strip() for raw in lista.xpath("./li/text()").getall())
+        days = [text.split(" ")[-1] for text in texts if text]
+        if len(days) < 5:
+            # Indexing a short list would raise a bare IndexError; report
+            # the unexpected page layout explicitly and emit no item.
+            self.logger.error(
+                "Expected 5 lunch entries on %s, found %d",
+                response.url,
+                len(days),
+            )
+            return None
+        monday, tuesday, wednesday, thursday, friday = days[:5]
+        return DagensLunchMax(
+            place="Max",
+            monday=monday,
+            tuesday=tuesday,
+            wednesday=wednesday,
+            thursday=thursday,
+            friday=friday,
+            scraped_by=self.__class__.__name__,
+            scraped_at=datetime.now().isoformat(),
+        )