Merge pull request 'add_max' (#1) from add_max into main

Reviewed-on: #1
puckoprutt 2025-02-18 17:50:55 +00:00
commit 75e4771ef3
5 changed files with 127 additions and 1 deletion

.gitignore (vendored, 3 lines changed)

@@ -1,3 +1,4 @@
.vscode
latest
dagens_lunch/spiders/__pycache__/*.pyc
dagens_lunch/__pycache__/*.pyc
dagens_lunch/__pycache__/*.pyc

dagens_lunch/items.py

@@ -42,3 +42,13 @@ class DagensLunchMalmens(scrapy.Item):
    friday = scrapy.Field()
    scraped_by = scrapy.Field()
    scraped_at = scrapy.Field()

class DagensLunchMax(scrapy.Item):
    place = scrapy.Field()
    monday = scrapy.Field()
    tuesday = scrapy.Field()
    wednesday = scrapy.Field()
    thursday = scrapy.Field()
    friday = scrapy.Field()
    scraped_by = scrapy.Field()
    scraped_at = scrapy.Field()

dagens_lunch/settings.py

@@ -96,3 +96,5 @@ FEEDS = {
    }
}
FEED_EXPORT_ENCODING = "utf-8"

LOG_ENABLED = False
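LOG_ENABLED = False turns off Scrapy's own logging, so only the print() output from run_spiders.py below is shown when the spiders run. If the Scrapy log is needed again for debugging, one option (a sketch, not part of this change) is to flip the setting in code before the crawl starts:

    # hypothetical tweak in run_spiders.py, not part of this PR
    settings = get_project_settings()
    settings.set("LOG_ENABLED", True)  # bring back Scrapy's log output for this run
    process = CrawlerProcess(settings)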

New Max spider in dagens_lunch/spiders/

@@ -0,0 +1,24 @@
from datetime import datetime
import scrapy
from ..items import DagensLunchMax

class MaxSpider(scrapy.Spider):
    name = "max"
    allowed_domains = ["max.se"]
    start_urls = ["https://max.se/maten/meny/maltider/dagens-lunch/"]

    def parse(self, response):
        days = []
        lista = response.xpath("//div/div[2]/div[contains(@class, 'o-product-info')]/ul[contains(@class, 'o-product-info__variations')]")
        for li in lista.xpath("./li/text()").getall():
            days.append(li.split(" ")[-1])
        return DagensLunchMax(
            place="Max",
            monday=days[0],
            tuesday=days[1],
            wednesday=days[2],
            thursday=days[3],
            friday=days[4],
            scraped_by=self.__class__.__name__,
            scraped_at=f"{datetime.now().isoformat()}"
        )
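parse() assumes the lunch page lists exactly five weekday variations; if the markup changes, days[0] through days[4] will raise IndexError. For a quick standalone check of the new spider, without going through run_spiders.py below, something like this should do (a sketch; the spider is looked up by its declared name "max", so the spider's file name does not matter):

    # hypothetical check_max.py at the project root, not part of this PR
    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings

    process = CrawlerProcess(get_project_settings())
    process.crawl("max")   # resolved through the project's spider loader
    process.start()        # blocks until the crawl finishes and any configured FEEDS export is written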

run_spiders.py (new executable file, 89 lines)

@@ -0,0 +1,89 @@
import pathlib
import json

from scrapy import spiderloader
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings

dir = pathlib.Path(__file__).resolve().parent
dir = dir / "latest"
file = dir / "all.json"
valsaren_file = dir / "valsaren.json"
malmens_file = dir / "malmens.json"
heat_file = dir / "heat.json"
max_file = dir / "max.json"


def dagens_lunch_results():
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
    def crawler_results(signal, sender, item, response, spider):
        results.append(item)
        print(f"{spider.name} is done!")

    if file.is_file() and file.exists():
        print(f"removing {file.name}")
        file.unlink()
    if valsaren_file.is_file() and valsaren_file.exists():
        print(f"removing {valsaren_file.name}")
        valsaren_file.unlink()
    if malmens_file.is_file() and malmens_file.exists():
        print(f"removing {malmens_file.name}")
        malmens_file.unlink()
    if heat_file.is_file() and heat_file.exists():
        print(f"removing {heat_file.name}")
        heat_file.unlink()
    if max_file.is_file() and max_file.exists():
        print(f"removing {max_file.name}")
        max_file.unlink()

    dispatcher.connect(crawler_results, signal=signals.item_scraped)
    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)
    process.start()
    return results

if __name__ == "__main__":
    _valsaren = None
    _malmens = None
    _heat = None
    _max = None

    res = dagens_lunch_results()
    with file.open("a") as f:
        f.write("[\n")
        for idx, item in enumerate(res):
            if idx > 0:
                f.write(",")
            f.write(json.dumps(dict(item), indent=4))
            if item["place"].split(" ")[0].lower() == "valsaren":
                _valsaren = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "malmens":
                _malmens = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "heat":
                _heat = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "max":
                _max = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "unknown":
                print(f"please teach me more about {item['place']}")
        f.write("\n]")
    print(f"created: {file}")

    valsaren_file.write_text(_valsaren)
    print(f"created: {valsaren_file}")
    malmens_file.write_text(_malmens)
    print(f"created: {malmens_file}")
    heat_file.write_text(_heat)
    print(f"created: {heat_file}")
    max_file.write_text(_max)
    print(f"created: {max_file}")