import scrapy
from io import BytesIO
from PyPDF2 import PdfReader

from ..items import DagensLunchVarda


class VardaSpider(scrapy.Spider):
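    """Scrape the weekly lunch menu ("dagens lunch") from restaurangvarda.se.

    The menu is published as a PDF linked from the site's navigation; the
    spider follows that link and parses the PDF text into a DagensLunchVarda
    item. Assuming this sits in a standard Scrapy project, it can be run
    with ``scrapy crawl varda``.
    """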
    name = "varda"
    allowed_domains = ["restaurangvarda.se"]
    start_urls = ["https://restaurangvarda.se"]

    def extract_pdf(self, response):
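        """Parse the lunch-menu PDF and fill in the DagensLunchVarda item."""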
        dagens = response.meta.get("dagens")
        reader = PdfReader(BytesIO(response.body))
        # The whole menu is on the first page; split the extracted text into chunks.
        all_text = reader.pages[0].extract_text().split(" ")
        days = []
        day_price = []
        is_pasta = False
        for text in all_text:
            if text.lower().startswith("lunchmeny"):
                dagens["about"] = text
            elif text.lower().startswith(("måndag", "tisdag", "onsdag", "torsdag", "fredag")):
                # Weekday entry; it may cover a single day or a range of days,
                # so repeat the dish and price for every day it spans.
                s = text.split(":-")
                d, p = self.get_days(s[0])
                for _ in range(d + 1):
                    days.append(s[1])
                    day_price.append(p)
elif "fisken" in text.lower():
s = text.split(":-")
dagens["fish"] = f"{s[1].strip()} ({s[0].split(" ")[-1].strip()})"
elif "gröna" in text.lower():
s = text.split(":-")
dagens["green"] = f"{s[1].strip()} ({s[0].split(" ")[-1].strip()}kr)"
elif text.lower() == "pasta":
is_pasta = True
elif "pasta" in text.lower() or is_pasta:
is_pasta = False
pasta, salad = text.split("SALLAD")
sallad_price, salad = salad.split(":-")
dagens["salad"] = f"{salad.strip()} ({sallad_price.strip()}kr)"
dagens["pasta"] = [f"{x.strip().replace(":-", "kr")}" for x in list(filter(None, pasta.strip().split("*")))]
elif "burgare" in text.lower():
s = text.split(":-")
dagens["burger"] = [f"{x.strip()} ({s[0].split(" ")[-1].strip()}kr)" for x in list(filter(None, s[1].split("*")))]
elif "alltid" in text.lower():
s = text.split(":-")
dagens["always"] = f"{s[2].strip()} (hel: {s[1].split(" ")[-1]}kr / halv: {s[0].split(" ")[-1]}kr)"
elif "grillat" in text.lower():
s = text.split(":-")
dagens["bbq"] = f"{s[1].strip()} ({s[0].split(" ")[-1].strip()}kr)"
dagens["monday"] = f"{days[0].strip()} ({day_price[0].strip()}kr)"
dagens["tuesday"] = f"{days[1].strip()} ({day_price[1].strip()}kr)"
dagens["wednesday"] = f"{days[2].strip()} ({day_price[2].strip()}kr)"
dagens["thursday"] = f"{days[3].strip()} ({day_price[3].strip()}kr)"
dagens["friday"] = f"{days[4].strip()} ({day_price[4].strip()}kr)"
return dagens

    def get_days(self, string):
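        """Return (number_of_extra_days, price) for a weekday heading.

        A heading may cover a single day or a range of days; for a range,
        the offset between the two day names is returned so the caller can
        repeat the dish for every day it covers.
        """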
days = ["måndag", "tisdag", "onsdag", "torsdag", "fredag"]
if "" in string:
s = string.split(" ")
price = s[3].strip()
try:
first = days.index(s[0].strip().lower().decode())
except AttributeError:
first = days.index(s[0].strip().lower())
try:
second = days.index(s[2].strip().lower().decode())
except AttributeError:
second = days.index(s[2].strip().lower())
many_days = second - first
return (many_days, price)
return (0, string.split(" ")[-1])

    def parse(self, response):
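        """Find the lunch-menu PDF link on the start page and request it."""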
lunch_url = ""
for url in response.xpath("//a[contains(@class, 'elementor-sub-item')]/@href").getall():
if "Lunchmenyn" in url:
lunch_url = url
break
dagens = DagensLunchVarda()
dagens["place"] = "Varda"
yield scrapy.Request(lunch_url, callback=self.extract_pdf, meta={"dagens": dagens})
return dagens