from io import BytesIO

import scrapy
from PyPDF2 import PdfReader

from ..items import DagensLunchVarda
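
# `DagensLunchVarda` is defined in the project's items module; judging by the
# fields assigned below, it is assumed to be a scrapy.Item roughly like this
# sketch (field names taken from the assignments in extract_pdf/parse):
#
#   class DagensLunchVarda(scrapy.Item):
#       place = scrapy.Field()
#       about = scrapy.Field()
#       monday = scrapy.Field()
#       tuesday = scrapy.Field()
#       wednesday = scrapy.Field()
#       thursday = scrapy.Field()
#       friday = scrapy.Field()
#       fish = scrapy.Field()
#       green = scrapy.Field()
#       pasta = scrapy.Field()
#       salad = scrapy.Field()
#       burger = scrapy.Field()
#       always = scrapy.Field()
#       bbq = scrapy.Field()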

class VardaSpider(scrapy.Spider):
    name = "varda"
    allowed_domains = ["restaurangvarda.se"]
    start_urls = ["https://restaurangvarda.se"]

    def extract_pdf(self, response):
        dagens = response.meta.get("dagens")
        reader = PdfReader(BytesIO(response.body))
        # The menu is parsed line by line; splitting on newlines is assumed
        # here, since every branch below matches whole menu lines such as
        # "Måndag 119:- Köttbullar med potatismos".
        all_text = reader.pages[0].extract_text().split("\n")
        days = []
        day_price = []
        is_pasta = False
        for text in all_text:
            if text.lower().startswith("lunchmeny"):
                dagens["about"] = text
            elif text.lower().startswith(("måndag", "tisdag", "onsdag", "torsdag", "fredag")):
                s = text.split(":-")
                d, p = self.get_days(s[0])
                # A heading may cover a span of days ("Måndag – Onsdag"), so
                # the same dish and price are recorded once per covered day.
                for _ in range(d + 1):
                    days.append(s[1])
                    day_price.append(p)
            elif "fisken" in text.lower():
                s = text.split(":-")
                dagens["fish"] = f"{s[1].strip()} ({s[0].split(' ')[-1].strip()}kr)"
            elif "gröna" in text.lower():
                s = text.split(":-")
                dagens["green"] = f"{s[1].strip()} ({s[0].split(' ')[-1].strip()}kr)"
            elif text.lower() == "pasta":
                # A bare "PASTA" heading means the dishes follow on the next line.
                is_pasta = True
            elif "pasta" in text.lower() or is_pasta:
                is_pasta = False
                # The pasta and salad sections share one line, with dishes
                # separated by "*" and the salad after the "SALLAD" marker.
                pasta, salad = text.split("SALLAD")
                salad_price, salad = salad.split(":-")
                dagens["salad"] = f"{salad.strip()} ({salad_price.strip()}kr)"
                dagens["pasta"] = [x.strip().replace(":-", "kr") for x in filter(None, pasta.strip().split("*"))]
            elif "burgare" in text.lower():
                s = text.split(":-")
                dagens["burger"] = [f"{x.strip()} ({s[0].split(' ')[-1].strip()}kr)" for x in filter(None, s[1].split("*"))]
            elif "alltid" in text.lower():
                s = text.split(":-")
                dagens["always"] = f"{s[2].strip()} (hel: {s[1].split(' ')[-1]}kr / halv: {s[0].split(' ')[-1]}kr)"
            elif "grillat" in text.lower():
                s = text.split(":-")
                dagens["bbq"] = f"{s[1].strip()} ({s[0].split(' ')[-1].strip()}kr)"
        dagens["monday"] = f"{days[0].strip()} ({day_price[0].strip()}kr)"
        dagens["tuesday"] = f"{days[1].strip()} ({day_price[1].strip()}kr)"
        dagens["wednesday"] = f"{days[2].strip()} ({day_price[2].strip()}kr)"
        dagens["thursday"] = f"{days[3].strip()} ({day_price[3].strip()}kr)"
        dagens["friday"] = f"{days[4].strip()} ({day_price[4].strip()}kr)"
        return dagens
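
    # Example of the assumed PDF line format and the value one branch produces:
    #   "Dagens fisken 139:- Stekt torsk med remouladsås"
    #   -> dagens["fish"] == "Stekt torsk med remouladsås (139kr)"
    # (the price "139" here is illustrative, not taken from a real menu)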

    def get_days(self, string):
        days = ["måndag", "tisdag", "onsdag", "torsdag", "fredag"]
        if "–" in string:
            # A heading such as "Måndag – Onsdag 119" spans several days:
            # return how many extra days it covers along with the price token.
            s = string.split(" ")
            price = s[3].strip()
            first = days.index(s[0].strip().lower())
            second = days.index(s[2].strip().lower())
            return (second - first, price)
        # Single-day heading such as "Torsdag 125": no extra days.
        return (0, string.split(" ")[-1])
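
    # Examples, assuming the heading formats described above:
    #   get_days("Måndag – Onsdag 119")  -> (2, "119")  # covers three days
    #   get_days("Torsdag 125")          -> (0, "125")  # single day
    # (prices are illustrative)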

    def parse(self, response):
        lunch_url = ""
        for url in response.xpath("//a[contains(@class, 'elementor-sub-item')]/@href").getall():
            if "Lunchmenyn" in url:
                lunch_url = url
                break
        dagens = DagensLunchVarda()
        dagens["place"] = "Varda"
        # Guard against requesting an empty URL when the menu link is missing;
        # the populated item is returned from extract_pdf, so no return here.
        if lunch_url:
            yield scrapy.Request(lunch_url, callback=self.extract_pdf, meta={"dagens": dagens})
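
# A minimal sketch for running the spider outside `scrapy crawl varda`, assuming
# it is launched from the project root so the relative import resolves; the
# "varda.json" feed path is illustrative:
#
#   from scrapy.crawler import CrawlerProcess
#
#   process = CrawlerProcess(settings={"FEEDS": {"varda.json": {"format": "json"}}})
#   process.crawl(VardaSpider)
#   process.start()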