Compare commits
No commits in common. "83239dd637237eb2fcd0ee0edb7aa2777f98c18f" and "f63779b0b8237d04ff64bcc81b476af0b1fff708" have entirely different histories.
83239dd637
...
f63779b0b8
@ -52,26 +52,3 @@ class DagensLunchMax(scrapy.Item):
|
|||||||
friday = scrapy.Field()
|
friday = scrapy.Field()
|
||||||
scraped_by = scrapy.Field()
|
scraped_by = scrapy.Field()
|
||||||
scraped_at = scrapy.Field()
|
scraped_at = scrapy.Field()
|
||||||
|
|
||||||
class DagensLunchVarda(scrapy.Item):
    """Lunch menu scraped from restaurant Varda.

    Weekday fields hold "dish (NNNkr)" strings produced by the varda
    spider; the remaining fields hold the standing menu plus scrape
    metadata.  (Reconstructed from diff-mangled source; field set is
    unchanged.)
    """

    place = scrapy.Field()            # restaurant name, e.g. "Varda"
    about = scrapy.Field()            # menu heading ("Lunchmeny ...")
    monday = scrapy.Field()
    monday_price = scrapy.Field()     # declared but spider inlines price into `monday`
    tuesday = scrapy.Field()
    tuesday_price = scrapy.Field()
    wednesday = scrapy.Field()
    wednesday_price = scrapy.Field()
    thursday = scrapy.Field()
    thursday_price = scrapy.Field()
    friday = scrapy.Field()
    friday_price = scrapy.Field()
    always = scrapy.Field()           # always-available dish
    bbq = scrapy.Field()              # "grillat" dish
    burger = scrapy.Field()           # list of burger variants
    pasta = scrapy.Field()            # list of pasta dishes
    salad = scrapy.Field()
    green = scrapy.Field()            # vegetarian ("gröna") dish
    fish = scrapy.Field()
    scraped_by = scrapy.Field()       # spider metadata
    scraped_at = scrapy.Field()       # scrape timestamp
|
|
||||||
@ -1,87 +0,0 @@
|
|||||||
import scrapy

from io import BytesIO

from PyPDF2 import PdfReader

from ..items import DagensLunchVarda


class VardaSpider(scrapy.Spider):
    """Scrape the daily-lunch menu PDF linked from restaurangvarda.se."""

    name = "varda"
    allowed_domains = ["restaurangvarda.se"]
    start_urls = ["https://restaurangvarda.se"]

    def extract_pdf(self, response):
        """Parse the downloaded lunch-menu PDF and fill the item.

        Expects a partially filled DagensLunchVarda item in
        response.meta["dagens"]; returns it with about/weekday/standing-menu
        fields populated.
        """
        dagens = response.meta.get("dagens")
        reader = PdfReader(BytesIO(response.body))
        # NOTE(review): splitting on a single space assumes extract_text()
        # yields one menu entry per space-separated chunk — confirm against
        # the actual PDF layout.
        all_text = reader.pages[0].extract_text().split(" ")

        days = []
        day_price = []
        is_pasta = False
        for text in all_text:
            lowered = text.lower()
            if lowered.startswith("lunchmeny"):
                dagens["about"] = text
            elif lowered.startswith(
                    ("måndag", "tisdag", "onsdag", "torsdag", "fredag")):
                s = text.split(":-")
                span, price = self.get_days(s[0])
                # A heading may cover a range of days ("måndag – onsdag");
                # repeat the dish/price once per covered day.
                for _ in range(span + 1):
                    days.append(s[1])
                    day_price.append(price)
            elif "fisken" in lowered:
                s = text.split(":-")
                price = s[0].split(" ")[-1].strip()
                # "kr" suffix added for consistency with the other courses
                # (original omitted it here only).
                dagens["fish"] = f"{s[1].strip()} ({price}kr)"
            elif "gröna" in lowered:
                s = text.split(":-")
                price = s[0].split(" ")[-1].strip()
                dagens["green"] = f"{s[1].strip()} ({price}kr)"
            elif lowered == "pasta":
                # Marker word only — the dishes follow in the next chunk.
                is_pasta = True
            elif "pasta" in lowered or is_pasta:
                is_pasta = False
                pasta, salad = text.split("SALLAD")
                salad_price, salad = salad.split(":-")
                dagens["salad"] = f"{salad.strip()} ({salad_price.strip()}kr)"
                dagens["pasta"] = [x.strip().replace(":-", "kr")
                                   for x in pasta.strip().split("*") if x]
            elif "burgare" in lowered:
                s = text.split(":-")
                price = s[0].split(" ")[-1].strip()
                dagens["burger"] = [f"{x.strip()} ({price}kr)"
                                    for x in s[1].split("*") if x]
            elif "alltid" in lowered:
                s = text.split(":-")
                whole = s[1].split(" ")[-1]
                half = s[0].split(" ")[-1]
                dagens["always"] = f"{s[2].strip()} (hel: {whole}kr / halv: {half}kr)"
            elif "grillat" in lowered:
                s = text.split(":-")
                price = s[0].split(" ")[-1].strip()
                dagens["bbq"] = f"{s[1].strip()} ({price}kr)"

        # zip stops at the shortest sequence, so a menu with fewer than five
        # day entries no longer raises IndexError.
        weekdays = ["monday", "tuesday", "wednesday", "thursday", "friday"]
        for key, dish, price in zip(weekdays, days, day_price):
            dagens[key] = f"{dish.strip()} ({price.strip()}kr)"
        return dagens

    def get_days(self, string):
        """Return (span, price) parsed from a day heading.

        A range heading like "måndag – onsdag 125" yields
        (index distance between the days, price string); a single-day
        heading yields (0, last space-separated token).
        """
        days = ["måndag", "tisdag", "onsdag", "torsdag", "fredag"]
        if "–" in string:
            s = string.split(" ")
            price = s[3].strip()
            # Inputs are always str in Python 3, so no bytes/decode
            # fallback is needed.
            first = days.index(s[0].strip().lower())
            second = days.index(s[2].strip().lower())
            return (second - first, price)
        return (0, string.split(" ")[-1])

    def parse(self, response):
        """Find the lunch-menu PDF link and request it for extraction."""
        lunch_url = ""
        for url in response.xpath(
                "//a[contains(@class, 'elementor-sub-item')]/@href").getall():
            if "Lunchmenyn" in url:
                lunch_url = url
                break
        if not lunch_url:
            # Requesting an empty URL would raise ValueError; log and bail.
            self.logger.warning("no lunch-menu link found on %s", response.url)
            return
        dagens = DagensLunchVarda()
        dagens["place"] = "Varda"
        yield scrapy.Request(lunch_url, callback=self.extract_pdf,
                             meta={"dagens": dagens})
|
|
||||||
@ -14,7 +14,6 @@ valsaren_file = dir / "valsaren.json"
|
|||||||
malmens_file = dir / "malmens.json"
|
malmens_file = dir / "malmens.json"
|
||||||
heat_file = dir / "heat.json"
|
heat_file = dir / "heat.json"
|
||||||
max_file = dir / "max.json"
|
max_file = dir / "max.json"
|
||||||
varda_file = dir / "varda.json"
|
|
||||||
|
|
||||||
def dagens_lunch_results():
|
def dagens_lunch_results():
|
||||||
results = []
|
results = []
|
||||||
@ -42,9 +41,6 @@ def dagens_lunch_results():
|
|||||||
if max_file.is_file() and max_file.exists():
|
if max_file.is_file() and max_file.exists():
|
||||||
print(f"removing {max_file.name}")
|
print(f"removing {max_file.name}")
|
||||||
max_file.unlink()
|
max_file.unlink()
|
||||||
if varda_file.is_file() and varda_file.exists():
|
|
||||||
print(f"removing {varda_file.name}")
|
|
||||||
varda_file.unlink()
|
|
||||||
|
|
||||||
dispatcher.connect(crawler_results, signal=signals.item_scraped)
|
dispatcher.connect(crawler_results, signal=signals.item_scraped)
|
||||||
|
|
||||||
@ -58,7 +54,6 @@ def dagens_lunch_results():
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
_valsaren = None
|
_valsaren = None
|
||||||
_malmens = None
|
_malmens = None
|
||||||
_varda = None
|
|
||||||
_heat = None
|
_heat = None
|
||||||
_max = None
|
_max = None
|
||||||
res = dagens_lunch_results()
|
res = dagens_lunch_results()
|
||||||
@ -77,8 +72,6 @@ if __name__ == "__main__":
|
|||||||
_heat = json.dumps(dict(item))
|
_heat = json.dumps(dict(item))
|
||||||
elif item["place"].split(" ")[0].lower() == "max":
|
elif item["place"].split(" ")[0].lower() == "max":
|
||||||
_max = json.dumps(dict(item))
|
_max = json.dumps(dict(item))
|
||||||
elif item["place"].split(" ")[0].lower() == "varda":
|
|
||||||
_varda = json.dumps(dict(item))
|
|
||||||
elif item["place"].split(" ")[0].lower() == "unknown":
|
elif item["place"].split(" ")[0].lower() == "unknown":
|
||||||
print(f"please learn me more about {item['place']}")
|
print(f"please learn me more about {item['place']}")
|
||||||
f.write("\n]")
|
f.write("\n]")
|
||||||
@ -90,9 +83,6 @@ if __name__ == "__main__":
|
|||||||
malmens_file.write_text(_malmens)
|
malmens_file.write_text(_malmens)
|
||||||
print(f"created: {malmens_file}")
|
print(f"created: {malmens_file}")
|
||||||
|
|
||||||
varda_file.write_text(_varda)
|
|
||||||
print(f"created: {varda_file}")
|
|
||||||
|
|
||||||
heat_file.write_text(_heat)
|
heat_file.write_text(_heat)
|
||||||
print(f"created: {heat_file}")
|
print(f"created: {heat_file}")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user