Compare commits
5 commits: a53de15fbb...75e4771ef3

Commits in this range:

- 75e4771ef3
- cbb2936d26
- d9e7d6dc01
- 4d4e03d933
- 4c877e346f
.gitignore (vendored)

@@ -1,3 +1,4 @@
 .vscode
+latest
 dagens_lunch/spiders/__pycache__/*.pyc
 dagens_lunch/__pycache__/*.pyc
@@ -42,3 +42,13 @@ class DagensLunchMalmens(scrapy.Item):
     friday = scrapy.Field()
     scraped_by = scrapy.Field()
     scraped_at = scrapy.Field()
+
+class DagensLunchMax(scrapy.Item):
+    place = scrapy.Field()
+    monday = scrapy.Field()
+    tuesday = scrapy.Field()
+    wednesday = scrapy.Field()
+    thursday = scrapy.Field()
+    friday = scrapy.Field()
+    scraped_by = scrapy.Field()
+    scraped_at = scrapy.Field()
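The new DagensLunchMax item mirrors the existing per-restaurant items: one scrapy.Field() per weekday plus place and the scraped_by / scraped_at provenance fields. For context, a declared Item behaves like a dict restricted to its Fields; a minimal sketch (the import path and the menu strings are illustrative, not part of the diff):

```python
# Illustrative only: dict-like behaviour of the declared Item.
from dagens_lunch.items import DagensLunchMax  # assumes the project package is importable

item = DagensLunchMax(place="Max", monday="Dagens pasta")  # placeholder menu text
item["tuesday"] = "Dagens soppa"   # assignment works for declared Fields only
print(dict(item))                  # {'place': 'Max', 'monday': 'Dagens pasta', 'tuesday': 'Dagens soppa'}
# item["saturday"] = "..."         # would raise KeyError: 'saturday' is not a declared Field
```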
@@ -96,3 +96,5 @@ FEEDS = {
     }
 }
 FEED_EXPORT_ENCODING = "utf-8"
+
+LOG_ENABLED = False
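LOG_ENABLED = False turns off Scrapy's logging for every crawl in the project, so the new run_spiders.py below only prints its own status lines. A quick sanity check, assuming it is run from the project root so get_project_settings() can locate the settings module:

```python
# Confirm the project settings pick up the new values (run from the project root).
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
print(settings.getbool("LOG_ENABLED"))        # False after this change
print(settings.get("FEED_EXPORT_ENCODING"))   # utf-8
```

Logging can still be re-enabled for a single run with `scrapy crawl max -s LOG_ENABLED=True`.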
dagens_lunch/spiders/max.py (new file)

@@ -0,0 +1,24 @@
from datetime import datetime
import scrapy
from ..items import DagensLunchMax

class MaxSpider(scrapy.Spider):
    name = "max"
    allowed_domains = ["max.se"]
    start_urls = ["https://max.se/maten/meny/maltider/dagens-lunch/"]

    def parse(self, response):
        days = []
        lista = response.xpath("//div/div[2]/div[contains(@class, 'o-product-info')]/ul[contains(@class, 'o-product-info__variations')]")
        for li in lista.xpath("./li/text()").getall():
            days.append(li.split(" ")[-1])
        return DagensLunchMax(
            place="Max",
            monday=days[0],
            tuesday=days[1],
            wednesday=days[2],
            thursday=days[3],
            friday=days[4],
            scraped_by=self.__class__.__name__,
            scraped_at=f"{datetime.now().isoformat()}"
        )
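The parse method leans on the structure of the max.se page: it takes the last whitespace-separated word of each <li> in the matched product-info list as a dish, and indexing days[0] through days[4] assumes at least five entries (otherwise it raises IndexError). The spider can be exercised on its own with the usual scrapy crawl max, or in-process; a minimal sketch, assuming it is run from the Scrapy project root:

```python
# Run only the new Max spider in-process; project settings (FEEDS, LOG_ENABLED)
# are located via scrapy.cfg in the working directory.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl("max")   # looked up by the spider's `name` attribute
process.start()        # blocks until the crawl finishes
```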
run_spiders.py (new executable file)

@@ -0,0 +1,89 @@
import pathlib
import json

from scrapy import spiderloader
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings

# all output goes to latest/ next to this script
dir = pathlib.Path(__file__).resolve().parent
dir = dir / "latest"
file = dir / "all.json"
valsaren_file = dir / "valsaren.json"
malmens_file = dir / "malmens.json"
heat_file = dir / "heat.json"
max_file = dir / "max.json"

def dagens_lunch_results():
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    # collect every scraped item via the item_scraped signal
    def crawler_results(signal, sender, item, response, spider):
        results.append(item)
        print(f"{spider.name} is done!")

    # remove output from a previous run
    if file.is_file() and file.exists():
        print(f"removing {file.name}")
        file.unlink()
    if valsaren_file.is_file() and valsaren_file.exists():
        print(f"removing {valsaren_file.name}")
        valsaren_file.unlink()
    if malmens_file.is_file() and malmens_file.exists():
        print(f"removing {malmens_file.name}")
        malmens_file.unlink()
    if heat_file.is_file() and heat_file.exists():
        print(f"removing {heat_file.name}")
        heat_file.unlink()
    if max_file.is_file() and max_file.exists():
        print(f"removing {max_file.name}")
        max_file.unlink()

    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    # crawl every spider the project defines
    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)

    process.start()
    return results

if __name__ == "__main__":
    _valsaren = None
    _malmens = None
    _heat = None
    _max = None
    res = dagens_lunch_results()
    # write all menus as one JSON array and remember each restaurant's item
    with file.open("a") as f:
        f.write("[\n")
        for idx, item in enumerate(res):
            if idx > 0:
                f.write(",")
            f.write(json.dumps(dict(item), indent=4))
            if item["place"].split(" ")[0].lower() == "valsaren":
                _valsaren = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "malmens":
                _malmens = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "heat":
                _heat = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "max":
                _max = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "unknown":
                print(f"please teach me more about {item['place']}")
        f.write("\n]")
    print(f"created: {file}")

    # one file per restaurant
    valsaren_file.write_text(_valsaren)
    print(f"created: {valsaren_file}")

    malmens_file.write_text(_malmens)
    print(f"created: {malmens_file}")

    heat_file.write_text(_heat)
    print(f"created: {heat_file}")

    max_file.write_text(_max)
    print(f"created: {max_file}")
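Taken together, run_spiders.py discovers every spider in the project with SpiderLoader, runs them all in a single CrawlerProcess, collects the scraped items through the item_scraped signal, and then writes latest/all.json as one JSON array plus one file per restaurant chosen by the first word of each item's place field. One consequence worth noting: if a spider yields nothing, its placeholder stays None and the corresponding write_text(None) call raises TypeError. A small sketch of reading the aggregated output back (assumes the script has already been run and this snippet sits next to it):

```python
# Read latest/all.json produced by run_spiders.py and print a one-line summary per place.
import json
import pathlib

latest = pathlib.Path(__file__).resolve().parent / "latest"
menus = json.loads((latest / "all.json").read_text(encoding="utf-8"))
for menu in menus:
    print(f'{menu["place"]}: scraped at {menu.get("scraped_at", "?")}')
```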