dagens_lunch/run_spiders.py

101 lines
3.2 KiB
Python
Executable File

import pathlib
import json
from scrapy import spiderloader
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings
# Every output file lives in a "latest" folder that sits next to this script.
dir = pathlib.Path(__file__).resolve().parent.joinpath("latest")

# Combined file that receives all scraped items as one JSON array.
file = dir.joinpath("all.json")

# One file per known lunch place.
valsaren_file = dir.joinpath("valsaren.json")
malmens_file = dir.joinpath("malmens.json")
heat_file = dir.joinpath("heat.json")
max_file = dir.joinpath("max.json")
varda_file = dir.joinpath("varda.json")
def dagens_lunch_results():
    """Run every spider in the project and return the items they scraped.

    Output files left over from a previous run are deleted before the crawl
    starts. Items are collected by listening to Scrapy's ``item_scraped``
    signal while all spiders run inside a single ``CrawlerProcess``.

    Returns:
        list: the scraped items, in the order the signal delivered them.
    """
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    def crawler_results(signal, sender, item, response, spider):
        # Connected to item_scraped below, so this runs (and prints) once
        # per scraped item.
        results.append(item)
        print(f"{spider.name} is done!")

    stale_files = (
        file,
        valsaren_file,
        malmens_file,
        heat_file,
        max_file,
        varda_file,
    )
    for stale in stale_files:
        # is_file() is False for a missing path, so no separate exists()
        # check is needed; it also keeps unlink() away from directories.
        if stale.is_file():
            print(f"removing {stale.name}")
            stale.unlink()

    dispatcher.connect(crawler_results, signal=signals.item_scraped)
    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)

    # NOTE(review): start() is expected to block until all scheduled crawls
    # have finished, so results is complete on return - confirm against the
    # Scrapy version in use.
    process.start()
    return results
if __name__ == "__main__":
    # Script entry point: scrape all menus, write every item to the combined
    # JSON file, then write one JSON file per known place.

    # Output file for each known place, keyed by the lowercased first word
    # of an item's "place" field. Insertion order is the order files are
    # written in.
    place_files = {
        "valsaren": valsaren_file,
        "malmens": malmens_file,
        "varda": varda_file,
        "heat": heat_file,
        "max": max_file,
    }
    # Serialized item per place; if several items share a place the last
    # one wins. A place with no scraped item never gets an entry.
    latest = {}

    res = dagens_lunch_results()
    dir.mkdir(parents=True, exist_ok=True)

    for item in res:
        place = item["place"].split(" ")[0].lower()
        if place in place_files:
            latest[place] = json.dumps(dict(item))
        elif place == "unknown":
            print(f"please learn me more about {item['place']}")

    # Open with "w" instead of "a": appending to a file that survived the
    # cleanup step would produce two concatenated arrays, i.e. invalid JSON.
    with file.open("w") as f:
        f.write("[\n")
        f.write(",".join(json.dumps(dict(item), indent=4) for item in res))
        f.write("\n]")
    print(f"created: {file}")

    for place, path in place_files.items():
        payload = latest.get(place)
        if payload is None:
            # write_text(None) raises TypeError; skip places that produced
            # no item instead of crashing after the crawl.
            print(f"no menu scraped for {place}, skipping {path.name}")
            continue
        path.write_text(payload)
        print(f"created: {path}")