"""Run every spider of the Scrapy project and dump the scraped items as JSON.

When executed as a script, the combined result is written to
``latest/all.json`` (relative to this file) and the item of each recognised
place is additionally written to its own ``latest/<place>.json`` file.
"""
import json
import pathlib

from scrapy import signals
from scrapy import spiderloader
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings

# NOTE: ``dir`` and ``file`` shadow builtins. The names are kept unchanged
# because other modules may import them from here.
dir = pathlib.Path(__file__).resolve().parent
dir = dir / "latest"
file = dir / "all.json"
valsaren_file = dir / "valsaren.json"
malmens_file = dir / "malmens.json"
heat_file = dir / "heat.json"
max_file = dir / "max.json"
varda_file = dir / "varda.json"

# First word of an item's "place" (lower-cased) -> file receiving that item.
# Kept as a tuple of pairs so the per-place files are written in a fixed order.
_PLACE_FILES = (
    ("valsaren", valsaren_file),
    ("malmens", malmens_file),
    ("varda", varda_file),
    ("heat", heat_file),
    ("max", max_file),
)


def _remove_stale(path):
    """Delete *path* if it is an existing regular file; otherwise do nothing."""
    # ``is_file()`` is already False for missing paths and for directories,
    # so no separate ``exists()`` check is needed.
    if path.is_file():
        print(f"removing {path.name}")
        path.unlink()


def dagens_lunch_results():
    """Crawl all spiders known to the project and return the scraped items.

    Output files left over from a previous run are removed before crawling.

    Returns:
        list: every item emitted through Scrapy's ``item_scraped`` signal,
        in the order the signal fired.
    """
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    def crawler_results(signal, sender, item, response, spider):
        # Signal handler: collect each scraped item as it arrives.
        results.append(item)
        print(f"{spider.name} is done!")

    for stale in (
        file,
        valsaren_file,
        malmens_file,
        heat_file,
        max_file,
        varda_file,
    ):
        _remove_stale(stale)

    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)

    # Per the Scrapy docs, start() blocks until all scheduled crawls finish.
    process.start()
    return results


def _place_key(record):
    """Return the lower-cased first word of ``record["place"]`` ("" if absent)."""
    place = str(record.get("place") or "")
    return place.split(" ")[0].lower()


def main():
    """Crawl, then write ``all.json`` and one JSON file per recognised place."""
    res = dagens_lunch_results()
    dir.mkdir(parents=True, exist_ok=True)

    records = [dict(item) for item in res]
    known_places = dict(_PLACE_FILES)

    # Serialised item per recognised place; a later item for the same place
    # replaces an earlier one.
    per_place = {}
    for record in records:
        key = _place_key(record)
        if key in known_places:
            per_place[key] = json.dumps(record)
        elif key == "unknown":
            print(f"please learn me more about {record['place']}")

    # "w" rather than "a": a leftover file must never be appended to, since
    # that would yield two concatenated JSON arrays.
    with file.open("w") as f:
        f.write("[\n")
        f.write(",".join(json.dumps(record, indent=4) for record in records))
        f.write("\n]")
    print(f"created: {file}")

    for key, path in _PLACE_FILES:
        payload = per_place.get(key)
        if payload is None:
            # No spider produced an item for this place; write_text(None)
            # would raise TypeError, so skip the file instead.
            print(f"no item scraped for {key}, skipping {path}")
            continue
        path.write_text(payload)
        print(f"created: {path}")


if __name__ == "__main__":
    main()