diff --git a/dagens_lunch/settings.py b/dagens_lunch/settings.py
index 53d8c9b..791a524 100644
--- a/dagens_lunch/settings.py
+++ b/dagens_lunch/settings.py
@@ -89,4 +89,12 @@ ROBOTSTXT_OBEY = False
 
 # Set settings whose default value is deprecated to a future-proof value
 TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+FEEDS = {
+    "items.json": {
+        "format": "json",
+        "encoding": "utf8"
+    }
+}
 FEED_EXPORT_ENCODING = "utf-8"
+
+LOG_ENABLED = False
diff --git a/run_spiders.py b/run_spiders.py
new file mode 100755
--- /dev/null
+++ b/run_spiders.py
@@ -0,0 +1,73 @@
+"""Run all Scrapy spiders in the project and dump the scraped menus to JSON."""
+import pathlib
+import json
+
+from scrapy import spiderloader
+from scrapy import signals
+from scrapy.crawler import CrawlerProcess
+from scrapy.signalmanager import dispatcher
+from scrapy.utils.project import get_project_settings
+
+# Output directory and per-restaurant result files.
+out_dir = pathlib.Path(__file__).resolve().parent / "latest"
+all_file = out_dir / "all.json"
+place_files = {
+    "valsaren": out_dir / "valsaren.json",
+    "malmens": out_dir / "malmens.json",
+    "heat": out_dir / "heat.json",
+    "max": out_dir / "max.json",
+}
+
+
+def dagens_lunch_results():
+    """Run every spider in the project and return the list of scraped items."""
+    results = []
+    settings = get_project_settings()
+    process = CrawlerProcess(settings)
+    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
+
+    # Collects every scraped item via the item_scraped signal (connected below).
+    def crawler_results(signal, sender, item, response, spider):
+        results.append(item)
+        print(f"{spider.name} is done!")
+
+    # Remove stale output from a previous run. is_file() already implies
+    # exists(), and it must be *called* -- a bare `.is_file` is always truthy.
+    for old_file in [all_file, *place_files.values()]:
+        if old_file.is_file():
+            print(f"removing {old_file.name}")
+            old_file.unlink()
+
+    dispatcher.connect(crawler_results, signal=signals.item_scraped)
+
+    for spider_name in spider_loader.list():
+        print(f"getting menu from {spider_name}")
+        process.crawl(spider_name)
+
+    process.start()
+    return results
+
+
+if __name__ == "__main__":
+    per_place = {}
+    res = dagens_lunch_results()
+    with all_file.open("a") as f:
+        f.write("[\n")
+        for idx, item in enumerate(res):
+            if idx > 0:
+                f.write(",")
+            f.write(json.dumps(dict(item), indent=4))
+            place = item["place"].split(" ")[0].lower()
+            if place in place_files:
+                per_place[place] = json.dumps(dict(item))
+            elif place == "unknown":
+                print(f"please learn me more about {item['place']}")
+        f.write("\n]")
+    print(f"created: {all_file}")
+
+    # One JSON file per recognised restaurant; skip places that produced no
+    # item this run instead of crashing on write_text(None).
+    for place, path in place_files.items():
+        if place in per_place:
+            path.write_text(per_place[place])
+            print(f"created: {path}")