run all spiders in different threads
This commit is contained in:
parent
4c877e346f
commit
4d4e03d933
@@ -89,4 +89,12 @@ ROBOTSTXT_OBEY = False
# Set settings whose default value is deprecated to a future-proof value
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEEDS = {
    "items.json": {
        "format": "json",
        "encoding": "utf8"
    }
}
FEED_EXPORT_ENCODING = "utf-8"

LOG_ENABLED = False
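Side note, not part of this commit: Scrapy's FEEDS keys also accept URI placeholders, so the feed exporter itself can produce one file per spider instead of the post-hoc splitting done in run_spiders.py below. A minimal sketch, assuming Scrapy >= 2.4 for the "overwrite" option:

FEEDS = {
    "latest/%(name)s.json": {  # %(name)s expands to each spider's name
        "format": "json",
        "encoding": "utf8",
        "overwrite": True,  # requires Scrapy >= 2.4
    }
}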
run_spiders.py (Executable file, 89 lines)
@@ -0,0 +1,89 @@
import pathlib
import json

from scrapy import spiderloader
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings

# Output paths: everything lands in ./latest next to this script.
dir = pathlib.Path(__file__).resolve().parent
dir = dir / "latest"
file = dir / "all.json"
valsaren_file = dir / "valsaren.json"
malmens_file = dir / "malmens.json"
heat_file = dir / "heat.json"
max_file = dir / "max.json"


def dagens_lunch_results():
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    def crawler_results(signal, sender, item, response, spider):
        # Collect every scraped item, regardless of which spider emitted it.
        results.append(item)
        print(f"{spider.name} is done!")

    # Clear out the previous run's output files before crawling.
    if file.is_file() and file.exists():
        print(f"removing {file.name}")
        file.unlink()
    if valsaren_file.is_file() and valsaren_file.exists():
        print(f"removing {valsaren_file.name}")
        valsaren_file.unlink()
    if malmens_file.is_file() and malmens_file.exists():
        print(f"removing {malmens_file.name}")
        malmens_file.unlink()
    if heat_file.is_file() and heat_file.exists():
        print(f"removing {heat_file.name}")
        heat_file.unlink()
    if max_file.is_file() and max_file.exists():
        print(f"removing {max_file.name}")
        max_file.unlink()

    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    # Schedule every spider in the project on the same CrawlerProcess;
    # start() then runs all the crawls concurrently on one Twisted
    # reactor and blocks until every spider has finished.
    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)

    process.start()
    return results


if __name__ == "__main__":
    _valsaren = None
    _malmens = None
    _heat = None
    _max = None
    res = dagens_lunch_results()
    with file.open("a") as f:
        f.write("[\n")
        for idx, item in enumerate(res):
            if idx > 0:
                f.write(",")
            f.write(json.dumps(dict(item), indent=4))
            # Route each item to its per-place dump by the first word
            # of the "place" field.
            if item["place"].split(" ")[0].lower() == "valsaren":
                _valsaren = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "malmens":
                _malmens = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "heat":
                _heat = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "max":
                _max = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "unknown":
                print(f"please teach me more about {item['place']}")
        f.write("\n]")
    print(f"created: {file}")

    # Note: these assume every place produced at least one item;
    # write_text() raises TypeError if a value is still None.
    valsaren_file.write_text(_valsaren)
    print(f"created: {valsaren_file}")

    malmens_file.write_text(_malmens)
    print(f"created: {malmens_file}")

    heat_file.write_text(_heat)
    print(f"created: {heat_file}")

    max_file.write_text(_max)
    print(f"created: {max_file}")
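For comparison, a hedged refactor sketch, not part of this commit: the repeated per-place if/elif chains and write_text blocks collapse into one loop once the place keywords are mapped to their output paths in a dict. The names below mirror the script above but are illustrative only:

import json
import pathlib

base = pathlib.Path(__file__).resolve().parent / "latest"
place_files = {name: base / f"{name}.json"
               for name in ("valsaren", "malmens", "heat", "max")}

def write_place_dumps(results):
    # Keep the last item seen for each known place, then dump each one.
    latest = {}
    for item in results:
        place = item["place"].split(" ")[0].lower()
        if place in place_files:
            latest[place] = dict(item)
        elif place == "unknown":
            print(f"please teach me more about {item['place']}")
    for place, data in latest.items():
        place_files[place].write_text(json.dumps(data))
        print(f"created: {place_files[place]}")

Mapping place to path also makes adding a new restaurant a one-line change, and it sidesteps the None/write_text problem noted in the comments above.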