add_max #1

.gitignore (vendored)
@@ -1,3 +1,4 @@
 .vscode
+latest
 dagens_lunch/spiders/__pycache__/*.pyc
 dagens_lunch/__pycache__/*.pyc
dagens_lunch/items.py

@@ -42,3 +42,13 @@ class DagensLunchMalmens(scrapy.Item):
     friday = scrapy.Field()
     scraped_by = scrapy.Field()
     scraped_at = scrapy.Field()
+
+class DagensLunchMax(scrapy.Item):
+    place = scrapy.Field()
+    monday = scrapy.Field()
+    tuesday = scrapy.Field()
+    wednesday = scrapy.Field()
+    thursday = scrapy.Field()
+    friday = scrapy.Field()
+    scraped_by = scrapy.Field()
+    scraped_at = scrapy.Field()
dagens_lunch/settings.py

@@ -89,4 +89,12 @@ ROBOTSTXT_OBEY = False

 # Set settings whose default value is deprecated to a future-proof value
 TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+FEEDS = {
+    "items.json": {
+        "format": "json",
+        "encoding": "utf8"
+    }
+}
 FEED_EXPORT_ENCODING = "utf-8"
+
+LOG_ENABLED = False
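
The FEEDS entry above exports every scraped item from any crawl in this project to a single items.json in UTF-8 JSON, and LOG_ENABLED = False silences Scrapy's console log. A minimal sketch of an alternative, assuming Scrapy's per-spider custom_settings override; the latest/max.json path and the per-spider scoping are illustrative and not part of this PR:

import scrapy

class MaxSpider(scrapy.Spider):
    name = "max"
    # Sketch, not part of this PR: FEEDS declared here applies only to this
    # spider, so its output would land in latest/max.json instead of the
    # shared items.json configured in settings.py.
    custom_settings = {
        "FEEDS": {
            "latest/max.json": {"format": "json", "encoding": "utf8"},
        },
    }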
							
								
								
									
dagens_lunch/spiders/max.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+from datetime import datetime
+import scrapy
+from ..items import DagensLunchMax
+
+class MaxSpider(scrapy.Spider):
+    name = "max"
+    allowed_domains = ["max.se"]
+    start_urls = ["https://max.se/maten/meny/maltider/dagens-lunch/"]
+
+    def parse(self, response):
+        days = []
+        lista = response.xpath("//div/div[2]/div[contains(@class, 'o-product-info')]/ul[contains(@class, 'o-product-info__variations')]")
+        for li in lista.xpath("./li/text()").getall():
+            days.append(li.split(" ")[-1])
+        return DagensLunchMax(
+            place="Max",
+            monday=days[0],
+            tuesday=days[1],
+            wednesday=days[2],
+            thursday=days[3],
+            friday=days[4],
+            scraped_by=self.__class__.__name__,
+            scraped_at=f"{datetime.now().isoformat()}"
+        )
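
To try the new spider on its own (a sketch assuming the project layout shown in this PR, not code the PR adds), it can be started programmatically; the project settings, including the FEEDS export above, still apply:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from dagens_lunch.spiders.max import MaxSpider  # module path assumed from this PR

if __name__ == "__main__":
    # Run only the Max spider with the project's settings.
    process = CrawlerProcess(get_project_settings())
    process.crawl(MaxSpider)
    process.start()  # blocks until the crawl finishes; the item is exported via FEEDS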
							
								
								
									
run_spiders.py (new executable file, 89 lines)
@@ -0,0 +1,89 @@
+import pathlib
+import json
+
+from scrapy import spiderloader
+from scrapy import signals
+from scrapy.crawler import CrawlerProcess
+from scrapy.signalmanager import dispatcher
+from scrapy.utils.project import get_project_settings
+
+dir = pathlib.Path(__file__).resolve().parent
+dir = dir / "latest"
+file = dir / "all.json"
+valsaren_file = dir / "valsaren.json"
+malmens_file = dir / "malmens.json"
+heat_file = dir / "heat.json"
+max_file = dir / "max.json"
+
+def dagens_lunch_results():
+    results = []
+    settings = get_project_settings()
+    process = CrawlerProcess(settings)
+    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
+
+    def crawler_results(signal, sender, item, response, spider):
+        results.append(item)
+        print(f"{spider.name} is done!")
+
+
+    if file.is_file() and file.exists():
+        print(f"removing {file.name}")
+        file.unlink()
+    if valsaren_file.is_file() and valsaren_file.exists():
+        print(f"removing {valsaren_file.name}")
+        valsaren_file.unlink()
+    if malmens_file.is_file() and malmens_file.exists():
+        print(f"removing {malmens_file.name}")
+        malmens_file.unlink()
+    if heat_file.is_file() and heat_file.exists():
+        print(f"removing {heat_file.name}")
+        heat_file.unlink()
+    if max_file.is_file() and max_file.exists():
+        print(f"removing {max_file.name}")
+        max_file.unlink()
+
+    dispatcher.connect(crawler_results, signal=signals.item_scraped)
+
+    for spider_name in spider_loader.list():
+        print(f"getting menu from {spider_name}")
+        process.crawl(spider_name)
+
+    process.start()
+    return results
+
+if __name__ == "__main__":
+    _valsaren = None
+    _malmens = None
+    _heat = None
+    _max = None
+    res = dagens_lunch_results()
+    with file.open("a") as f:
+        f.write("[\n")
+        for idx, item in enumerate(res):
+            if idx > 0:
+                f.write(",")
+            f.write(json.dumps(dict(item), indent=4))
+            if item["place"].split(" ")[0].lower() == "valsaren":
+                _valsaren = json.dumps(dict(item))
+            elif item["place"].split(" ")[0].lower() == "malmens":
+                _malmens = json.dumps(dict(item))
+            elif item["place"].split(" ")[0].lower() == "heat":
+                _heat = json.dumps(dict(item))
+            elif item["place"].split(" ")[0].lower() == "max":
+                _max = json.dumps(dict(item))
+            elif item["place"].split(" ")[0].lower() == "unknown":
+                print(f"please teach me more about {item['place']}")
+        f.write("\n]")
+    print(f"created: {file}")
+
+    valsaren_file.write_text(_valsaren)
+    print(f"created: {valsaren_file}")
+
+    malmens_file.write_text(_malmens)
+    print(f"created: {malmens_file}")
+
+    heat_file.write_text(_heat)
+    print(f"created: {heat_file}")
+
+    max_file.write_text(_max)
+    print(f"created: {max_file}")
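
run_spiders.py gathers items through the item_scraped signal and then writes one JSON file per restaurant. If a spider yields no item, its placeholder (_valsaren, _malmens, _heat or _max) stays None and Path.write_text(None) raises TypeError. A hedged sketch that skips missing results, reusing the variables from run_spiders.py above (not part of this PR):

# Only write per-restaurant files whose spider actually produced an item,
# so write_text() is never called with None.
for path, payload in [
    (valsaren_file, _valsaren),
    (malmens_file, _malmens),
    (heat_file, _heat),
    (max_file, _max),
]:
    if payload is None:
        print(f"no menu scraped for {path.stem}, skipping")
        continue
    path.write_text(payload)
    print(f"created: {path}")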