add_max #1

.gitignore (3, vendored)
@@ -1,3 +1,4 @@
.vscode
latest
dagens_lunch/spiders/__pycache__/*.pyc
dagens_lunch/__pycache__/*.pyc
dagens_lunch/items.py
@@ -42,3 +42,13 @@ class DagensLunchMalmens(scrapy.Item):
    friday = scrapy.Field()
    scraped_by = scrapy.Field()
    scraped_at = scrapy.Field()

class DagensLunchMax(scrapy.Item):
    place = scrapy.Field()
    monday = scrapy.Field()
    tuesday = scrapy.Field()
    wednesday = scrapy.Field()
    thursday = scrapy.Field()
    friday = scrapy.Field()
    scraped_by = scrapy.Field()
    scraped_at = scrapy.Field()
dagens_lunch/settings.py
@@ -89,4 +89,12 @@ ROBOTSTXT_OBEY = False

# Set settings whose default value is deprecated to a future-proof value
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEEDS = {
    "items.json": {
        "format": "json",
        "encoding": "utf8"
    }
}
FEED_EXPORT_ENCODING = "utf-8"

LOG_ENABLED = False
dagens_lunch/spiders/max.py (24, Normal file)
@@ -0,0 +1,24 @@
from datetime import datetime
import scrapy
from ..items import DagensLunchMax

class MaxSpider(scrapy.Spider):
    name = "max"
    allowed_domains = ["max.se"]
    start_urls = ["https://max.se/maten/meny/maltider/dagens-lunch/"]

    def parse(self, response):
        days = []
        lista = response.xpath("//div/div[2]/div[contains(@class, 'o-product-info')]/ul[contains(@class, 'o-product-info__variations')]")
        for li in lista.xpath("./li/text()").getall():
            days.append(li.split(" ")[-1])
        return DagensLunchMax(
            place="Max",
            monday=days[0],
            tuesday=days[1],
            wednesday=days[2],
            thursday=days[3],
            friday=days[4],
            scraped_by=self.__class__.__name__,
            scraped_at=f"{datetime.now().isoformat()}"
        )
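For reference, the new spider can also be exercised on its own, without run_spiders.py; a minimal sketch, assuming it is run from the project root so that get_project_settings() picks up dagens_lunch/settings.py:

# Minimal sketch (not part of this diff): crawl only the Max spider and let
# the FEEDS setting from settings.py write the scraped item to items.json.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from dagens_lunch.spiders.max import MaxSpider

if __name__ == "__main__":
    process = CrawlerProcess(get_project_settings())
    process.crawl(MaxSpider)
    process.start()  # blocks until the crawl has finished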
run_spiders.py (89, Executable file)
@@ -0,0 +1,89 @@
import pathlib
import json

from scrapy import spiderloader
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from scrapy.utils.project import get_project_settings

dir = pathlib.Path(__file__).resolve().parent
dir = dir / "latest"
file = dir / "all.json"
valsaren_file = dir / "valsaren.json"
malmens_file = dir / "malmens.json"
heat_file = dir / "heat.json"
max_file = dir / "max.json"

def dagens_lunch_results():
    results = []
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    def crawler_results(signal, sender, item, response, spider):
        results.append(item)
        print(f"{spider.name} is done!")

    if file.is_file() and file.exists():
        print(f"removing {file.name}")
        file.unlink()
    if valsaren_file.is_file() and valsaren_file.exists():
        print(f"removing {valsaren_file.name}")
        valsaren_file.unlink()
    if malmens_file.is_file() and malmens_file.exists():
        print(f"removing {malmens_file.name}")
        malmens_file.unlink()
    if heat_file.is_file() and heat_file.exists():
        print(f"removing {heat_file.name}")
        heat_file.unlink()
    if max_file.is_file() and max_file.exists():
        print(f"removing {max_file.name}")
        max_file.unlink()

    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    for spider_name in spider_loader.list():
        print(f"getting menu from {spider_name}")
        process.crawl(spider_name)

    process.start()
    return results

if __name__ == "__main__":
    _valsaren = None
    _malmens = None
    _heat = None
    _max = None
    res = dagens_lunch_results()
    with file.open("a") as f:
        f.write("[\n")
        for idx, item in enumerate(res):
            if idx > 0:
                f.write(",")
            f.write(json.dumps(dict(item), indent=4))
            if item["place"].split(" ")[0].lower() == "valsaren":
                _valsaren = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "malmens":
                _malmens = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "heat":
                _heat = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "max":
                _max = json.dumps(dict(item))
            elif item["place"].split(" ")[0].lower() == "unknown":
                print(f"please learn me more about {item['place']}")
        f.write("\n]")
    print(f"created: {file}")

    valsaren_file.write_text(_valsaren)
    print(f"created: {valsaren_file}")

    malmens_file.write_text(_malmens)
    print(f"created: {malmens_file}")

    heat_file.write_text(_heat)
    print(f"created: {heat_file}")

    max_file.write_text(_max)
    print(f"created: {max_file}")