Merge rubrik and text for each entry per day into a single string

This commit is contained in:
puckoprutt 2025-02-18 22:13:04 +01:00
parent 30471c96c3
commit 9ec5347a7f

View File

@@ -21,15 +21,29 @@ class HeatSpider(scrapy.Spider):
for idx in range(1, 8):
if not tree.find(f".//mandagratt{idx}rubrik") is None:
self.monday.append({"rubrik": tree.find(f".//mandagratt{idx}rubrik").text, "text": tree.find(f".//mandagratt{idx}text").text})
rubrik = tree.find(f".//mandagratt{idx}rubrik").text
text = tree.find(f".//mandagratt{idx}text").text
self.monday.append(f"{rubrik} {text}".strip())
if not tree.find(f".//tisdagratt{idx}rubrik") is None:
self.tuesday.append({"rubrik": tree.find(f".//tisdagratt{idx}rubrik").text, "text": tree.find(f".//tisdagratt{idx}text").text})
rubrik = tree.find(f".//tisdagratt{idx}rubrik").text
text = tree.find(f".//tisdagratt{idx}text").text
self.tuesday.append(f"{rubrik} {text}".strip())
if not tree.find(f".//onsdagratt{idx}rubrik") is None:
self.wednesday.append({"rubrik": tree.find(f".//onsdagratt{idx}rubrik").text, "text": tree.find(f".//onsdagratt{idx}text").text})
rubrik = tree.find(f".//onsdagratt{idx}rubrik").text
text = tree.find(f".//onsdagratt{idx}text").text
self.wednesday.append(f"{rubrik} {text}".strip())
if not tree.find(f".//torsdagratt{idx}rubrik") is None:
self.thursday.append({"rubrik": tree.find(f".//torsdagratt{idx}rubrik").text, "text": tree.find(f".//torsdagratt{idx}text").text})
rubrik = tree.find(f".//torsdagratt{idx}rubrik").text
text = tree.find(f".//torsdagratt{idx}text").text
self.thursday.append(f"{rubrik} {text}".strip())
if not tree.find(f".//fredagratt{idx}rubrik") is None:
self.friday.append({"rubrik": tree.find(f".//fredagratt{idx}rubrik").text, "text": tree.find(f".//fredagratt{idx}text").text})
rubrik = tree.find(f".//fredagratt{idx}rubrik").text
text = tree.find(f".//fredagratt{idx}text").text
self.friday.append(f"{rubrik} {text}".strip())
dagens["week"] = self.week
dagens["monday"] = self.monday
@@ -44,7 +58,6 @@ class HeatSpider(scrapy.Spider):
url = ""
dagens = DagensLunchHeat()
dagens["place"] = "Heat Kopparlunden"
dagens["scraped_by"] = f"{self.__class__.__name__}"
for js in scripts:
if not "jQuery(function( $ ){" in js:
continue
@@ -54,6 +67,7 @@ class HeatSpider(scrapy.Spider):
if "url:\"https://castit.nu/xml" in line:
url = line[5:-2]
yield scrapy.Request(url, callback=self.extract_xml, meta={"dagens": dagens})
dagens["scraped_by"] = f"{self.__class__.__name__}"
dagens["scraped_at"] = f"{datetime.now().isoformat()}"
return dagens
return dagens