-
Notifications
You must be signed in to change notification settings - Fork 531
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'hhursev:main' into grouping-recipetineats
- Loading branch information
Showing
68 changed files
with
41,881 additions
and
9,183 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,10 @@ name = "recipe_scrapers" | |
description = "Python package, scraping recipes from all over the internet" | ||
authors = [ | ||
{name = "Hristo Harsev", email = "[email protected]"}, | ||
{name = "James Addison", email = "[email protected]"}, | ||
] | ||
maintainers = [ | ||
{name = "James Addison", email = "[email protected]"}, | ||
] | ||
urls = {Homepage = "https://github.com/hhursev/recipe-scrapers/"} | ||
keywords = ["python", "recipes", "scraper", "harvest", "recipe-scraper", "recipe-scrapers"] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,18 @@ | ||
import re | ||
|
||
from ._abstract import AbstractScraper | ||
from ._exceptions import StaticValueException | ||
from ._utils import normalize_string | ||
|
||
|
||
class AlbertHeijn(AbstractScraper):
    """Scraper for recipes hosted on ah.nl."""

    @classmethod
    def host(cls):
        """Return the domain name handled by this scraper."""
        return "ah.nl"

    def site_name(self):
        """Report the fixed site name by raising ``StaticValueException``.

        Raises:
            StaticValueException: always, with ``return_value`` set to
                "Albert Heijn".
        """
        raise StaticValueException(return_value="Albert Heijn")

    def instructions(self):
        """Return the schema instructions without lines starting with "stap".

        The text returned by ``self.schema.instructions()`` is split on
        newlines, every line whose lower-cased form starts with "stap" is
        dropped, and the remaining lines are joined with newlines again.

        Returns:
            str: the filtered, newline-separated instructions.
        """
        instructions = self.schema.instructions()

        # NOTE(review): the dropped lines are presumably step headings such
        # as "Stap 1" -- confirm against real page data.
        filtered_instructions = [
            line
            for line in instructions.split("\n")
            if not line.lower().startswith("stap")
        ]

        return "\n".join(filtered_instructions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from ._abstract import AbstractScraper | ||
from ._exceptions import StaticValueException | ||
|
||
|
||
class AldiNord(AbstractScraper):
    """Scraper whose default host is aldi-nord.de."""

    @classmethod
    def host(cls, domain: str = "aldi-nord.de"):
        """Return ``domain``, which defaults to "aldi-nord.de"."""
        return domain

    def author(self):
        """Return the schema author, falling back to the static value "ALDI".

        Raises:
            StaticValueException: when ``self.schema.author()`` is falsy;
                the exception carries "ALDI" as ``return_value``.
        """
        schema_author = self.schema.author()
        if not schema_author:
            raise StaticValueException(return_value="ALDI")
        return schema_author

    def site_name(self):
        """Report the fixed site name by raising ``StaticValueException``.

        Raises:
            StaticValueException: always, with ``return_value`` set to "ALDI".
        """
        raise StaticValueException(return_value="ALDI")

    def instructions(self):
        """Return the schema ``recipeInstructions`` text with whitespace fixes.

        Non-breaking spaces become plain spaces, then each CRLF followed by
        a space becomes a single newline. A missing key yields "".
        """
        # assumes recipeInstructions is a plain string here -- TODO confirm
        raw_text = self.schema.data.get("recipeInstructions", "")
        without_nbsp = raw_text.replace("\xa0", " ")
        return without_nbsp.replace("\r\n ", "\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from ._abstract import AbstractScraper | ||
|
||
|
||
class AldiSued(AbstractScraper):
    """Scraper whose default host is aldi-sued.de."""

    @classmethod
    def host(cls, domain="aldi-sued.de"):
        """Return ``domain``, which defaults to "aldi-sued.de"."""
        return domain

    def instructions(self):
        """Join the ``text`` of every schema instruction element with newlines.

        Elements with a missing or empty ``text`` are skipped, and soft
        hyphens (``\\xad``) are stripped from the text that is kept.
        """
        steps = []
        # assumes each element is a mapping with an optional "text" key
        # -- TODO confirm against the schema data
        for element in self.schema.data.get("recipeInstructions", []):
            text = element.get("text")
            if text:
                steps.append(text.replace("\xad", ""))
        return "\n".join(steps)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .aldisued import AldiSued | ||
|
||
|
||
class AldiSuisse(AldiSued):
    """``AldiSued`` variant whose default host is aldi-suisse.ch."""

    @classmethod
    def host(cls, domain="aldi-suisse.ch"):
        """Return ``domain``, which defaults to "aldi-suisse.ch"."""
        return domain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .aldisued import AldiSued | ||
|
||
|
||
class Hofer(AldiSued):
    """``AldiSued`` variant whose default host is hofer.at."""

    @classmethod
    def host(cls, domain="hofer.at"):
        """Return ``domain``, which defaults to "hofer.at"."""
        return domain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from ._abstract import AbstractScraper | ||
from ._utils import csv_to_tags, get_minutes, get_yields, normalize_string | ||
|
||
|
||
class QuiToque(AbstractScraper):
    """Scraper for quitoque.fr that reads its fields from the page HTML."""

    @classmethod
    def host(cls):
        """Return the domain name handled by this scraper."""
        return "quitoque.fr"

    @staticmethod
    def _get_text(element):
        """Return the normalized text of ``element``, or None if it is falsy."""
        if not element:
            return None
        return normalize_string(element.get_text())

    def _get_time(self, time_name):
        """Return the minutes parsed from the element labelled ``time_name``.

        Every selected element whose text contains ``time_name`` is
        considered; the last one wins. Its normalized text, with the label
        removed, is passed to ``get_minutes``. When nothing matches,
        ``get_minutes`` receives None.
        """
        # NOTE(review): the comma makes this a selector list, so it matches
        # every <div> as well as ".recipe-infos-short .item-info"; possibly
        # meant "div.recipe-infos-short" -- confirm before changing.
        candidates = self.soup.select("div,.recipe-infos-short .item-info")
        raw_duration = None
        for candidate in candidates:
            if time_name not in candidate.get_text():
                continue
            raw_duration = self._get_text(candidate).replace(time_name, "")
        return get_minutes(raw_duration)

    def _get_nutrient(self, nutrient_name):
        """Return the text of the value paired with label ``nutrient_name``.

        Looks inside ``self._nutrients``, which ``nutrients()`` assigns
        before calling this helper.
        """
        label = self._nutrients.find("p", string=nutrient_name)
        value = label.parent.find("p", class_="regular")
        return self._get_text(value)

    def canonical_url(self):
        """Return the ``content`` of the ``og:url`` meta tag."""
        og_url = self.soup.find("meta", {"property": "og:url"})
        return og_url.get("content")

    def author(self):
        """Return the fixed author name."""
        return "QuiToque"

    def title(self):
        """Return the text of the ``h1`` with class ``title-2``."""
        heading = self.soup.find("h1", class_="title-2")
        return self._get_text(heading)

    def keywords(self):
        """Return the ``badge`` texts under ``#product-tags`` via ``csv_to_tags``."""
        badges = self.soup.find(id="product-tags").find_all(class_="badge")
        joined = ",".join(self._get_text(badge) for badge in badges)
        return csv_to_tags(joined)

    def category(self):
        """Return the text of the first ``primary-ghost`` element."""
        return self._get_text(self.soup.find(class_="primary-ghost"))

    def total_time(self):
        """Return the minutes found for the "Total" label."""
        return self._get_time("Total")

    def prep_time(self):
        """Return the minutes found for the "En cuisine" label."""
        return self._get_time("En cuisine")

    def yields(self):
        """Return ``get_yields`` of the ``p.body-2`` inside ``#ingredients``."""
        ingredients_section = self.soup.find(id="ingredients")
        return get_yields(ingredients_section.find("p", class_="body-2"))

    def image(self):
        """Return the ``src`` of the ``img`` inside the first ``image`` element."""
        container = self.soup.find(class_="image")
        return container.find("img")["src"]

    def ingredients(self):
        """Return ingredient texts from the ingredient list, then the kitchen list."""
        items = [
            *self.soup.select("#ingredients .ingredient-list li"),
            *self.soup.select(".kitchen-list li"),
        ]
        return [self._get_text(item) for item in items]

    def equipment(self):
        """Return the texts of the list items under ``#equipment``."""
        return [
            self._get_text(item)
            for item in self.soup.select("#equipment .ingredient-list li")
        ]

    def instructions(self):
        """Return the preparation step texts joined with newlines."""
        steps = self.soup.select("#preparation-steps li")
        return "\n".join(self._get_text(step) for step in steps)

    def description(self):
        """Return the text of the description container ``div``."""
        container = self.soup.find(
            "div", class_="container body-2 regular mt-2 mb-4"
        )
        return self._get_text(container)

    def nutrients(self):
        """Return a dict of nutrient texts read from the ``#portion`` element.

        Side effect: stores the ``#portion`` element on ``self._nutrients``,
        which ``_get_nutrient`` reads.
        """
        self._nutrients = self.soup.find(id="portion")
        # Result key -> label text as it appears on the page.
        labels = {
            "calories": "Énergie (kCal)",
            "fatContent": "Matières grasses",
            "saturatedFatContent": "dont acides gras saturés",
            "carbohydrateContent": "Glucides",
            "sugarContent": "dont sucre",
            "fiberContent": "Fibres",
            "proteinContent": "Protéines",
        }
        return {key: self._get_nutrient(label) for key, label in labels.items()}
Oops, something went wrong.