From 5518ae2d942ca0288f1d0e635f730d49a0932258 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:17:38 +0100
Subject: [PATCH 01/15] MarleySpoon: add precautionary check for unexpected API
 URLs.

---
 recipe_scrapers/marleyspoon.py           | 15 ++++++++++++++-
 tests/legacy/test_data/faulty.testhtml   |  6 ++++++
 tests/legacy/test_marleyspoon_invalid.py | 24 ++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 tests/legacy/test_data/faulty.testhtml
 create mode 100644 tests/legacy/test_marleyspoon_invalid.py
diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 1325a201b..989fe5690 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -1,11 +1,12 @@
 # mypy: disallow_untyped_defs=False
+from urllib.parse import urljoin, urlsplit
 import json
 import re
 
 import requests
 
 from ._abstract import HEADERS, AbstractScraper
-from ._exceptions import ElementNotFoundInHtml
+from ._exceptions import ElementNotFoundInHtml, RecipeScrapersExceptions
 from ._utils import normalize_string
 
 ID_PATTERN = re.compile(r"/(\d+)-")
@@ -65,6 +66,18 @@ def _get_json_params(self):
         if api_url is None or api_token is None:
             raise ElementNotFoundInHtml("Required script not found.")
 
+        scraper_name = self.__class__.__name__
+        expected_domain = scraper_name.lower()
+        try:
+            api_url = urljoin(self.url, api_url)
+            url_info = urlsplit(api_url)
+            domain_prefix, _ = url_info.hostname.rsplit(".", 1)
+            if not f".{domain_prefix}".endswith(f".{expected_domain}"):
+                msg = f"Domain for {api_url} does not contain expected part: {expected_domain}"
+                raise ValueError(msg)
+        except Exception:
+            raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}")
+
         return api_url, api_token
 
     @classmethod
diff --git a/tests/legacy/test_data/faulty.testhtml b/tests/legacy/test_data/faulty.testhtml
new file mode 100644
index 000000000..b0c4f2998
--- /dev/null
+++ b/tests/legacy/test_data/faulty.testhtml
@@ -0,0 +1,6 @@
+<!DOCTYPE html>
+<html>
+<script>
+gon.current_brand="test_invalid"; gon.current_country="XX"; gon.api_token=" ".trim() || null; gon.api_host="http://api.marlarkey.invalid";
+</script>
+</html>
diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
new file mode 100644
index 000000000..b6b5426f3
--- /dev/null
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -0,0 +1,24 @@
+import unittest
+
+import responses
+
+from recipe_scrapers._exceptions import RecipeScrapersExceptions
+from recipe_scrapers.marleyspoon import MarleySpoon
+
+
+class TestFaultyAPIURLResponse(unittest.TestCase):
+
+    @responses.activate
+    def test_invalid_scraper(self):
+        valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
+        with open("tests/legacy/test_data/faulty.testhtml", "r") as faulty_data:
+            faulty_response = faulty_data.read()
+
+        responses.add(
+            method=responses.GET,
+            url=valid_url,
+            body=faulty_response,
+        )
+
+        with self.assertRaises(RecipeScrapersExceptions):
+            scraper = MarleySpoon(url=valid_url)

From 06e5bf8a27c24e9b4cfedbfc496310b9ee2a6510 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:25:04 +0100
Subject: [PATCH 02/15] Fixup: linting: remove unused variable.

---
 tests/legacy/test_marleyspoon_invalid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
index b6b5426f3..98e4a0f3c 100644
--- a/tests/legacy/test_marleyspoon_invalid.py
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -21,4 +21,4 @@ def test_invalid_scraper(self):
         )
 
         with self.assertRaises(RecipeScrapersExceptions):
-            scraper = MarleySpoon(url=valid_url)
+            MarleySpoon(url=valid_url)

From cf9c0594f5af2ae84c1ecac2ee29244dbebbd3e8 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:27:23 +0100
Subject: [PATCH 03/15] Fixup: linting: use isort to re-order imports.

---
 recipe_scrapers/marleyspoon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 989fe5690..3676276b9 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -1,7 +1,7 @@
 # mypy: disallow_untyped_defs=False
-from urllib.parse import urljoin, urlsplit
 import json
 import re
+from urllib.parse import urljoin, urlsplit
 
 import requests
 

From c25b0e30c8155facdc2fe165adb5c30984580d6d Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:30:59 +0100
Subject: [PATCH 04/15] Fixup: linting: apply pyupgrade (py3.8+) to test
 module.

---
 tests/legacy/test_marleyspoon_invalid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
index 98e4a0f3c..41527568c 100644
--- a/tests/legacy/test_marleyspoon_invalid.py
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -11,7 +11,7 @@ class TestFaultyAPIURLResponse(unittest.TestCase):
     @responses.activate
     def test_invalid_scraper(self):
         valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
-        with open("tests/legacy/test_data/faulty.testhtml", "r") as faulty_data:
+        with open("tests/legacy/test_data/faulty.testhtml") as faulty_data:
             faulty_response = faulty_data.read()
 
         responses.add(

From 5f2a6bdafbf3e9e55100d221a47e2f027210065a Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:38:50 +0100
Subject: [PATCH 05/15] MarleySpoon: remove use of variable shadowing that
 introduced a change in behaviour.

---
 recipe_scrapers/marleyspoon.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 3676276b9..9946d6546 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -69,8 +69,8 @@ def _get_json_params(self):
         scraper_name = self.__class__.__name__
         expected_domain = scraper_name.lower()
         try:
-            api_url = urljoin(self.url, api_url)
-            url_info = urlsplit(api_url)
+            validation_url = urljoin(self.url, api_url)
+            url_info = urlsplit(validation_url)
             domain_prefix, _ = url_info.hostname.rsplit(".", 1)
             if not f".{domain_prefix}".endswith(f".{expected_domain}"):
                 msg = f"Domain for {api_url} does not contain expected part: {expected_domain}"

From 39cc78823c3fad1b745d40999c7e7c746fbd9db6 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:41:55 +0100
Subject: [PATCH 06/15] MarleySpoon: tests: rename test case.

---
 tests/legacy/test_marleyspoon_invalid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
index 41527568c..108f087ea 100644
--- a/tests/legacy/test_marleyspoon_invalid.py
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -9,7 +9,7 @@
 class TestFaultyAPIURLResponse(unittest.TestCase):
 
     @responses.activate
-    def test_invalid_scraper(self):
+    def test_faulty_response(self):
         valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
         with open("tests/legacy/test_data/faulty.testhtml") as faulty_data:
             faulty_response = faulty_data.read()

From ca2154fb53d508bf28e0d036ff91ceff6c45981e Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:46:22 +0100
Subject: [PATCH 07/15] MarleySpoon: tests: add coverage for relative-URL API
 host case.

---
 tests/legacy/test_data/relative_url.testhtml |  6 ++++++
 tests/legacy/test_marleyspoon_invalid.py     | 17 +++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 tests/legacy/test_data/relative_url.testhtml

diff --git a/tests/legacy/test_data/relative_url.testhtml b/tests/legacy/test_data/relative_url.testhtml
new file mode 100644
index 000000000..16650db42
--- /dev/null
+++ b/tests/legacy/test_data/relative_url.testhtml
@@ -0,0 +1,6 @@
+<!DOCTYPE html>
+<html>
+<script>
+gon.current_brand="test_invalid"; gon.current_country="XX"; gon.api_token=" ".trim() || null; gon.api_host="relative_path/unexpected.js";
+</script>
+</html>
diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
index 108f087ea..3f89239bc 100644
--- a/tests/legacy/test_marleyspoon_invalid.py
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -22,3 +22,20 @@ def test_faulty_response(self):
 
         with self.assertRaises(RecipeScrapersExceptions):
             MarleySpoon(url=valid_url)
+
+    @responses.activate
+    def test_relative_api_url(self):
+        valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
+        with open("tests/legacy/test_data/relative_url.testhtml") as relative_url_data:
+            relative_url_response = relative_url_data.read()
+
+        responses.add(
+            method=responses.GET,
+            url=valid_url,
+            body=relative_url_response,
+        )
+
+        with self.assertRaises(Exception):
+            MarleySpoon(
+                url=valid_url
+            )  # currently this raises an requests.exceptions.MissingSchema exception

From c24cc7b2b0f8d11fe79c66149e97577b8cbcc6ae Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Fri, 19 Apr 2024 17:47:02 +0100
Subject: [PATCH 08/15] MarleySpoon: tests: brevity: rename 'valid_url' to
 'url'.

---
 tests/legacy/test_marleyspoon_invalid.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/legacy/test_marleyspoon_invalid.py b/tests/legacy/test_marleyspoon_invalid.py
index 3f89239bc..a8bde3b42 100644
--- a/tests/legacy/test_marleyspoon_invalid.py
+++ b/tests/legacy/test_marleyspoon_invalid.py
@@ -10,32 +10,32 @@ class TestFaultyAPIURLResponse(unittest.TestCase):
 
     @responses.activate
     def test_faulty_response(self):
-        valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
+        url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
         with open("tests/legacy/test_data/faulty.testhtml") as faulty_data:
             faulty_response = faulty_data.read()
 
         responses.add(
             method=responses.GET,
-            url=valid_url,
+            url=url,
             body=faulty_response,
         )
 
         with self.assertRaises(RecipeScrapersExceptions):
-            MarleySpoon(url=valid_url)
+            MarleySpoon(url=url)
 
     @responses.activate
     def test_relative_api_url(self):
-        valid_url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
+        url = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"
         with open("tests/legacy/test_data/relative_url.testhtml") as relative_url_data:
             relative_url_response = relative_url_data.read()
 
         responses.add(
             method=responses.GET,
-            url=valid_url,
+            url=url,
             body=relative_url_response,
         )
 
         with self.assertRaises(Exception):
             MarleySpoon(
-                url=valid_url
+                url=url
             )  # currently this raises an requests.exceptions.MissingSchema exception

From eb286cbf23316c1e130cea062330e2612f947344 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:15:38 +0100
Subject: [PATCH 09/15] MarleySpoon: adjustment: use is-same-scraper condition
 to decide whether a request is valid or not.

---
 recipe_scrapers/marleyspoon.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 9946d6546..e6afa1d83 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -7,7 +7,7 @@
 
 from ._abstract import HEADERS, AbstractScraper
 from ._exceptions import ElementNotFoundInHtml, RecipeScrapersExceptions
-from ._utils import normalize_string
+from ._utils import get_host_name, normalize_string
 
 ID_PATTERN = re.compile(r"/(\d+)-")
 SCRIPT_PATTERN = re.compile(
@@ -67,14 +67,12 @@ def _get_json_params(self):
             raise ElementNotFoundInHtml("Required script not found.")
 
         scraper_name = self.__class__.__name__
-        expected_domain = scraper_name.lower()
         try:
-            validation_url = urljoin(self.url, api_url)
-            url_info = urlsplit(validation_url)
-            domain_prefix, _ = url_info.hostname.rsplit(".", 1)
-            if not f".{domain_prefix}".endswith(f".{expected_domain}"):
-                msg = f"Domain for {api_url} does not contain expected part: {expected_domain}"
-                raise ValueError(msg)
+            next_url = urljoin(self.url, api_url)
+            host_name = get_host_name(next_url)
+            next_scraper = SCRAPERS[host_name]
+            if not isinstance(self, next_scraper):
+                raise ValueError(f"Attempted to scrape using {next_scraper} from {scraper_name}")
         except Exception:
             raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}")
 

From b06eec99ced52b5f8cdbfa913c4e24215dccb7f9 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:18:33 +0100
Subject: [PATCH 10/15] MarleySpoon: exception handling: include link from
 raised-exception to originating-exception.

---
 recipe_scrapers/marleyspoon.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index e6afa1d83..d47d44802 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -73,8 +73,8 @@ def _get_json_params(self):
             next_scraper = SCRAPERS[host_name]
             if not isinstance(self, next_scraper):
                 raise ValueError(f"Attempted to scrape using {next_scraper} from {scraper_name}")
-        except Exception:
-            raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}")
+        except Exception as e:
+            raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}") from e
 
         return api_url, api_token
 

From 9c94ee9e44c2a434a8d52e81a7b1d83ee7c8d9b9 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:22:57 +0100
Subject: [PATCH 11/15] MarleySpoon: fixup: add missing SCRAPERS import
 (localised; not ideal, but avoids a circular import).

---
 recipe_scrapers/marleyspoon.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index d47d44802..1d9b3898f 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -66,6 +66,8 @@ def _get_json_params(self):
         if api_url is None or api_token is None:
             raise ElementNotFoundInHtml("Required script not found.")
 
+        from . import SCRAPERS
+
         scraper_name = self.__class__.__name__
         try:
             next_url = urljoin(self.url, api_url)

From 7561de5ac6405987be2b88784524c1e4bcbc206e Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:29:03 +0100
Subject: [PATCH 12/15] MarleySpoon: reduce constraint: allow less-precise
 matches on partial host domain name.

---
 recipe_scrapers/marleyspoon.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 1d9b3898f..35ce9873e 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -72,7 +72,13 @@ def _get_json_params(self):
         try:
             next_url = urljoin(self.url, api_url)
             host_name = get_host_name(next_url)
-            next_scraper = SCRAPERS[host_name]
+            next_scraper = type(None)
+            # check: api.foo.xx.example, foo.xx.example, xx.example
+            while host_name and host_name.count('.'):
+                next_scraper = SCRAPERS.get(host_name)
+                if next_scraper:
+                    break
+                host_name = '.'.join(host_name.split('.')[1:])
             if not isinstance(self, next_scraper):
                 raise ValueError(f"Attempted to scrape using {next_scraper} from {scraper_name}")
         except Exception as e:

From 2a5e003c5abb8111b314bf1f1ba45cdad30a81f1 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:32:18 +0100
Subject: [PATCH 13/15] MarleySpoon: linting: adjust code to comply with black
 code style recommendations / requirements.

---
 recipe_scrapers/marleyspoon.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 35ce9873e..8954cb20e 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -74,13 +74,14 @@ def _get_json_params(self):
             host_name = get_host_name(next_url)
             next_scraper = type(None)
             # check: api.foo.xx.example, foo.xx.example, xx.example
-            while host_name and host_name.count('.'):
+            while host_name and host_name.count("."):
                 next_scraper = SCRAPERS.get(host_name)
                 if next_scraper:
                     break
-                host_name = '.'.join(host_name.split('.')[1:])
+                host_name = ".".join(host_name.split(".")[1:])
             if not isinstance(self, next_scraper):
-                raise ValueError(f"Attempted to scrape using {next_scraper} from {scraper_name}")
+                msg = f"Attempted to scrape using {next_scraper} from {scraper_name}"
+                raise ValueError(msg)
         except Exception as e:
             raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}") from e
 

From ff02a0c0899fb2c69321f73a900d6f54fa093d28 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:34:13 +0100
Subject: [PATCH 14/15] MarleySpoon: refactor: adjust domain-climbing logic.

---
 recipe_scrapers/marleyspoon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 8954cb20e..8c05100de 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -78,7 +78,7 @@ def _get_json_params(self):
                 next_scraper = SCRAPERS.get(host_name)
                 if next_scraper:
                     break
-                host_name = ".".join(host_name.split(".")[1:])
+                _, host_name = host_name.split(".", 1)
             if not isinstance(self, next_scraper):
                 msg = f"Attempted to scrape using {next_scraper} from {scraper_name}"
                 raise ValueError(msg)

From 1dfd79b23bb7b8baf424145f70ffe9fcc2c681d0 Mon Sep 17 00:00:00 2001
From: James Addison <james@reciperadar.com>
Date: Tue, 30 Apr 2024 13:37:24 +0100
Subject: [PATCH 15/15] MarleySpoon: cleanup: remove unused import.

---
 recipe_scrapers/marleyspoon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py
index 8c05100de..d0f44ca14 100644
--- a/recipe_scrapers/marleyspoon.py
+++ b/recipe_scrapers/marleyspoon.py
@@ -1,7 +1,7 @@
 # mypy: disallow_untyped_defs=False
 import json
 import re
-from urllib.parse import urljoin, urlsplit
+from urllib.parse import urljoin
 
 import requests