From f95bdfc0fc0f190baa1a245fed6fe3b24c4bbe69 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Fri, 26 Jan 2024 20:26:14 -0500 Subject: [PATCH 01/12] feat(newOpinionSite): change underlying data structures from lists to dicts - create new base class for OpinionSite - add example for new deferring objects on `nev` - add example of json validation --- juriscraper/NewOpinionSite.py | 189 ++++++++++++++++++ .../opinions/united_states/state/nev.py | 43 ++-- juriscraper/schemas/scraper_schema.py | 20 ++ 3 files changed, 234 insertions(+), 18 deletions(-) create mode 100644 juriscraper/NewOpinionSite.py create mode 100644 juriscraper/schemas/scraper_schema.py diff --git a/juriscraper/NewOpinionSite.py b/juriscraper/NewOpinionSite.py new file mode 100644 index 000000000..3a493dcd1 --- /dev/null +++ b/juriscraper/NewOpinionSite.py @@ -0,0 +1,189 @@ +from datetime import datetime +from functools import cmp_to_key +from typing import Callable + +from jsonschema import Draft7Validator, FormatChecker + +from juriscraper.AbstractSite import AbstractSite, logger +from juriscraper.lib.string_utils import ( + clean_string, + convert_date_string, + harmonize, +) +from juriscraper.OpinionSite import OpinionSite +from juriscraper.schemas.scraper_schema import validation_schema + +opinion_site = OpinionSite() +opinion_site_ordered_attributes = opinion_site._all_attrs +del opinion_site + + +class NewOpinionSite(AbstractSite): + short_to_full_key = { + "citation": "citations", + "name": "case_names", + "docket": "docket_numbers", + "date": "case_dates", + "url": "download_urls", + "judge": "judges", + "lower_court": "lower_courts", + } + default_fields = { + "date_filed_is_approximate": False, + "blocked_statuses": False, + } + + def __init__(self, cnt=None): + super().__init__() + self.cases = [] + + self.validator = Draft7Validator( + validation_schema, format_checker=FormatChecker() + ) + + def __iter__(self): + for case in self.cases: + yield case + + def __getitem__(self, 
index: int): + return self.cases[index] + + def __len__(self) -> int: + return len(self.cases) + + def parse(self): + """Overrides AbstractSite.parse""" + if not self.downloader_executed: + # Run the downloader if it hasn't been run already + self.html = self._download() + + # Process the available html (optional) + self._process_html() + + self._post_parse() + + # Instead of self._check_sanity() + # date type validation, non-empty validation, etc will be done by JSON Schema validator + clean_cases = [] + for case in self.cases: + clean_case = self.clean_case(case) + self.fill_default_values(clean_case) + + try: + self.validator.validate(clean_case) + except Exception as e: + # TODO: need to write custom validator for case with Deferred values, + # which are functions to be called + if "bound method" not in str(e): + raise e + + clean_cases.append(clean_case) + + self.cases = clean_cases + self.cases.sort(key=cmp_to_key(self.sort_by_attributes)) + + # This is an example of something in Juriscraper that is exclusively used in CL + # and that should be put there + self.case_names = [case["case_names"] for case in self.cases] + self._make_hash() + + # Only for showing + for case in self.cases: + self.get_deferred_values(case) + + return self + + def fill_default_values(self, case) -> None: + """Required fields""" + for key, default_value in self.default_fields.items(): + if case.get(key) is None: + case[key] = default_value + + def get_deferred_values(self, case) -> None: + """Use this function to consume deferred values + Deferred values are functions that wait until execution to perform + the requests, usually after duplication has been checked by the caller + + If a single deferring_function scrapes multiple fields, it is better to + repeat it in every field, for clarity + + For example: + + ``` + # Use functools.partial to pass an argument to a function + # without calling it + + docket_number = "2023-23" + deferred_function = partial(self.scrape_detail, + 
docket_number=docket_number) + + self.cases.append({ + "case_dates": date(2024, 01, 01), + "judge": deferred_function, + "citation": deferred_function, + "lower_court": some_other_function, + }) + ``` + """ + update_values = {} + seen_callables = set() + + for value in case.values(): + if isinstance(value, Callable) and value not in seen_callables: + deferred_dict = value() + deferred_dict = self.clean_case(deferred_dict) + logger.info("Got deferred values %s", str(deferred_dict)) + update_values.update(deferred_dict) + seen_callables.add(value) + + case.update(update_values) + + @staticmethod + def sort_by_attributes(case, other_case) -> int: + """Replaces AbstractSite._date_sort + Keeping the order of attributes as OpinionSite ensures we have the same order of cases + """ + for attr in opinion_site_ordered_attributes: + value = case.get(attr) + other_value = other_case.get(attr) + + if value is None and other_value is None: + continue + elif other_value is None: + return 1 + elif value is None: + return -1 + + if value == other_value: + return 0 + elif value > other_value: + return 1 + else: + return -1 + + @classmethod + def clean_case(cls, case) -> None: + """Replaces AbstractSite._clean_attributes + + Clean case dict in place + """ + clean_case = {} + + for key, value in case.items(): + if key == "download_urls": + value = value.strip() + else: + if "date" in key: + value = str(convert_date_string(value)) + elif isinstance(value, datetime): + value = str(value.date()) + elif isinstance(value, str): + value = clean_string(value) + + if key in ["case_names", "docket_numbers"]: + value = harmonize(value) + + clean_key = cls.short_to_full_key.get(key, key) + clean_case[clean_key] = value + + return clean_case diff --git a/juriscraper/opinions/united_states/state/nev.py b/juriscraper/opinions/united_states/state/nev.py index 84c8313a0..9d66dc13a 100644 --- a/juriscraper/opinions/united_states/state/nev.py +++ b/juriscraper/opinions/united_states/state/nev.py @@ 
-7,20 +7,20 @@ import json +import re +from functools import partial from lxml.html import fromstring -from juriscraper.DeferringList import DeferringList -from juriscraper.OpinionSiteLinear import OpinionSiteLinear +from juriscraper.NewOpinionSite import NewOpinionSite -class Site(OpinionSiteLinear): +class Site(NewOpinionSite): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ self.url = "https://publicaccess.nvsupremecourt.us/WebSupplementalAPI/api/AdvanceOpinions" self.search = "https://caseinfo.nvsupremecourt.us/public/caseSearch.do" - self.xp = "//tr[td[contains(text(), 'Opinion')]]/td/a/@href" self.status = "Published" self.court_code = "10001" @@ -70,17 +70,23 @@ def _process_html(self): for case in self.filter_cases(): vol = int(case["date"].split("-")[0]) - 1884 citation = f"{vol} Nev., Advance Opinion {case['advanceNumber']}" + deferred_scraper = partial( + self.scrape_case_page, csNumber=case["caseNumber"] + ) + self.cases.append( { "citation": citation, "name": case["caseTitle"], "docket": case["caseNumber"], "date": case["date"], - "url": "placeholder", + "url": deferred_scraper, + "judge": deferred_scraper, + "lower_court": deferred_scraper, } ) - def fetch_document_link(self, csNumber: str): + def scrape_case_page(self, csNumber: str): """Fetch document url Using case number - make a request to return the case page and @@ -103,18 +109,19 @@ def fetch_document_link(self, csNumber: str): "submitValue": "Search", } content = self.request["session"].post(self.search, data=data).text - slug = fromstring(content).xpath(self.xp)[-1] - return f"https://caseinfo.nvsupremecourt.us{slug}" + html = fromstring(content) + opinion_xpath = "//tr[td[contains(text(), 'Opinion')]]" + opinion_row = html.xpath(opinion_xpath)[-1] + slug = opinion_row.xpath("td/a/@href")[0] - def _get_download_urls(self): - """Get download urls + lower_court_xpath = "//td[text()='Lower Court Case(s):']//following-sibling::td[1]/text()" 
- :return: List URLs - """ + author_str = opinion_row.xpath("td[3]/text()")[0] + author_match = re.search(r"Author:(?P[\s\w,]+).", author_str) + judge = author_match.group("judge") if author_match else "" - def fetcher(case): - if self.test_mode_enabled(): - return case["url"] - return self.fetch_document_link(case["docket"]) - - return DeferringList(seed=self.cases, fetcher=fetcher) + return { + "url": f"https://caseinfo.nvsupremecourt.us{slug}", + "lower_court": html.xpath(lower_court_xpath)[0], + "judge": judge, + } diff --git a/juriscraper/schemas/scraper_schema.py b/juriscraper/schemas/scraper_schema.py new file mode 100644 index 000000000..0843515e7 --- /dev/null +++ b/juriscraper/schemas/scraper_schema.py @@ -0,0 +1,20 @@ +validation_schema = { + "type": "object", + "properties": { + "case_names": {"type": "string"}, + "case_dates": {"type": "string", "format": "date-time"}, + "download_urls": {"type": "string"}, + "precedential_statuses": {"enum": ["Published", "Unpublished"]}, + "blocked_statuses": {"type": "boolean"}, + "date_filed_is_approximate": {"type": "boolean"}, + "citation": {"type": "string"}, + "docket": {"type": "string"}, + }, + "required": [ + "case_dates", + "case_names", + "download_urls", + "precedential_statuses", + "date_filed_is_approximate", + ], +} From b4d8348e1c7aee15063ba10e87093a4a73eb2564 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Fri, 16 Feb 2024 11:42:05 -0500 Subject: [PATCH 02/12] wip --- juriscraper/NewOpinionSite.py | 41 +++---- juriscraper/schemas/scraper_schema.py | 152 +++++++++++++++++++++++++- 2 files changed, 166 insertions(+), 27 deletions(-) diff --git a/juriscraper/NewOpinionSite.py b/juriscraper/NewOpinionSite.py index 3a493dcd1..1c39fd11a 100644 --- a/juriscraper/NewOpinionSite.py +++ b/juriscraper/NewOpinionSite.py @@ -1,6 +1,6 @@ from datetime import datetime from functools import cmp_to_key -from typing import Callable +from typing import Callable, Dict from jsonschema import Draft7Validator, 
FormatChecker @@ -19,22 +19,18 @@ class NewOpinionSite(AbstractSite): - short_to_full_key = { - "citation": "citations", - "name": "case_names", - "docket": "docket_numbers", - "date": "case_dates", - "url": "download_urls", - "judge": "judges", - "lower_court": "lower_courts", - } - default_fields = { - "date_filed_is_approximate": False, - "blocked_statuses": False, - } - - def __init__(self, cnt=None): - super().__init__() + """ + Inherits from AbstractSite to access methods which have to do + with downloading and processing data from sources + + Overrides methods which have to do with transforming + (converting and cleaning data). The main entry point to this is + `parse` + + Keeps interface compatible + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.cases = [] self.validator = Draft7Validator( @@ -92,13 +88,7 @@ def parse(self): self.get_deferred_values(case) return self - - def fill_default_values(self, case) -> None: - """Required fields""" - for key, default_value in self.default_fields.items(): - if case.get(key) is None: - case[key] = default_value - + def get_deferred_values(self, case) -> None: """Use this function to consume deferred values Deferred values are functions that wait until execution to perform @@ -139,9 +129,10 @@ def get_deferred_values(self, case) -> None: case.update(update_values) @staticmethod - def sort_by_attributes(case, other_case) -> int: + def sort_by_attributes(case: Dict, other_case: Dict) -> int: """Replaces AbstractSite._date_sort Keeping the order of attributes as OpinionSite ensures we have the same order of cases + Order is important because a hash is calculated from ordered case names """ for attr in opinion_site_ordered_attributes: value = case.get(attr) diff --git a/juriscraper/schemas/scraper_schema.py b/juriscraper/schemas/scraper_schema.py index 0843515e7..100a9e56a 100644 --- a/juriscraper/schemas/scraper_schema.py +++ b/juriscraper/schemas/scraper_schema.py @@ -1,12 +1,160 @@ 
-validation_schema = { +""" +The schemas correspond to Courtlistener's Django models, which mirror +the DB models. These schemas should be updated when the CL models are updated + +We keep the comments on the fields to a minimal. For further documentation +check CL models.py files +""" + +# Citations are usually returned as a string, and parsed by `eyecite` +# Currently, this schema would be used by structured citations returned +# from `extract_from_text` step +# citation.type reference: +# 1 - Federal +# 2 - State +# 3 - State Regional +# 4 - Specialty +# 5 - Scotus Early +# 6 - Lexis +# 7 - West +# 8 - Neutral +citation = { + "type": "object", + "properties": { + "volume": {"type": "integer"}, + "reporter": {"type": "string"}, + "page": {"type": "string"}, + "type": {"enum": [1, 2, 3, 4, 5, 6, 7, 8]}, + }, + "required": ["volume", "reporter", "page", "type"], +} + + +originating_court_information = { + "type": "object", + "properties": { + "docket_number": {"type": "string"}, + "assigned_to_str": {"type": "string"}, + "ordering_judge_str": {"type": "string"}, + "court_reporter": {"type": "string"}, + "date_disposed": {"type": "string", "format": "date-time"}, + "date_filed": {"type": "string", "format": "date-time"}, + "date_judgment": {"type": "string", "format": "date-time"}, + "date_judgment_eod": {"type": "string", "format": "date-time"}, + "date_filed_noa": {"type": "string", "format": "date-time"}, + "date_received_coa": {"type": "string", "format": "date-time"}, + }, +} + + +opinion = { + "type": "object", + "properties": { + "author_str": {"type": "string"}, + "per_curiam": {"type": "boolean"}, + "joined_by_str": {"type": "string"}, + "page_count": {"type": "integer"}, + "download_url": {"type": "string"}, + "type": { + "enum": [ + "010combined", + "015unamimous", + "020lead", + "025plurality", + "030concurrence", + "035concurrenceinpart", + "040dissent", + "050addendum", + "060remittitur", + "070rehearing", + "080onthemerits", + "090onmotiontostrike", + 
] + }, + }, + "required": ["download_url"], +} + +# panel -> people_db.Person +# non_participating_judges -> people_db.Person +# source +# citation_count +# docket_id +cluster = { + "type": "object", + "properties": { + "judges": {"type": "string"}, + "date_filed": {"type": "string", "format": "date-time"}, + "date_filed_is_approximate": {"type": "boolean"}, + "case_name_short": {"type": "string"}, + "case_name": {"type": "string"}, + "case_name_full": {"type": "string"}, + "scdb_votes_minority": {"type": "integer"}, + "scdb_votes_majority": {"type": "integer"}, + "scdb_id": {"type": "string"}, + "attorneys": {"type": "string"}, + "procedural_history": {"type": "string"}, + "nature_of_suit": {"type": "string"}, + "posture": {"type": "string"}, + "syllabus": {"type": "string"}, + "headnotes": {"type": "string"}, + "summary": {"type": "string"}, + "history": {"type": "string"}, + "other_dates": {"type": "string"}, + "cross_reference": {"type": "string"}, + "correction": {"type": "string"}, + "date_blocked": {"type": "string", "format": "date-time"}, + "blocked": {"type": "boolean"}, + "arguments": {"type": "string"}, + "headmatter": {"type": "string"}, + "precedential_status": { + "enum": [ + "Published", + "Unpublished", + "Errata", + "Separate", + "In-chambers", + "Relating-to", + "Unknown", + ] + }, + # C stands for "Court Website". Since we are scraping court websites + # This is the only option that we can output + "source": {"enum": ["C"]}, + }, + "required": [ + "date_filed", + "date_filed_is_approximate", + "case_name", + "precedential_status", + ], +} + +docket = { + "type": "object", + "properties": { + # 2 stands for "Scraper". 
Since we are scraping this is the only + # option to output + "source": {"enum": [2]}, + "court": {"type": "string"}, + "appeal_from_str": {"type": "string"}, + }, + "assigned_to_str": {"type": "string"}, + "referred_to_str": {"type": "string"}, + "panel_str": {"type": "string"}, + "required": [], +} + + +legacy = { "type": "object", "properties": { "case_names": {"type": "string"}, "case_dates": {"type": "string", "format": "date-time"}, "download_urls": {"type": "string"}, "precedential_statuses": {"enum": ["Published", "Unpublished"]}, - "blocked_statuses": {"type": "boolean"}, "date_filed_is_approximate": {"type": "boolean"}, + "blocked_statuses": {"type": "boolean"}, "citation": {"type": "string"}, "docket": {"type": "string"}, }, From 77e2e86854cae9bc55e1c8db9b2c4cee13aaae5e Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Wed, 6 Mar 2024 17:04:19 -0500 Subject: [PATCH 03/12] feat(NewOpinionSite): support returning nested data structures - add jsonschema dependencies - create JSON Schemas for each scraped object, corresponding to courtlistener's Django Models - validate scraped data using JSONSchemaValidator - support nested objects data structures, which allow passing OpinionClusters to caller - nested objects also make it easier to pass previously unscraped objects to CL, like OriginatingCourtInformation - found an easy way to test secondary pages - update `tex` scraper to the new scraper class --- juriscraper/NewOpinionSite.py | 347 +- .../opinions/united_states/state/nev.py | 43 +- .../opinions/united_states/state/tex.py | 204 +- .../opinions/united_states/state/texapp_1.py | 11 + .../united_states/state/texcrimapp.py | 31 + juriscraper/schemas/Citation.json | 14 + juriscraper/schemas/Docket.json | 84 + juriscraper/schemas/Opinion.json | 31 + juriscraper/schemas/OpinionCluster.json | 131 + .../schemas/OriginatingCourtInformation.json | 19 + juriscraper/schemas/__init__.py | 3 + juriscraper/schemas/schema_utils.py | 75 + 
juriscraper/schemas/scraper_schema.py | 185 - requirements.txt | 3 + sample_caller.py | 104 +- .../united_states/tex_example.compare.json | 210 +- .../opinions/united_states/tex_example.html | 3551 +++++++++-------- .../united_states/tex_subexample_1.html | 1632 ++++++++ .../united_states/tex_subexample_2.html | 2424 +++++++++++ tests/schemas/test_OpinionSchemasTest.py | 17 + 20 files changed, 6921 insertions(+), 2198 deletions(-) create mode 100644 juriscraper/schemas/Citation.json create mode 100644 juriscraper/schemas/Docket.json create mode 100644 juriscraper/schemas/Opinion.json create mode 100644 juriscraper/schemas/OpinionCluster.json create mode 100644 juriscraper/schemas/OriginatingCourtInformation.json create mode 100644 juriscraper/schemas/__init__.py create mode 100644 juriscraper/schemas/schema_utils.py delete mode 100644 juriscraper/schemas/scraper_schema.py create mode 100644 tests/examples/opinions/united_states/tex_subexample_1.html create mode 100644 tests/examples/opinions/united_states/tex_subexample_2.html create mode 100644 tests/schemas/test_OpinionSchemasTest.py diff --git a/juriscraper/NewOpinionSite.py b/juriscraper/NewOpinionSite.py index 70393f1c0..cf4ced8d2 100644 --- a/juriscraper/NewOpinionSite.py +++ b/juriscraper/NewOpinionSite.py @@ -1,42 +1,83 @@ -from datetime import datetime +from datetime import date, datetime from functools import cmp_to_key -from typing import Callable, Dict - -from jsonschema import Draft7Validator, FormatChecker +from typing import Callable, Dict, List, Union from juriscraper.AbstractSite import AbstractSite, logger +from juriscraper.lib.judge_parsers import normalize_judge_string from juriscraper.lib.string_utils import ( + CaseNameTweaker, clean_string, convert_date_string, harmonize, ) -from juriscraper.OpinionSite import OpinionSite -from juriscraper.schemas.scraper_schema import validation_schema - -opinion_site = OpinionSite() -opinion_site_ordered_attributes = opinion_site._all_attrs -del 
opinion_site +from juriscraper.schemas.schema_utils import SchemaValidator class NewOpinionSite(AbstractSite): """ - Inherits from AbstractSite to access methods which have to do - with downloading and processing data from sources - - Overrides methods which have to do with transforming - (converting and cleaning data). The main entry point to this is - `parse` - - Keeps interface compatible + Inherits from AbstractSite to access downloading and processing methods + + Overrides legacy methods which have to do with data transformation: + converting, cleaning and shaping data + + Validates cleaned cases using JSON Schema Validator + + The main entry point is `parse`. Keeps interface compatible + for courtlistener caller to consume + + Lifecycle of a scrape: + - scrape is handled by inheriting scraper. + For expected attribute naming style see `build_nested_object` docstring + - build nested objects as expected by the schema + - clean values + - propagate values which repeat across objects + - fill default values + - validate against JSON Schema + + Design heuristics: + Automate as much as possible, improve developer experience + Inheriting scraper should concern itself mostly with parsing the + page and assigning proper names to the values, not with building + the nested object expected by CL, nor with manually calling the + cleaning functions depending on the data type, nor with filling + default values """ + expected_content_types = ["application/pdf", "Application/pdf"] + is_cluster_site = True + + # `judges` and `joined_by_str` refer to multiple judges + # if we pass a "symbol"-separated string of multiple judges + # `normalize_judge_string` may missinterpret it depending + # on the "symbol" . 
It is better that the scraper pass them as lists + + # Ingestion into the DB could be improved by adding extra descriptors + # of the judge's names, flags for full or partial names, a flag + # for "messy" string or only name included + judge_keys = { + "assigned_to_str", + "referred_to_str", + "judges", + "author_str", + "joined_by_str", + "ordering_judge_str", + } + + # For hash building + sort_keys = [ + "case_dates", + "case_names", + "download_urls", + ] + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.cases = [] - self.validator = Draft7Validator( - validation_schema, format_checker=FormatChecker() - ) + self.schema_validator = SchemaValidator() + self.cnt = CaseNameTweaker() + # To be filled by inheriting classes + self.status = "" def __iter__(self): yield from self.cases @@ -50,45 +91,200 @@ def __len__(self) -> int: def parse(self): """Overrides AbstractSite.parse""" if not self.downloader_executed: - # Run the downloader if it hasn't been run already self.html = self._download() - - # Process the available html (optional) self._process_html() self._post_parse() - # Instead of self._check_sanity() - # date type validation, non-empty validation, etc will be done by JSON Schema validator clean_cases = [] for case in self.cases: - clean_case = self.clean_case(case) - self.fill_default_values(clean_case) - - try: - self.validator.validate(clean_case) - except Exception as e: - # TODO: need to write custom validator for case with Deferred values, - # which are functions to be called - if "bound method" not in str(e): - raise e - + clean_case = self.build_nested_object(case) + self.fill_default_and_required_values(clean_case) + self.schema_validator.validate(clean_case.get("Docket")) + self.recast_dates(clean_case) clean_cases.append(clean_case) self.cases = clean_cases self.cases.sort(key=cmp_to_key(self.sort_by_attributes)) - # This is an example of something in Juriscraper that is exclusively used in CL - # and that should be put 
there - self.case_names = [case["case_names"] for case in self.cases] + # Ordering is important for hash + # Hash will be used by caller to skip downloading binary content if page has + # already been seen + self.case_names = [case["Docket"]["case_name"] for case in self.cases] self._make_hash() - # Only for showing - for case in self.cases: - self.get_deferred_values(case) - return self + def build_nested_object(self, case: Dict) -> Dict: + """Build nested object expected by CL and defined on JSON Schemas + + {Model Name}: {Naming convention} + Docket: field name preceded by "d." + OpinionCluster: field name preceded by "oc." + Opinion: expected to be returned as an object, with key "opinions" + OriginatingCourtInformation: expected to be returned as an object, with key "oci" + + :param case: case as returned by the scraper + + :return: Nested object as expected by CL + """ + cl_obj = {"Docket": {"OpinionCluster": {}}} + + for k, v in case.items(): + clean_value = self.clean_value(k, v) + if not clean_value: + continue + + if "." 
in k: + obj, key = k.split(".") + if obj == "d": + cl_obj["Docket"][key] = clean_value + elif obj == "oc": + cl_obj["Docket"]["OpinionCluster"][key] = clean_value + elif k == "oci": + cl_obj["Docket"]["OriginatingCourtInformation"] = clean_value + elif k == "opinions": + ops = ( + [clean_value] + if isinstance(clean_value, dict) + else clean_value + ) + cl_obj["Docket"]["OpinionCluster"]["Opinions"] = ops + else: + raise NotImplementedError( + f"Unsupported complex object with key '{k}' {v}" + ) + + return cl_obj + + def recast_dates(self, clean_case: Dict): + """Courtlistener expects Python date objects, which is not + a valid JSON format for the validator + + This is a temporary method until a custom TypeChecker is written + """ + obj_x_date_fields = [ + ( + clean_case["Docket"]["OpinionCluster"], + ["date_filed", "date_blocked"], + ), + ( + clean_case["Docket"], + [ + "date_filed", + "date_terminated", + "date_last_filing", + "date_blocked", + ], + ), + ( + clean_case["Docket"].get("OriginatingCourtInformation", {}), + [ + "date_disposed", + "date_filed", + "date_judgment", + "date_judgment_eod", + "date_filed_noa", + "date_received_coa", + ], + ), + ] + + for obj, date_fields in obj_x_date_fields: + for df in date_fields: + if obj.get(df): + obj[df] = datetime.strptime(obj[df], "%Y-%m-%d").date() + + def clean_value( + self, key: str, value: Union[Dict, List, str] + ) -> Union[Dict, List, str]: + """Clean values recursively + + :param key: field name, used to apply special cleaning functions + :param value: dict, list or string + + :return: preserves the input type of `value` + """ + if isinstance(value, dict): + return {k: self.clean_value(k, v) for k, v in value.items()} + if isinstance(value, list): + return [self.clean_value(key, item) for item in value] + + if value is None or not value: + return + + if key == "download_url": + value = value.strip() + else: + if isinstance(value, str): + if "date" in key: + value = str(convert_date_string(value)) + else: + 
value = clean_string(value) + elif isinstance(value, datetime): + value = str(value.date()) + elif isinstance(value, date): + value = str(value) + + if key in ["case_name", "docket_number"]: + value = harmonize(value) + elif key in self.judge_keys: + value = normalize_judge_string(value) + value = value[0] + + return value + + def fill_default_and_required_values(self, case: Dict) -> None: + """Fill default values and propagate values shared between objects + + Many default fields are taken from Courtlistener's + cl_scrape_opinions.make_objects + + :param case: nested object + :return None + """ + oc = case["Docket"]["OpinionCluster"] + d = case["Docket"] + + # Default fields + d["source"] = 2 + oc["source"] = "C" + + # Default if not filled + if not oc.get("date_filed_is_approximate"): + oc["date_filed_is_approximate"] = False + if not oc.get("blocked"): + oc["blocked"] = False + if not d.get("blocked"): + d["blocked"] = False + + # imitating cl_scrape_opinions.make_objects + if d.get("blocked") and not d.get("date_blocked"): + d["date_blocked"] = oc["date_blocked"] = date.today() + + if not oc.get("precedential_status"): + oc["precedential_status"] = ( + self.status if self.status else "Unknown" + ) + + # Propagate fields + if not oc.get("case_name") and d.get("case_name"): + oc["case_name"] = d["case_name"] + if not d.get("case_name") and oc.get("case_name"): + oc["case_name"] = d["case_name"] + if not d.get("case_name_short"): + case_name_short = self.cnt.make_case_name_short(d["case_name"]) + d["case_name_short"] = oc["case_name_short"] = case_name_short + + # correct field shapes + if oc.get("judges") and isinstance(oc["judges"], list): + oc["judges"] = ";".join(oc["judges"]) + for op in oc["Opinions"]: + if op.get("joined_by_str") and isinstance( + op["joined_by_str"], list + ): + op["joined_by_str"] = ";".join(op["joined_by_str"]) + def get_deferred_values(self, case) -> None: """Use this function to consume deferred values Deferred values are functions 
that wait until execution to perform @@ -121,7 +317,7 @@ def get_deferred_values(self, case) -> None: for value in case.values(): if isinstance(value, Callable) and value not in seen_callables: deferred_dict = value() - deferred_dict = self.clean_case(deferred_dict) + # deferred_dict = self.clean_case(deferred_dict) logger.info("Got deferred values %s", str(deferred_dict)) update_values.update(deferred_dict) seen_callables.add(value) @@ -130,13 +326,30 @@ def get_deferred_values(self, case) -> None: @staticmethod def sort_by_attributes(case: Dict, other_case: Dict) -> int: - """Replaces AbstractSite._date_sort + """Replaces AbstractSite._date_sort. Passes as `key` argument to base `sort` + Keeping the order of attributes as OpinionSite ensures we have the same order of cases Order is important because a hash is calculated from ordered case names + + :param case: cleaned case + :param other_case: another cleaned case + :return 1 if first case is greater than second case + 0 if they are equal + -1 if first case is less than second """ - for attr in opinion_site_ordered_attributes: - value = case.get(attr) - other_value = other_case.get(attr) + oc = case["Docket"]["OpinionCluster"] + other_oc = other_case["Docket"]["OpinionCluster"] + + for index in range(3): + if index == 0: + value = oc["date_filed"] + other_value = other_oc["date_filed"] + elif index == 1: + value = case["Docket"]["case_name"] + other_value = other_case["Docket"]["case_name"] + elif index == 2: + value = oc["Opinions"][0]["download_url"] + other_value = other_oc["Opinions"][0]["download_url"] if value is None and other_value is None: continue @@ -146,38 +359,18 @@ def sort_by_attributes(case: Dict, other_case: Dict) -> int: return -1 if value == other_value: - return 0 + continue elif value > other_value: return 1 else: return -1 - @classmethod - def clean_case(cls, case) -> None: - """Replaces AbstractSite._clean_attributes + return 0 - Clean case dict in place - """ - clean_case = {} + def 
extract_from_text(self, scraped_text): + """Pass scraped text into function and return data as a dictionary - for key, value in case.items(): - if key == "download_urls": - value = value.strip() - else: - if "date" in key: - value = str(convert_date_string(value)) - elif isinstance(value, datetime): - value = str(value.date()) - elif isinstance(value, str): - value = clean_string(value) - - if key in ["case_names", "docket_numbers"]: - value = harmonize(value) - - clean_key = cls.short_to_full_key.get(key, key) - clean_case[clean_key] = value - - return clean_case - - def make_nested_object(self): - return {"Docket": {"OpinionCluster": {"Opinions": []}}} + :param opinion_text: Text of scraped content + :return: dictionary of information + """ + return {} diff --git a/juriscraper/opinions/united_states/state/nev.py b/juriscraper/opinions/united_states/state/nev.py index 827fc83ee..fc78c0665 100644 --- a/juriscraper/opinions/united_states/state/nev.py +++ b/juriscraper/opinions/united_states/state/nev.py @@ -7,20 +7,20 @@ import json -import re -from functools import partial from lxml.html import fromstring -from juriscraper.NewOpinionSite import NewOpinionSite +from juriscraper.DeferringList import DeferringList +from juriscraper.OpinionSiteLinear import OpinionSiteLinear -class Site(NewOpinionSite): +class Site(OpinionSiteLinear): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ self.url = "https://publicaccess.nvsupremecourt.us/WebSupplementalAPI/api/AdvanceOpinions" self.search = "https://caseinfo.nvsupremecourt.us/public/caseSearch.do" + self.xp = "//tr[td[contains(text(), 'Opinion')]]/td/a/@href" self.status = "Published" self.court_code = "10001" self.headers = { @@ -72,23 +72,17 @@ def _process_html(self): for case in self.filter_cases(): vol = int(case["date"].split("-")[0]) - 1884 citation = f"{vol} Nev., Advance Opinion {case['advanceNumber']}" - deferred_scraper = partial( - self.scrape_case_page, 
csNumber=case["caseNumber"] - ) - self.cases.append( { "citation": citation, "name": case["caseTitle"], "docket": case["caseNumber"], "date": case["date"], - "url": deferred_scraper, - "judge": deferred_scraper, - "lower_court": deferred_scraper, + "url": "placeholder", } ) - def scrape_case_page(self, csNumber: str): + def fetch_document_link(self, csNumber: str): """Fetch document url Using case number - make a request to return the case page and @@ -111,19 +105,18 @@ def scrape_case_page(self, csNumber: str): "submitValue": "Search", } content = self.request["session"].post(self.search, data=data).text - html = fromstring(content) - opinion_xpath = "//tr[td[contains(text(), 'Opinion')]]" - opinion_row = html.xpath(opinion_xpath)[-1] - slug = opinion_row.xpath("td/a/@href")[0] + slug = fromstring(content).xpath(self.xp)[-1] + return f"https://caseinfo.nvsupremecourt.us{slug}" - lower_court_xpath = "//td[text()='Lower Court Case(s):']//following-sibling::td[1]/text()" + def _get_download_urls(self): + """Get download urls - author_str = opinion_row.xpath("td[3]/text()")[0] - author_match = re.search(r"Author:(?P<judge>[\s\w,]+).", author_str) - judge = author_match.group("judge") if author_match else "" + :return: List URLs + """ - return { - "url": f"https://caseinfo.nvsupremecourt.us{slug}", - "lower_court": html.xpath(lower_court_xpath)[0], - "judge": judge, - } + def fetcher(case): + if self.test_mode_enabled(): + return case["url"] + return self.fetch_document_link(case["docket"]) + + return DeferringList(seed=self.cases, fetcher=fetcher) diff --git a/juriscraper/opinions/united_states/state/tex.py b/juriscraper/opinions/united_states/state/tex.py index e21331135..639ba76ab 100644 --- a/juriscraper/opinions/united_states/state/tex.py +++ b/juriscraper/opinions/united_states/state/tex.py @@ -16,8 +16,11 @@ # - 2015-08-27: Updated by Andrei Chelaru to add explicit waits # - 2021-12-28: Updated by flooie to remove selenium.
+import re from datetime import date, timedelta -from typing import Optional, Dict +from typing import Dict, List + +from lxml import html as lxmlHTML from juriscraper.AbstractSite import logger from juriscraper.lib.string_utils import titlecase @@ -28,13 +31,13 @@ class Site(NewOpinionSite): oci_mapper = { # Court of Appelas Information "COA Case": "docket_number", - "COA District": "appeal_from_str", + "COA District": "origin_court", "COA Justice": "assigned_to_str", "Opinion Cite": "citation", # may contain date data # Trial Court Information "Court Case": "docket_number", "Court Judge": "assigned_to_str", - "Court": "appeal_from_str", + "Court": "origin_court", "Reporter": "court_reporter", ## Extra available fields: "Punishment", "County" } @@ -49,6 +52,7 @@ def __init__(self, *args, **kwargs): self.checkbox = 0 self.status = "Published" self.url = "https://search.txcourts.gov/CaseSearch.aspx?coa=cossup" + self.seen_case_urls = set() def _set_parameters( self, @@ -107,17 +111,32 @@ def _process_html(self) -> None: self.html = super()._download() for row in self.html.xpath(self.rows_xpath): - parsed = self.parse_case_page(row.xpath(".//a")[2].get("href")) + # `Document search` page returns OpinionClusters separated, + # each opinion in a single row. 
We keep track to skip if we already parsed the case + case_url = row.xpath(".//a")[2].get("href") + if case_url in self.seen_case_urls: + continue + self.seen_case_urls.add(case_url) + + parsed = self.parse_case_page(case_url) parsed["oc.date_filed"] = row.xpath("td[2]")[0].text_content() parsed["d.docket_number"] = row.xpath("td[5]")[0].text_content() - + if not parsed.get("opinions"): opinion = {"download_url": row.xpath(".//a")[1].get("href")} parsed["opinions"] = [opinion] - + else: + judges, dispositions = [], [] + for op in parsed["opinions"]: + judges.extend(op.get("joined_by_str", [])) + judges.append(op.get("author_str")) + dispositions.append(op.pop("disposition", "")) + parsed["oc.judges"] = list(filter(bool, judges)) + parsed["oc.disposition"] = sorted(dispositions, key=len)[-1] + self.cases.append(parsed) - def parse_case_page(self, link: str): + def parse_case_page(self, link: str) -> Dict: """Parses the case page Usually we would defer getting extra data until dup checking @@ -125,66 +144,92 @@ def parse_case_page(self, link: str): page, which is need for site hash computing, which cannot be deferred :param link: url of the case page + :return: parsed case dictionary """ parsed = {} if self.test_mode_enabled(): - return parsed + # Support "sub" pages on test_ScraperExampleTest by modifying + # the href attribute of the case page, to point to the proper local file + self.url = link + self._request_url_mock(link) + html = self._return_response_text_object() + else: + html = self._get_html_tree_by_url(link) - html = self._get_html_tree_by_url(link) parsed["d.case_name"] = self.get_name(html, link) - parsed["d.date_filed"] = self.get_by_label_from_case_page(html, "Date Filed:") - + parsed["d.date_filed"] = self.get_by_label_from_case_page( + html, "Date Filed:" + ) + # For example: # on texapp: "Protective Order", "Contract", "Personal Injury" # on tex: "Petition for Review originally filed as 53.7(f)" - parsed["oc.nature_of_suit"] = 
self.get_by_label_from_case_page(html, "Case Type:") - self.get_opinions(html, parsed) - + parsed["oc.nature_of_suit"] = self.get_by_label_from_case_page( + html, "Case Type:" + ) + parsed["opinions"] = self.get_opinions(html) + coa_id, trial_id = ( "ctl00_ContentPlaceHolder1_divCOAInfo", "ctl00_ContentPlaceHolder1_pnlTrialCourt2", ) - if self.checkbox == 0: + oci = None + if self.checkbox in [0, 1]: oci = self.parse_originating_court_info(html, coa_id) - else: + if oci: + parsed["d.appeal_from_str"] = oci.pop("origin_court", "") + if parsed["d.appeal_from_str"]: + parsed["d.appeal_from_id"] = "texapp" + if not oci: oci = self.parse_originating_court_info(html, trial_id) + parsed["d.appeal_from_str"] = oci.pop("origin_court", "") + parsed["oci"] = oci - # TODO: we could extract people_db models: Party, Attorneys, PartyType + # Further work: + # we could extract people_db models: Party, Attorneys, PartyType return parsed - def parse_originating_court_info(self, html, table_id): - """Parses OCI section + def parse_originating_court_info( + self, html: lxmlHTML, table_id: str + ) -> Dict: + """Parses Originating Court Information section - Some Supreme Case cases have OCI for both Appeal and Trial court - In Courtlistener, OCI and Docket have a 1-1 relation - So we may only pick one + Some Supreme Court or texcrimapp cases have OCI for both Appeals + and Trial courts. 
In Courtlistener, OCI and Docket have a 1-1 relation, + so we can only pick one Example: https://search.txcourts.gov/Case.aspx?cn=22-0431&coa=cossup + + :param html: object for aplying selectors + :table_id: either COA or Trial Courts information tables + + :return: dict with parsed OCI data """ labels = html.xpath( - f"//div[@id='{table_id}']//div[class='span2']/label/text()" - ) - values = html.xpath( - f"//div[@id='{table_id}']//div[class='span4']/text()[last()]" + f"//div[@id='{table_id}']//div[@class='span2']/label/text()" ) + values = html.xpath(f"//div[@id='{table_id}']//div[@class='span4']") data = {} for lab, val in zip(labels, values): key = self.oci_mapper.get(lab.strip()) + val = ( + val.xpath("div/a/text()")[0] + if val.xpath("div/a") + else val.xpath("text()[last()]")[0] + ) if not key or not val.strip(): continue - data[lab] = val + data[key] = val.strip() if "COA" in table_id: - if data.get("appeal_from_str"): - data["appeal_from"] = "texapp" if data.get("citation") and "," in data["citation"]: _, data["date_judgment"] = data.pop("citation").split(",") return data - def get_name(self, html, link: str) -> Optional[str]: + def get_name(self, html: lxmlHTML, link: str) -> str: """Abstract out the case name from the case page.""" try: plaintiff = self.get_by_label_from_case_page(html, "Style:") @@ -203,36 +248,75 @@ def get_name(self, html, link: str) -> Optional[str]: logger.warning(f"No title or defendant found for {self.url}") return "" - def get_opinions(self, html, parsed): - # In texcrimapp, opinion is in upper case OPINION ISSD - disp = "//div[contains(text(), 'Case Events')]//td[contains(text(), 'opinion')]/following-sibling::td[1]/text()" - if html.xpath(disp): - parsed["oc.disposition"] = html.xpath(disp)[0] - - # 2 Opinions: main and concurring - # https://search.txcourts.gov/Case.aspx?cn=PD-0984-19&coa=coscca - - # 3 opinions - # https://search.txcourts.gov/Case.aspx?cn=PD-0037-22&coa=coscca - - # supreme court has 'remarks' field, which may 
have per_curiam field - # https://search.txcourts.gov/Case.aspx?cn=22-0424&coa=cossup - - # https://search.txcourts.gov/Case.aspx?cn=23-0390&coa=cossup - # structure is not so clear - - # TODO - # build object - # clean values - # propagate shared values - # fill defaults - # validate JSON - # rebuild examples, including the extra page - # transform another priority source to see how it looks - - - def get_by_label_from_case_page(self, html, label:str) -> str: + def get_opinions(self, html: lxmlHTML) -> List[Dict]: + """Parses opinions present in case page. + If we fail to find any opinions here, the scraper will default to using + the URL in the search results page + + `tex`, `texcrimapp` and `texapp_*` differ on how opinions are presented, + so this method is overridden in inheriting classes so as to not + overcrowd it with all the if clauses + + Examples: + + Cluster with 3 opinions (Supreme Court) + https://search.txcourts.gov/Case.aspx?cn=22-0242&coa=cossup + + Counter Examples: + 'Opinion' text does not appear on 'Event Type' column; but there is indeed an opinion + https://search.txcourts.gov/Case.aspx?cn=21-1008&coa=cossup + + :param html: page's HTML object + :return List of opinions + """ + opinions = [] + opinion_xpath = "//div[div[contains(text(), 'Case Events')]]//tr[td[contains(text(), 'pinion issu')]]" + for opinion in html.xpath(opinion_xpath): + op = {} + link_xpath = opinion.xpath(".//td//a/@href") + if not link_xpath: + continue + op["download_url"] = link_xpath[0] + op["disposition"] = opinion.xpath(".//td[3]/text()")[0] + + # Remarks may contain Per Curiam flag. 
Does not exist in texcrim + remark = opinion.xpath(".//td[4]/text()")[0] + if "per curiam" in remark.lower(): + op["per_curiam"] = True + + author_match = re.search( + r"(?P<judge>[A-Z][a-z-]+)\s+filed\s+a", remark + ) + if author_match: + op["author_str"] = author_match.group("judge") + + joined_match = re.findall( + r"Justice\s+(?P<judge>[A-Z][a-z-]+) (?!filed)(?!delivered)", + remark, + ) + if joined_match: + op["joined_by_str"] = joined_match + + op_type = opinion.xpath(".//td[2]/text()")[0].lower() + if "concur" in op_type: + op["type"] = "030concurrence" + elif "diss" in op_type: + op_type = "040dissent" + else: + op_type = "010combined" + + opinions.append(op) + + return opinions + + def get_by_label_from_case_page(self, html: lxmlHTML, label: str) -> str: + """Selects from first / main table of case page + + :param html: HTML object that supports selection + :param label: label to be used in selector + + :return case page string value + """ xpath = f'//label[contains(text(), "{label}")]/parent::div/following-sibling::div/text()' value = html.xpath(xpath) return value[0].strip() if value else "" - \ No newline at end of file diff --git a/juriscraper/opinions/united_states/state/texapp_1.py b/juriscraper/opinions/united_states/state/texapp_1.py index 6c4311037..01633bda0 100644 --- a/juriscraper/opinions/united_states/state/texapp_1.py +++ b/juriscraper/opinions/united_states/state/texapp_1.py @@ -15,3 +15,14 @@ def __init__(self, *args, **kwargs): self.court_id = self.__module__ self.court_name = "capp_1" self.checkbox = 2 + + def get_opinions(self, html): + """ + Cluster with 2 opinions (Court of Appeals) + https://search.txcourts.gov/Case.aspx?cn=02-22-00347-CV&coa=coa02 + Counter Examples: + + 'opinion' in text, but it is actually a letter + https://search.txcourts.gov/Case.aspx?cn=01-23-00777-CR&coa=coa01 + """ + raise NotImplementedError("...") diff --git a/juriscraper/opinions/united_states/state/texcrimapp.py b/juriscraper/opinions/united_states/state/texcrimapp.py
index 340663265..5315f8cd2 100644 --- a/juriscraper/opinions/united_states/state/texcrimapp.py +++ b/juriscraper/opinions/united_states/state/texcrimapp.py @@ -15,3 +15,34 @@ def __init__(self, *args, **kwargs): self.court_id = self.__module__ self.court_name = "ccrimapp" self.checkbox = 1 + + def get_opinions(self, html): + """ + Cluster with 3 opinions (texcrimapp) + https://search.txcourts.gov/Case.aspx?cn=PD-0037-22&coa=coscca + + 2 Opinions: main and concurring + https://search.txcourts.gov/Case.aspx?cn=PD-0984-19&coa=coscca + """ + opinions = [] + opinion_xpath = "//div[contains(text(), 'Case Events')]//tr[td[text()='OPINION ISSD')]]" + for opinion in html.xpath(opinion_xpath): + op = {} + link_xpath = opinion.xpath(".//td//a/@href") + if not link_xpath: + continue + + op["download_url"] = link_xpath[0] + op["disposition"] = opinion.xpath(".//td[3]/text()")[0] + + op_type = opinion.xpath(".//td//tr[a]/td[2]/text()")[0] + if op_type == "Original": + op["type"] = "010combined" + elif op_type == "Dissenting": + op_type["type"] = "040dissent" + elif op_type == "Concurring": + op_type["type"] = "030concurrence" + + opinions.append(op) + + return opinions diff --git a/juriscraper/schemas/Citation.json b/juriscraper/schemas/Citation.json new file mode 100644 index 000000000..cdd7c6729 --- /dev/null +++ b/juriscraper/schemas/Citation.json @@ -0,0 +1,14 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://courtlistener.com/schemas/Citation.json", + "title": "Citation", + "description":"", + "type": "object", + "properties": { + "volume": {"type": "integer"}, + "reporter": {"type": "string"}, + "page": {"type": "string"}, + "type": {"enum": [1, 2, 3, 4, 5, 6, 7, 8], "description":"1 - Federal; 2 - State; 3 - State Regional; 4 - Specialty; 5 - Scotus Early; 6 - Lexis; 7 - West; 8 - Neutral"} + }, + "required": ["volume", "reporter", "page", "type"] +} diff --git a/juriscraper/schemas/Docket.json b/juriscraper/schemas/Docket.json new file mode 
100644 index 000000000..f6181c17a --- /dev/null +++ b/juriscraper/schemas/Docket.json @@ -0,0 +1,84 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://courtlistener.com/schemas/Docket.json", + "title": "Docket", + "description": "A Courtlistener Docket object. The schemas correspond to Courtlistener's Django models, which mirror the DB models. These schemas should be updated when the CL models are updated. We keep the comments on the fields to a minimal. For further documentation check CL models.py files.", + "type": "object", + "properties": { + "source": { + "enum": [ + 2 + ] + }, + "court": { + "type": "string" + }, + "appeal_from_str": { + "type": "string" + }, + "appeal_from_id": { + "type": "string", + "description": "Court ID in Courtlistener, which is a lowercase abbreviation of the court name" + }, + "assigned_to_str": { + "type": "string" + }, + "referred_to_str": { + "type": "string" + }, + "panel_str": { + "type": "string" + }, + "date_filed": { + "type": "string", + "format": "date-time" + }, + "date_terminated": { + "type": "string", + "format": "date-time" + }, + "date_last_filing": { + "type": "string", + "format": "date-time" + }, + "case_name": { + "type": "string" + }, + "case_name_short": { + "type": "string" + }, + "case_name_full": { + "type": "string" + }, + "docket_number": { + "type": "string" + }, + "cause": { + "type": "string" + }, + "jury_demand": { + "type": "string" + }, + "appellate_fee_status": { + "type": "string" + }, + "date_blocked": { + "type": "string", + "format": "date-time" + }, + "blocked": { + "type": "boolean" + }, + "OriginatingCourtInformation": { + "$ref": "https://courtlistener.com/schemas/OriginatingCourtInformation.json" + }, + "OpinionCluster": { + "$ref": "https://courtlistener.com/schemas/OpinionCluster.json" + } + }, + "required": [ + "docket_number", + "case_name", + "OpinionCluster" + ] +} \ No newline at end of file diff --git a/juriscraper/schemas/Opinion.json 
b/juriscraper/schemas/Opinion.json new file mode 100644 index 000000000..5f4fcde15 --- /dev/null +++ b/juriscraper/schemas/Opinion.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://courtlistener.com/schemas/Opinion.json", + "title": "Opinion", + "description":"", + "type": "object", + "properties": { + "author_str": {"type": "string"}, + "per_curiam": {"type": "boolean"}, + "joined_by_str": {"type": "string"}, + "page_count": {"type": "integer"}, + "download_url": {"type": "string"}, + "type": { + "enum": [ + "010combined", + "015unamimous", + "020lead", + "025plurality", + "030concurrence", + "035concurrenceinpart", + "040dissent", + "050addendum", + "060remittitur", + "070rehearing", + "080onthemerits", + "090onmotiontostrike" + ] + } + }, + "required": ["download_url"] +} diff --git a/juriscraper/schemas/OpinionCluster.json b/juriscraper/schemas/OpinionCluster.json new file mode 100644 index 000000000..20c8127af --- /dev/null +++ b/juriscraper/schemas/OpinionCluster.json @@ -0,0 +1,131 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://courtlistener.com/schemas/OpinionCluster.json", + "title": "OpinionCluster", + "description": "", + "type": "object", + "properties": { + "judges": { + "type": "string" + }, + "date_filed": { + "type": "string", + "format": "date-time" + }, + "date_filed_is_approximate": { + "type": "boolean" + }, + "case_name_short": { + "type": "string" + }, + "case_name": { + "type": "string" + }, + "case_name_full": { + "type": "string" + }, + "scdb_votes_minority": { + "type": "integer" + }, + "scdb_votes_majority": { + "type": "integer" + }, + "scdb_id": { + "type": "string" + }, + "attorneys": { + "type": "string" + }, + "procedural_history": { + "type": "string" + }, + "nature_of_suit": { + "type": "string" + }, + "disposition": { + "type": "string" + }, + "posture": { + "type": "string" + }, + "syllabus": { + "type": "string" + }, + "headnotes": { + "type": 
"string" + }, + "summary": { + "type": "string" + }, + "history": { + "type": "string" + }, + "other_dates": { + "type": "string" + }, + "cross_reference": { + "type": "string" + }, + "correction": { + "type": "string" + }, + "date_blocked": { + "type": "string", + "format": "date-time" + }, + "blocked": { + "type": "boolean" + }, + "arguments": { + "type": "string" + }, + "headmatter": { + "type": "string" + }, + "precedential_status": { + "enum": [ + "Published", + "Unpublished", + "Errata", + "Separate", + "In-chambers", + "Relating-to", + "Unknown" + ] + }, + "source": { + "enum": [ + "C" + ] + }, + "Opinions": { + "type": "array", + "items": [ + { + "$ref": "https://courtlistener.com/schemas/Opinion.json" + } + ] + }, + "Citations": { + "type": "array", + "items": [ + { + "$ref": "https://courtlistener.com/schemas/Citation.json" + } + ] + }, + "citation_strings": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "date_filed", + "date_filed_is_approximate", + "case_name", + "precedential_status", + "Opinions" + ] +} \ No newline at end of file diff --git a/juriscraper/schemas/OriginatingCourtInformation.json b/juriscraper/schemas/OriginatingCourtInformation.json new file mode 100644 index 000000000..5d3d40e26 --- /dev/null +++ b/juriscraper/schemas/OriginatingCourtInformation.json @@ -0,0 +1,19 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://courtlistener.com/schemas/OriginatingCourtInformation.json", + "title": "OriginatingCourtInformation", + "description":"", + "type": "object", + "properties": { + "docket_number": {"type": "string"}, + "assigned_to_str": {"type": "string"}, + "ordering_judge_str": {"type": "string"}, + "court_reporter": {"type": "string"}, + "date_disposed": {"type": "string", "format": "date-time"}, + "date_filed": {"type": "string", "format": "date-time"}, + "date_judgment": {"type": "string", "format": "date-time"}, + "date_judgment_eod": {"type": "string", "format": 
"date-time"}, + "date_filed_noa": {"type": "string", "format": "date-time"}, + "date_received_coa": {"type": "string", "format": "date-time"} + } +} \ No newline at end of file diff --git a/juriscraper/schemas/__init__.py b/juriscraper/schemas/__init__.py new file mode 100644 index 000000000..5f830bf15 --- /dev/null +++ b/juriscraper/schemas/__init__.py @@ -0,0 +1,3 @@ +__all__ = [ + "schema_utils", +] diff --git a/juriscraper/schemas/schema_utils.py b/juriscraper/schemas/schema_utils.py new file mode 100644 index 000000000..f5170e7db --- /dev/null +++ b/juriscraper/schemas/schema_utils.py @@ -0,0 +1,75 @@ +import json +from pathlib import Path +from typing import Dict, List, Union + +import requests +from jsonschema import Draft7Validator +from referencing import Registry, Resource, exceptions + + +def retrieve_from_filesystem(uri: str): + """Resolve the schema URLs to Juriscraper's folder organization + + :param uri: URL value of $ref / $id in a schema + :return: content of schema + """ + schema_folder = Path(__file__).parent + path = schema_folder / Path( + uri.replace("https://courtlistener.com/schemas/", "") + ) + contents = json.loads(path.read_text()) + + return Resource.from_contents(contents) + + +def retrieve_from_github(uri: str): + """Retrieve JSON from github + + Previous function won't work with docket + """ + gh = "https://raw.githubusercontent.com/grossir/juriscraper/new_opinion_site_subclass/juriscraper/schemas/" + uri = uri.replace("https://courtlistener.com/schemas/", gh) + + return Resource.from_contents(requests.get(uri).json()) + + +class SchemaValidator: + """ + The JSON schemas map closely to Courtlistener's Django Models + They are missing "Reference" or foreign key fields, which need to be + looked up in the DB and will be built up using some string values + For example: OpinionCluster.judges, or Opinion.author_str + + Some extra fields: + OpinionCluster.citation_strings + Citations are parsed using `eyecite` on the caller side, so we pass 
+ them as strings if we find them. Some citations may be passed as + proper objects, when the parsing is straightforward + + About types: + JSON format does not support "date" or "datetime" types, but it can + enforce the "date" format on a string + Luckily, the date-like values we collect are all "dates", which reduces + the complexity of also supporting "datetime" + """ + + def __init__(self): + """ + $id and $ref in the JSON Schema are URLs. + Since we are serving these from files, we need to create a special Registry + """ + docket_schema_id = "https://courtlistener.com/schemas/Docket.json" + try: + registry = Registry(retrieve=retrieve_from_filesystem) + docket_schema = registry.get_or_retrieve(docket_schema_id) + except exceptions.Unretrievable: + registry = Registry(retrieve=retrieve_from_github) + docket_schema = registry.get_or_retrieve(docket_schema_id) + + Draft7Validator.check_schema(docket_schema.value.contents) + self.validator = Draft7Validator( + docket_schema.value.contents, registry=registry + ) + + def validate(self, obj: Union[Dict, List]): + self.validator.validate(obj) diff --git a/juriscraper/schemas/scraper_schema.py b/juriscraper/schemas/scraper_schema.py deleted file mode 100644 index 70809eae1..000000000 --- a/juriscraper/schemas/scraper_schema.py +++ /dev/null @@ -1,185 +0,0 @@ -""" -The schemas correspond to Courtlistener's Django models, which mirror -the DB models. These schemas should be updated when the CL models are updated - -We keep the comments on the fields to a minimal. 
For further documentation -check CL models.py files -""" - -# Citations are usually returned as a string, and parsed by `eyecite` -# Currently, this schema would be used by structured citations returned -# from `extract_from_text` step -# citation.type reference: -# 1 - Federal -# 2 - State -# 3 - State Regional -# 4 - Specialty -# 5 - Scotus Early -# 6 - Lexis -# 7 - West -# 8 - Neutral -citation = { - "type": "object", - "properties": { - "volume": {"type": "integer"}, - "reporter": {"type": "string"}, - "page": {"type": "string"}, - "type": {"enum": [1, 2, 3, 4, 5, 6, 7, 8]}, - }, - "required": ["volume", "reporter", "page", "type"], -} - - -originating_court_information = { - "type": "object", - "properties": { - "docket_number": {"type": "string"}, - "assigned_to_str": {"type": "string"}, - "ordering_judge_str": {"type": "string"}, - "court_reporter": {"type": "string"}, - "date_disposed": {"type": "string", "format": "date-time"}, - "date_filed": {"type": "string", "format": "date-time"}, - "date_judgment": {"type": "string", "format": "date-time"}, - "date_judgment_eod": {"type": "string", "format": "date-time"}, - "date_filed_noa": {"type": "string", "format": "date-time"}, - "date_received_coa": {"type": "string", "format": "date-time"}, - }, -} - - -opinion = { - "type": "object", - "properties": { - "author_str": {"type": "string"}, - "per_curiam": {"type": "boolean"}, - "joined_by_str": {"type": "string"}, - "page_count": {"type": "integer"}, - "download_url": {"type": "string"}, - "type": { - "enum": [ - "010combined", - "015unamimous", - "020lead", - "025plurality", - "030concurrence", - "035concurrenceinpart", - "040dissent", - "050addendum", - "060remittitur", - "070rehearing", - "080onthemerits", - "090onmotiontostrike", - ] - }, - }, - "required": ["download_url"], -} - -# panel -> people_db.Person -# non_participating_judges -> people_db.Person -# source -# citation_count -# docket_id -cluster = { - "type": "object", - "properties": { - "judges": 
{"type": "string"}, - "date_filed": {"type": "string", "format": "date-time"}, - "date_filed_is_approximate": {"type": "boolean"}, - "case_name_short": {"type": "string"}, - "case_name": {"type": "string"}, - "case_name_full": {"type": "string"}, - "scdb_votes_minority": {"type": "integer"}, - "scdb_votes_majority": {"type": "integer"}, - "scdb_id": {"type": "string"}, - "attorneys": {"type": "string"}, - "procedural_history": {"type": "string"}, - "nature_of_suit": {"type": "string"}, - "posture": {"type": "string"}, - "syllabus": {"type": "string"}, - "headnotes": {"type": "string"}, - "summary": {"type": "string"}, - "history": {"type": "string"}, - "other_dates": {"type": "string"}, - "cross_reference": {"type": "string"}, - "correction": {"type": "string"}, - "date_blocked": {"type": "string", "format": "date-time"}, - "blocked": {"type": "boolean"}, - "arguments": {"type": "string"}, - "headmatter": {"type": "string"}, - "precedential_status": { - "enum": [ - "Published", - "Unpublished", - "Errata", - "Separate", - "In-chambers", - "Relating-to", - "Unknown", - ] - }, - # C stands for "Court Website". Since we are scraping court websites - # This is the only option that we can output - "source": {"enum": ["C"]}, - }, - "required": [ - "date_filed", - "date_filed_is_approximate", - "case_name", - "precedential_status", - ], -} - -docket = { - "type": "object", - "properties": { - # 2 stands for "Scraper". 
Since we are scraping this is the only - # option to output - "source": {"enum": [2]}, - "court": {"type": "string"}, - "appeal_from_str": {"type": "string"}, - "appeal_from": {"type": "string"}, - "assigned_to_str": {"type": "string"}, - "referred_to_str": {"type": "string"}, - "panel_str": {"type": "string"}, - "date_filed": {"type": "string", "format": "date-time"}, - "date_terminated": {"type": "string", "format": "date-time"}, - "date_last_filing": {"type": "string", "format": "date-time"}, - "case_name": {"type": "string"}, - "case_name_short": {"type": "string"}, - "case_name_full": {"type": "string"}, - "docket_number": {"type": "string"}, - "cause": {"type": "string"}, - "jury_demand": {"type": "string"}, - "appellate_fee_status": {"type": "string"}, - "date_blocked": {"type": "string", "format": "date-time"}, - "blocked": {"type": "boolean"}, - "originating_court_information": originating_court_information, - }, - "required": [ - "docket_number", - "case_name" - ], -} - - -legacy = { - "type": "object", - "properties": { - "case_names": {"type": "string"}, - "case_dates": {"type": "string", "format": "date-time"}, - "download_urls": {"type": "string"}, - "precedential_statuses": {"enum": ["Published", "Unpublished"]}, - "date_filed_is_approximate": {"type": "boolean"}, - "blocked_statuses": {"type": "boolean"}, - "citation": {"type": "string"}, - "docket": {"type": "string"}, - }, - "required": [ - "case_dates", - "case_names", - "download_urls", - "precedential_statuses", - "date_filed_is_approximate", - ], -} diff --git a/requirements.txt b/requirements.txt index e4f8477c1..2baa77e11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,6 @@ python-dateutil>=2.8.2 requests>=2.20.0 selenium>=4.9.1 tldextract +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +referencing==0.32.1 \ No newline at end of file diff --git a/sample_caller.py b/sample_caller.py index 749d7bc54..7360b019e 100755 --- a/sample_caller.py +++ b/sample_caller.py @@ 
-42,61 +42,67 @@ def scrape_court(site, binaries=False): exceptions = defaultdict(list) for item in site: # First turn the download urls into a utf-8 byte string - item_download_urls = item["download_urls"].encode("utf-8") - # Percent encode URLs (this is a Python wart) - download_url = parse.quote( - item_download_urls, safe="%/:=&?~#+!$,;'@()*[]" - ) - - if binaries: - try: - opener = request.build_opener() - for cookie_dict in site.cookies: - opener.addheaders.append( - ( - "Cookie", - f"{cookie_dict['name']}={cookie_dict['value']}", + if getattr(site, "is_cluster_site", False): + item_download_urls = [ + op["download_url"] + for op in item["Docket"]["OpinionCluster"]["Opinions"] + ] + else: + item_download_urls = [item["download_urls"].encode("utf-8")] + + for url in item_download_urls: + # Percent encode URLs (this is a Python wart) + download_url = parse.quote(url, safe="%/:=&?~#+!$,;'@()*[]") + + if binaries: + try: + opener = request.build_opener() + for cookie_dict in site.cookies: + opener.addheaders.append( + ( + "Cookie", + f"{cookie_dict['name']}={cookie_dict['value']}", + ) ) - ) - r = opener.open(download_url) - expected_content_types = site.expected_content_types - response_type = r.headers.get("Content-Type", "").lower() - # test for expected content type response - if ( - expected_content_types - and response_type not in expected_content_types - ): + r = opener.open(download_url) + expected_content_types = site.expected_content_types + response_type = r.headers.get("Content-Type", "").lower() + # test for expected content type response + if ( + expected_content_types + and response_type not in expected_content_types + ): + exceptions["DownloadingError"].append(download_url) + v_print(3, f"DownloadingError: {download_url}") + v_print(3, traceback.format_exc()) + data = r.read() + + # test for empty files (thank you CA1) + if len(data) == 0: + exceptions["EmptyFileError"].append(download_url) + v_print(3, f"EmptyFileError: {download_url}") + 
v_print(3, traceback.format_exc()) + continue + except Exception: exceptions["DownloadingError"].append(download_url) v_print(3, f"DownloadingError: {download_url}") v_print(3, traceback.format_exc()) - data = r.read() - - # test for empty files (thank you CA1) - if len(data) == 0: - exceptions["EmptyFileError"].append(download_url) - v_print(3, f"EmptyFileError: {download_url}") - v_print(3, traceback.format_exc()) continue - except Exception: - exceptions["DownloadingError"].append(download_url) - v_print(3, f"DownloadingError: {download_url}") - v_print(3, traceback.format_exc()) - continue - # Extract the data using e.g. antiword, pdftotext, etc., then - # clean it up. - data = extract_doc_content(data) - data = site.cleanup_content(data) - - # Normally, you'd do your save routines here... - v_print(1, "\nAdding new item:") - for k, v in item.items(): - if isinstance(v, str): - value = trunc(v, 200, ellipsis="...") - v_print(1, f' {k}: "{value}"') - else: - # Dates and such... - v_print(1, f" {k}: {v}") + # Extract the data using e.g. antiword, pdftotext, etc., then + # clean it up. + data = extract_doc_content(data) + data = site.cleanup_content(data) + + # Normally, you'd do your save routines here... + v_print(1, "\nAdding new item:") + for k, v in item.items(): + if isinstance(v, str): + value = trunc(v, 200, ellipsis="...") + v_print(1, f' {k}: "{value}"') + else: + # Dates and such... 
+ v_print(1, f" {k}: {v}") v_print(3, f"\n{site.court_id}: Successfully crawled {len(site)} items.") return {"count": len(site), "exceptions": exceptions} diff --git a/tests/examples/opinions/united_states/tex_example.compare.json b/tests/examples/opinions/united_states/tex_example.compare.json index 2bfb5bad2..6bafccfe5 100644 --- a/tests/examples/opinions/united_states/tex_example.compare.json +++ b/tests/examples/opinions/united_states/tex_example.compare.json @@ -1,142 +1,80 @@ [ { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2457&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0617", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Petition for Review/Cause under Rule 53.1", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=fa4dc02f-9219-47ae-af9e-610967e3cec2&coa=cossup&DT=OPINION&MediaID=04d9e9c5-bfc2-422d-bca4-d76dce234526", + "author_str": "Young", + "joined_by_str": "Hecht;Blacklock", + "type": "030concurrence" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=259a3c9c-4bcf-4166-9ea2-09bd11b30d10&coa=cossup&DT=OPINION&MediaID=5cf04feb-fcf5-40b0-a67e-ad8ebefd3efc", + "author_str": "Lehrmann", + "joined_by_str": "Busby;Young", + "type": "030concurrence" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=b103a5e9-a915-468e-a207-ef1735308744&coa=cossup&DT=OPINION&MediaID=3ff7a9e4-24f7-44c6-9069-30f038d3b875" + } + ], + "date_filed": "2024-03-01", + "judges": "Hecht;Blacklock;Young;Busby;Young;Lehrmann", + "disposition": "Court of appeals' judgment affirmed", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + 
"precedential_status": "Published", + "case_name": "Eve Lynn Baker v. Terry Lee Bizzle", + "case_name_short": "" + }, + "case_name": "Eve Lynn Baker v. Terry Lee Bizzle", + "appeal_from_str": "2nd Court of Appeals", + "appeal_from_id": "texapp", + "OriginatingCourtInformation": { + "docket_number": "02-20-00075-CV", + "assigned_to_str": "E. Lee Gabriel", + "date_judgment": "2022-01-13" + }, + "docket_number": "22-0242", + "source": 2, + "blocked": false, + "case_name_short": "" + } }, { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2456&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0804", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2450&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0957", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2449&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0957", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2448&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-1039", - "case_name_shorts": "" - }, - { - "case_dates": 
"2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2446&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-0846", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2445&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0804", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2440&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0957", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2437&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0483", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2433&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0617", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": 
"tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2432&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-1039", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2425&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0617", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2422&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "12-0804", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-03", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=2417&Index=***sc%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-0846", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Petition for Review/Cause under Tex. R. App. P. 
59.1", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=094a8ad8-7205-4041-8b63-2fab9687e5c9&coa=cossup&DT=OPINION&MediaID=7ea5e635-2f83-4650-a5f4-4182c2a8bbec", + "per_curiam": true + } + ], + "date_filed": "2024-03-01", + "disposition": "Court of Appeals' judgment reversed and remanded to Court of Appeals", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "In THE INTEREST OF C.E., a CHILD v. the State of Texas", + "case_name_short": "" + }, + "case_name": "In THE INTEREST OF C.E., a CHILD v. the State of Texas", + "appeal_from_str": "2nd Court of Appeals", + "appeal_from_id": "texapp", + "OriginatingCourtInformation": { + "docket_number": "02-22-00285-CV", + "assigned_to_str": "D. Michael Wallach", + "date_judgment": "2023-01-12" + }, + "docket_number": "23-0180", + "source": 2, + "blocked": false, + "case_name_short": "" + } } ] \ No newline at end of file diff --git a/tests/examples/opinions/united_states/tex_example.html b/tests/examples/opinions/united_states/tex_example.html index eab4d10b4..e93eb3e79 100644 --- a/tests/examples/opinions/united_states/tex_example.html +++ b/tests/examples/opinions/united_states/tex_example.html @@ -1,256 +1,1762 @@ - - - - - TAMES SEARCH - Supreme Court - - - - - - - - - - -
-
- - - - - -
- - - - - - - - - - -
-
-
-
- - -
- -
-
-
-
-
- - - -
- - - -
- -
-
- -
- - - - -

- -

Loading
- - - - -

- - - - - - - - - - - - - - - - - - -

-

- Supreme Court Home Page -
-

- - - - - - - - - - - - - - - - - - - -

-

- Frequently Asked Questions - -
-

- - - - - - - - - - - - - - - - -

-

- Rules & Standards - -
-

- - - - - - - - - - - - - - - - -

-

- Orders & Opinions - -
-

- - - - - - - - - - - - - - - - -

-

- Clerk's Office - -
-

- - - - - - - - - - - - - - - - - - -

-

- About the Court -
-
-

- - - - + + - - - - - - - - - - - - Contact - - | - - - - - - - - - - - - - - - Justices - - | - - - - - - - - - - - - - - - Employment - - | - - - - - - - - - - - - - - - History - - | - - - - - - - - - - - - - - - Court News & Advisories - - | - - - - - - - - - - - - - - - RSS Information - - | - - - - - - - - - - - - - - - Podcasts - - | -
- - - - - - - - - -

-

- Court Calendar - -
-

- - - - - - - - - - - - - - - - -

-

- - Permanent Judicial Commission for Children, Youth & Families -
-

- - - - - - - - - - - - - - - - -

-

- -
efiling logo
-
-

- - - - - - - - - - - -
-
-
-
- - - - - - - - - - -
-
-

- Case Information

-
-
-
- -
- -
- Case Search - -
- -
- Electronic Briefs - -
- -
- Event Reports - -
- -
- Oral Argument Information - -
- -
- Oral Argument Audio - -
- -
- Oral Argument Video - -
- -
- Causes - -
- -
- -
-
-
- - - - - - - - - -
-
-

- Case Mail

-
-
-
-
Track Cases or Released Opinions
-
- - - - - -
-
-
-
- - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/tex_subexample_1.html b/tests/examples/opinions/united_states/tex_subexample_1.html new file mode 100644 index 000000000..64513fe4d --- /dev/null +++ b/tests/examples/opinions/united_states/tex_subexample_1.html @@ -0,0 +1,1632 @@ + + + + + + Case Detail + + + + + +
+ +
+ +
+
+
+

+ Supreme Court +

+
+ + +
+ +
+
+ +
+
+
+
+ + + +
+ +
+ + + + + +
+ +
+ +
+
+ + + +
+ + + + + + + + + + + + + + +
+ + + + + +
+ + + + +
+
+ Case: + 22-0242 +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 22-0242 + +
+
+
+
+
+ +
+
+
+ 03/28/2022 +
+
+
+
+
+ +
+
+ Petition for Review/Cause under Rule 53.1 +
+
+
+
+ +
+
+ EVE LYNN BAKER  +
+
+
+
+ +
+
+ TERRY LEE BIZZLE  +
+
+ +
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date Event + TypeDescription + Remarks + Document
11/21/2022Brief on the MeritsRespondentResponse Brief on the Merits filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
10/17/2022Brief on the MeritsPetitionerPetitioner's Brief on the Merits filed on behalf of + Eve Lynn Baker.
+
+ + + + + +
+
+
07/13/2022Response to PetitionRespondentResponse to Petition for Review filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
03/28/2022Petition for ReviewPetitionerPetition for Review filed on behalf of Eve Lynn + Baker.
+
+ + + + + + + + + +
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date + Event + TypeDisposition + Remarks + Document
03/01/2024Court approved judgment sent to + attorneys of recordIssued 
+
+ + + + + +
+
+
03/01/2024Concurring Opinion issued.IssuedJustice Young filed a concurring opinion, in which + Chief Justice Hecht and Justice Blacklock joined. +
+
+ + + + + +
+
+
03/01/2024Concurring Opinion issued.IssuedJustice Lehrmann filed a concurring opinion, in + which Justice Busby and Justice Young joined.
+
+
+ + + + + +
+
+
03/01/2024Opinion issuedCourt of appeals' judgment affirmedThe Court affirms the court of appeals' judgment. + Justice Devine delivered the opinion of the + Court.
(3/1/2024 4:00:11 PM) 220242c2.pdf, + 220242c1.pdf, 220242.pdf, 22-0242.jmt.pdf
+
+ + + + + + + + + +
+
+
10/11/2023Clerk's Record  
+
+ +
+
09/13/2023Oral argument  Argued on behalf of Petitioner by Paul M. Leopold; + Argued on behalf of Respondent by Alyssa S. + Herrington and Trey Volentine
+
+ + + + + + + + + + + + + +
+
+
09/08/2023Submission Schedule  
+
+ + + + + +
+
+
08/22/2023Oral Argument Submission Form + from Attorney received Oral Argument Submission Form filed on behalf of + Terry Lee Bizzle. Oral Argument will be presented by + Alyssa S. Herrington and Trey Volentine of Hanshaw + Kennedy Hafen, LLP of Frisco, Texas.
+
+ + + + + +
+
+
08/15/2023Oral Argument Submission Form + from Attorney received Oral Argument Submission Form filed on behalf of Eve + Lynn Baker. Oral Argument will be presented by Paul + M. Leopold from KoonsFuller from Southlake, + Texas.
+
+ + + + + +
+
+
06/30/2023Case set for oral argumentCase set for oral argumentThis cause has been set for oral argument at 9:00 + a.m., September 13, 2023. Time allotted to argue: + 20/20 minutes
+
+ + + + + +
+
+
03/10/2023Petition for Review disposed + Filing grantedThe date and time for oral argument are yet to be + determined.
+
+ + + + + +
+
+
03/10/2023Petition for Review granted  
+
+ +
+
11/21/2022Brief on the Merits Response Brief on the Merits filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
11/01/2022Motion for Extension of Time + disposed.Filing grantedMotion for Extension of Time to file Response Brief + on the Merits is granted. Response Brief is due + November 21, 2022. FURTHER REQUESTS FOR EXTENSIONS + OF TIME FOR THIS FILING WILL BE DISFAVORED. Reply + Brief is due December 6, 2022.
+
+ + + + + +
+
+
11/01/2022Motion for Extension of Time to + File Brief filed Unopposed Motion for Extension of Time to file + Response Brief on the Merits filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
10/17/2022Brief on the Merits Petitioner's Brief on the Merits filed on behalf of + Eve Lynn Baker.
+
+ + + + + +
+
+
09/29/2022Motion for Extension of Time + disposed.Filing grantedUnopposed Motion for Extension of Time to file + Petitioner's Brief on the Merits is granted. + Petitioner's Brief is due October 17, 2022. FURTHER + REQUESTS FOR EXTENSIONS OF TIME FOR THIS FILING WILL + BE DISFAVORED; Respondent's Brief is due November 7, + 2022; Reply Brief is due November 22, 2022.
+
+ + + + + +
+
+
09/28/2022Motion for Extension of Time to + File Brief filed Unopposed Motion for Extension of Time to file + Petitioner's Brief on the Merits filed on behalf of + Eve Lynn Baker.
+
+ + + + + +
+
+
09/02/2022Brief on the Merits Requested +  Brief on the merits requested: Petitioner's brief + due no later than October 3, 2022; Response brief + due October 24, 2022; Reply brief due November 8, + 2022.
+
+ + + + + +
+
+
07/13/2022Response to Petition Response to Petition for Review filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
06/03/2022Motion for Extension of Time to + File Response disposedFiling grantedMotion for Extension of Time to file Response to + Petition for Review is granted. Response is due July + 13, 2022.  FURTHER REQUESTS FOR EXTENSIONS OF + TIME FOR THIS FILING WILL BE DISFAVORED.
+
+ + + + + +
+
+
06/03/2022Motion for Extension of Time to + File Response Unopposed Motion for Extension of Time to file + Response to Petition for Review filed on behalf of + Terry Lee Bizzle.
+
+ + + + + +
+
+
05/13/2022Supreme Court of Texas Requested + Response Requested response to petition for review due no + later than June 13, 2022.
+
+ + + + + +
+
+
04/12/2022Case forwarded to Court  
+
+ +
+
04/11/2022Response Waiver filed Response Waiver filed on behalf of Terry Lee + Bizzle.
+
+ + + + + +
+
+
03/29/2022Clerk's Record  
+
+ +
+
03/29/2022Court reporter/recorder's record +   
+
+ +
+
03/28/2022Petition for Review Petition for Review filed on behalf of Eve Lynn + Baker.
+
+ + + + + + + + + +
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason SetRemarks
03/18/2024StatusMotion for rehearing due to be filed.  
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
Baker, Eve LynnPetitionerMs. Charla Bradshaw
Mr. Brian Scott + Loughmiller
Mr. Paul M. Leopold
Brett + Nelson
Bizzle, Terry LeeRespondentMs. Alyssa Herrington
Ms. Sarah Rose
Mr. + Allen "Trey" Volentine III
+ +
+ + +
+
+
+
+
+ +
+
+
+
+ Court of Appeals Information: +
+
+
+ +
+
+ +
+
+ + +
+
+
+
+ +
+
+ Reverse & Render  +
+
+
+
+ +
+
+ 683 SW3d 44, 01-13-22  +
+
+
+
+ +
+
+ 2nd Court of Appeals  +
+
+ +
+
+ +
+
+ +
+
+ Honorable E. Lee Gabriel  +
+
+ +
+
+
+
+
+
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 367th District Court  +
+
+
+
+ +
+
+ Denton  +
+
+
+
+ +
+
+ Honorable Margaret Barnes  +
+
+
+
+ +
+
+ 18-9925-367  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This + software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ + + +
+
+
+
+ + + + + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/tex_subexample_2.html b/tests/examples/opinions/united_states/tex_subexample_2.html new file mode 100644 index 000000000..a79883107 --- /dev/null +++ b/tests/examples/opinions/united_states/tex_subexample_2.html @@ -0,0 +1,2424 @@ + + + + + + Case Detail + + + + + +
+
+
+

+ Supreme Court +

+
+ + +
+ +
+
+ +
+
+
+
+ + + +
+ +
+ + + + + +
+ +
+ +
+
+ + + +
+ + + + + + + + + + + + + + +
+ + + + + +
+ + + + +
+
+ Case: + 23-0180 +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 23-0180 + +
+
+
+
+
+ +
+
+
+ 03/07/2023 +
+
+
+
+
+ +
+
+ Petition for Review/Cause under Tex. R. App. P. 59.1 +
+
+
+
+ +
+
+ IN THE INTEREST OF C.E., A CHILD  +
+
+
+
+ +
+
+   +
+
+ +
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date Event + TypeDescription + Remarks + Document
10/06/2023Reply BriefPetitionerReply Brief on the Merits filed on behalf of Texas + Department of Family & Protective Services.
+
+ + + + + +
+
+
10/06/2023Brief on the MeritsPetitioner's Reply BriefPetitioner's Reply Brief on the Merits filed on + behalf of C.E.
+
+ + + + + +
+
+
09/28/2023Brief on the MeritsRespondentRespondent's Brief on the Merits filed on behalf of + B.K.
+
+ + + + + +
+
+
09/18/2023Brief on the MeritsPetitionerPetitioner's Brief on the Merits filed on behalf of + Texas Department of Family and Protective Services. +
+
+ + + + + +
+
+
09/18/2023Brief on the MeritsParental Termination casePetitioner's Brief on the Merits filed on behalf of + C.E.
+
+ + + + + +
+
+
06/16/2023Reply to Response to Petition + filed PetitionerReply to Response to Petition for Review filed on + behalf of TDFPS.
+
+ + + + + +
+
+
06/06/2023Response to PetitionRespondentResponse to Petition for Review filed on behalf of + B.K.
+
+ + + + + +
+
+
04/03/2023Redrafted Petition for Review + PetitionerRedrafted Petition for Review filed on behalf of + TDPFS.
+
+ + + + + + + + + +
+
+
04/03/2023Petition for Review (Parental + Termination)PetitionerPetition for Review filed on behalf of C.E.
+
+ + + + + + + + + +
+
+
03/20/2023Petition for Review (Parental + Termination)PetitionerPetition for Review filed on behalf of TDFPS.
+
+
+ + + + + + + + + +
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date + Event + TypeDisposition + Remarks + Document
03/01/2024Court approved judgment sent to + attorneys of recordIssued 
+
+ + + + + +
+
+
03/01/2024Opinion issuedIssuedPursuant to Texas Rule of Appellate Procedure 59.1, + after granting the petitions for review and without + hearing oral argument, the Court reverses the court + of appeals' judgment and remands the case to that + court. Per Curiam Opinion
+
+ +
+
03/01/2024Petition for Review disposed + Petition granted pursuant to TRAP 59.1 
+
+ +
+
03/01/2024Petition for Review granted + under TRAP 59.1  
+
+ +
+
03/01/2024Opinion issuedCourt of Appeals' judgment reversed and remanded to + Court of AppealsPursuant to Texas Rule of Appellate Procedure 59.1, + after granting the petitions for review and without + hearing oral argument, the Court reverses the court + of appeals' judgment and remands the case to that + court. Per Curiam Opinion

(3/1/2024 5:10:13 + PM) 230180.pdf, 23-0180.jmt.pdf
+
+ + + + + + + + + +
+
+
03/01/2024Petition for Review disposed + Petition granted pursuant to TRAP 59.1 
+
+ +
+
03/01/2024Petition for Review granted + under TRAP 59.1  
+
+ +
+
10/06/2023Reply Brief Reply Brief on the Merits filed on behalf of Texas + Department of Family & Protective Services.
+
+ + + + + +
+
+
10/06/2023Brief on the Merits Petitioner's Reply Brief on the Merits filed on + behalf of C.E.
+
+ + + + + +
+
+
09/28/2023Brief on the Merits Respondent's Brief on the Merits filed on behalf of + B.K.
+
+ + + + + +
+
+
09/18/2023Brief on the Merits Petitioner's Brief on the Merits filed on behalf of + Texas Department of Family and Protective Services. +
+
+ + + + + +
+
+
09/18/2023Brief on the Merits Petitioner's Brief on the Merits filed on behalf of + C.E.
+
+ + + + + +
+
+
09/14/2023Notice of Appearance Notice of Appearance filed on behalf of Texas + Department of Family and Protective Services.
+
+
+ + + + + +
+
+
09/01/2023Brief on the Merits Requested +  Brief on the merits requested: Petitioner's brief + due no later than September 18, 2023; Response brief + due September 28, 2023; Reply brief due October 6, + 2023.
+
+ + + + + +
+
+
06/16/2023Reply to Response to Petition + filed  Reply to Response to Petition for Review filed on + behalf of TDFPS.
+
+ + + + + +
+
+
06/06/2023Response to Petition Response to Petition for Review filed on behalf of + B.K.
+
+ + + + + +
+
+
05/31/2023Motion for Extension of Time to + File Response disposedFiling grantedMotion for Extension of Time to file Response to + Petition for Review granted. Further requests for + extensions of time will be disfavored. Response is + due no later than June 6, 2023.
+
+ + + + + +
+
+
05/30/2023Motion for Extension of Time to + File Response Motion for Extension of Time to file Response to + Petition for Review filed on behalf of B.K.
+
+ + + + + +
+
+
05/12/2023Supreme Court of Texas Requested + Response Requested responses to both petitions for review due + no later than May 30, 2023.
+
+ + + + + +
+
+
04/11/2023Case forwarded to Court  
+
+ +
+
04/11/2023Case forwarded to Court  
+
+ +
+
04/10/2023Response Waiver filed Response Waiver to Petitions for Review filed on + behalf of B.K.
+
+ + + + + +
+
+
04/03/2023Petition for Review (Parental + Termination) Petition for Review filed on behalf of C.E.
+
+ + + + + + + + + +
+
+
04/03/2023Redrafted Petition for Review +  Redrafted Petition for Review filed on behalf of + TDPFS.
+
+ + + + + + + + + +
+
+
03/22/2023Petition for Review disposed + Filed document is struck by the CourtThe Petition for Review violates Texas Rules of + Appellate Procedure 9.4(i)(2)(D) and is struck. A + Redafted Petition for Review is due no later than + April 3, 2023.
+
+ + + + + +
+
+
03/22/2023Motion to Exceed Word Limit + disposedDeniedMotion to Exceed Word Limit is denied.
+
+ +
+
03/20/2023Petition for Review (Parental + Termination) Petition for Review filed on behalf of TDFPS.
+
+
+ + + + + + + + + +
+
+
03/16/2023Motion to Exceed Word Limit Motion to Exceed Word Limit filed on behalf of + TDFPS.
+
+ + + + + +
+
+
03/08/2023Notice from attorney regarding + vacation dates Vacation Notice Letter filed on behalf of C.E.
+
+
+ + + + + +
+
+
03/07/2023Motion for Extension of Time to + File Petition for Review disposedFiling grantedMotion for Extension of Time to file Petition for + Review granted.  Petition for Review is due no + later than April 3, 2023.
+
+ + + + + +
+
+
03/07/2023Motion for Extension of Time to + File Petition for Review filed Motion for Extension of Time to file Petition for + Review filed on behalf of C G.E.
+
+ + + + + + + + + +
+
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Clerk's Record  
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
03/07/2023Court reporter/recorder's record +   
+
+ +
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason SetRemarks
03/18/2024StatusMotion for rehearing due to be filed.  
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
Texas Department of Family and Protective Services + PetitionerMs. Rebecca L. Safavi
Ms. Caroline Carow +
Ms. Amie Serrano
Ms. Leslie Capace +
Mr. Michael Burton
Benjamin S. Walton +
Mr. Jerry L. Reyes
Mr. Eric Tek Tai +
K., B.RespondentMr. Paul M. Leopold
Ms. Jessica Hall Janicek +
E., C.PetitionerMr. Brad M. LaMorgese
E., C. Other interested party Samuel Bryant
+ +
+ + +
+
+
+
+
+ +
+
+
+
+ Court of Appeals Information: +
+
+
+ +
+
+ +
+
+ + +
+
+
+
+ +
+
+ Reverse & Remand  +
+
+
+
+ +
+
+ ___ SW3d ___, 01-12-23  +
+
+
+
+ +
+
+ 2nd Court of Appeals  +
+
+ +
+
+ +
+
+ +
+
+ Honorable D. Michael Wallach  +
+
+ +
+
+
+
+
+
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 355th District Court  +
+
+
+
+ +
+
+ Hood  +
+
+
+
+ +
+
+ Honorable Bryan T. Bufkin  +
+
+
+
+ +
+
+ P2021009  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This + software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ +
+
+
+
+ + + + + \ No newline at end of file diff --git a/tests/schemas/test_OpinionSchemasTest.py b/tests/schemas/test_OpinionSchemasTest.py new file mode 100644 index 000000000..e9f9ad06a --- /dev/null +++ b/tests/schemas/test_OpinionSchemasTest.py @@ -0,0 +1,17 @@ +import unittest + + +class OpinionSchemasValidationTest(unittest.TestCase): + """Test the schemas validator work as expected""" + + def setUp(self): + pass + + def test_nested_schema(self): + pass + + def test_single_schema(self): + pass + + def test_improper_type(self): + pass From 9928f2e2bc65c89b1f5af467b25a2000ba75bbb9 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Fri, 8 Mar 2024 18:05:00 -0500 Subject: [PATCH 04/12] feat(texcrimapp): update scraper --- .../opinions/united_states/state/tex.py | 39 +- .../united_states/state/texcrimapp.py | 29 +- .../united_states/tex_example.compare.json | 6 +- .../texcrimapp_example.compare.json | 352 +- .../united_states/texcrimapp_example.html | 3327 +++++++++-------- .../texcrimapp_subexample_1.html | 1031 +++++ .../texcrimapp_subexample_3.html | 1041 ++++++ 7 files changed, 4016 insertions(+), 1809 deletions(-) create mode 100644 tests/examples/opinions/united_states/texcrimapp_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texcrimapp_subexample_3.html diff --git a/juriscraper/opinions/united_states/state/tex.py b/juriscraper/opinions/united_states/state/tex.py index 639ba76ab..13928730a 100644 --- a/juriscraper/opinions/united_states/state/tex.py +++ b/juriscraper/opinions/united_states/state/tex.py @@ -15,11 +15,12 @@ # - 2015-08-19: Updated by Andrei Chelaru to add backwards scraping support. # - 2015-08-27: Updated by Andrei Chelaru to add explicit waits # - 2021-12-28: Updated by flooie to remove selenium. 
- +# - 2023-03-08: Updated by grossir to collect more data import re from datetime import date, timedelta from typing import Dict, List +from dateutil import parser from lxml import html as lxmlHTML from juriscraper.AbstractSite import logger @@ -132,7 +133,9 @@ def _process_html(self) -> None: judges.append(op.get("author_str")) dispositions.append(op.pop("disposition", "")) parsed["oc.judges"] = list(filter(bool, judges)) - parsed["oc.disposition"] = sorted(dispositions, key=len)[-1] + parsed["oc.disposition"] = self.get_cluster_disposition( + dispositions + ) self.cases.append(parsed) @@ -224,8 +227,14 @@ def parse_originating_court_info( data[key] = val.strip() if "COA" in table_id: - if data.get("citation") and "," in data["citation"]: - _, data["date_judgment"] = data.pop("citation").split(",") + citation = data.pop("citation", "") + if "," in citation: + _, data["date_judgment"] = citation.split(",") + elif "-" in citation or "/" in citation: + try: + data["date_judgement"] = parser.parse(citation).date() + except parser.ParserError: + pass return data @@ -273,10 +282,10 @@ def get_opinions(self, html: lxmlHTML) -> List[Dict]: opinion_xpath = "//div[div[contains(text(), 'Case Events')]]//tr[td[contains(text(), 'pinion issu')]]" for opinion in html.xpath(opinion_xpath): op = {} - link_xpath = opinion.xpath(".//td//a/@href") - if not link_xpath: + link = opinion.xpath(".//td//a/@href") + if not link: continue - op["download_url"] = link_xpath[0] + op["download_url"] = link[0] op["disposition"] = opinion.xpath(".//td[3]/text()")[0] # Remarks may contain Per Curiam flag. 
Does not exist in texcrim @@ -301,14 +310,26 @@ def get_opinions(self, html: lxmlHTML) -> List[Dict]: if "concur" in op_type: op["type"] = "030concurrence" elif "diss" in op_type: - op_type = "040dissent" + op["type"] = "040dissent" else: - op_type = "010combined" + op["type"] = "010combined" opinions.append(op) return opinions + def get_cluster_disposition(self, dispositions: List) -> str: + """Get oc.disposition from each opinion's disposition value + + In tex, disposition is usually the longest string. + On texcrimapp, disposition is the same for all opinions + In texapp_*, disposition is found on the 'main' opinion + + :param dispositions: disposition strings + :return: disposition of the cluster + """ + return sorted(dispositions, key=len)[-1] + def get_by_label_from_case_page(self, html: lxmlHTML, label: str) -> str: """Selects from first / main table of case page diff --git a/juriscraper/opinions/united_states/state/texcrimapp.py b/juriscraper/opinions/united_states/state/texcrimapp.py index 5315f8cd2..7a24736ee 100644 --- a/juriscraper/opinions/united_states/state/texcrimapp.py +++ b/juriscraper/opinions/united_states/state/texcrimapp.py @@ -6,6 +6,8 @@ # Date: 2015-09-02 +from typing import Dict, List + from juriscraper.opinions.united_states.state import tex @@ -16,32 +18,33 @@ def __init__(self, *args, **kwargs): self.court_name = "ccrimapp" self.checkbox = 1 - def get_opinions(self, html): - """ - Cluster with 3 opinions (texcrimapp) - https://search.txcourts.gov/Case.aspx?cn=PD-0037-22&coa=coscca + def get_opinions(self, html) -> List[Dict]: + """Override from tex.py. 
See docstring there for more info - 2 Opinions: main and concurring - https://search.txcourts.gov/Case.aspx?cn=PD-0984-19&coa=coscca + :param html: page's HTML object + :return List of opinions """ opinions = [] - opinion_xpath = "//div[contains(text(), 'Case Events')]//tr[td[text()='OPINION ISSD')]]" + opinion_xpath = "//div[div[contains(text(), 'Case Events')]]//tr[td[text()='OPINION ISSD']]" + link_xpath = ( + ".//tr[td[1]/a and td[2][not(contains(text(), 'Notice'))]]" + ) for opinion in html.xpath(opinion_xpath): op = {} - link_xpath = opinion.xpath(".//td//a/@href") - if not link_xpath: + link = opinion.xpath(link_xpath) + if not link: continue - op["download_url"] = link_xpath[0] op["disposition"] = opinion.xpath(".//td[3]/text()")[0] + op["download_url"] = link[0].xpath("td/a/@href")[0] - op_type = opinion.xpath(".//td//tr[a]/td[2]/text()")[0] + op_type = link[0].xpath("td[2]/text()")[0].strip() if op_type == "Original": op["type"] = "010combined" elif op_type == "Dissenting": - op_type["type"] = "040dissent" + op["type"] = "040dissent" elif op_type == "Concurring": - op_type["type"] = "030concurrence" + op["type"] = "030concurrence" opinions.append(op) diff --git a/tests/examples/opinions/united_states/tex_example.compare.json b/tests/examples/opinions/united_states/tex_example.compare.json index 6bafccfe5..6454986fa 100644 --- a/tests/examples/opinions/united_states/tex_example.compare.json +++ b/tests/examples/opinions/united_states/tex_example.compare.json @@ -17,7 +17,8 @@ "type": "030concurrence" }, { - "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=b103a5e9-a915-468e-a207-ef1735308744&coa=cossup&DT=OPINION&MediaID=3ff7a9e4-24f7-44c6-9069-30f038d3b875" + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=b103a5e9-a915-468e-a207-ef1735308744&coa=cossup&DT=OPINION&MediaID=3ff7a9e4-24f7-44c6-9069-30f038d3b875", + "type": "010combined" } ], "date_filed": "2024-03-01", @@ -51,7 
+52,8 @@ "Opinions": [ { "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=094a8ad8-7205-4041-8b63-2fab9687e5c9&coa=cossup&DT=OPINION&MediaID=7ea5e635-2f83-4650-a5f4-4182c2a8bbec", - "per_curiam": true + "per_curiam": true, + "type": "010combined" } ], "date_filed": "2024-03-01", diff --git a/tests/examples/opinions/united_states/texcrimapp_example.compare.json b/tests/examples/opinions/united_states/texcrimapp_example.compare.json index 09d508b38..f25590173 100644 --- a/tests/examples/opinions/united_states/texcrimapp_example.compare.json +++ b/tests/examples/opinions/united_states/texcrimapp_example.compare.json @@ -1,252 +1,108 @@ [ { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15808&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-82,941-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15593&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-0474-14", - "case_name_shorts": "" - }, - { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15125&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-82,426-03", - "case_name_shorts": "" - }, - { - 
"case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12887&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-20,460-02", - "case_name_shorts": "" - }, - { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12770&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1071-14", - "case_name_shorts": "" - }, - { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12500&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-81,830-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-07-01", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=11570&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-0421-14", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": 
"tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15187&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1790-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14839&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1792-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14626&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1793-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14419&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1791-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=13486&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - 
"date_filed_is_approximate": false, - "docket_numbers": "WR-79,465-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=13036&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1792-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12939&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-81,078-04", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12802&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1790-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=11015&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1791-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-24", - "case_names": "No case names fetched during tests.", - "download_urls": 
"tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=11013&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-1791-13", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-17", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=16254&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-0501-14", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-17", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15999&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-81,875-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-17", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15422&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-82,919-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-17", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14931&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - 
"date_filed_is_approximate": false, - "docket_numbers": "WR-76,781-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-17", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14231&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "PD-0501-14", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-10", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=15519&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-82,563-01", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-10", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=14516&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-81,806-03", - "case_name_shorts": "" - }, - { - "case_dates": "2015-06-10", - "case_names": "No case names fetched during tests.", - "download_urls": "tests/examples/opinions/united_states/RetrieveDocument.aspx?DocId=12360&Index=%5c%5cOCA%2dPSQL01%2ecourts%2estate%2etx%2eus%5cTamesIndexes%5ccca%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "WR-81,806-04", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "PDR Case Type", + "Opinions": [ + { + "download_url": 
"tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=13b7a2e8-b99f-4df9-9af5-f0344d2dc41b&coa=coscca&DT=OPINION&MediaID=1eee18b8-7674-45f1-a0d6-833ebe51866c", + "type": "010combined" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=fafdaafd-343a-4ba5-bee0-b1f1e093a22d&coa=coscca&DT=OPINION&MediaID=e08f5e9b-a472-4b71-824c-f4b585d61be6", + "type": "040dissent" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=6fceba07-40a9-4059-b4e8-1c65d0b9bd94&coa=coscca&DT=OPINION&MediaID=e55e6579-b560-4391-9f24-01990114b26b", + "type": "030concurrence" + } + ], + "date_filed": "2024-02-14", + "disposition": "Reversed COA Affirmed the Trial Court", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "DANIEL, BERNARD v. the State of Texas", + "case_name_short": "" + }, + "case_name": "DANIEL, BERNARD v. the State of Texas", + "appeal_from_str": "3rd Court of Appeals", + "appeal_from_id": "texapp", + "OriginatingCourtInformation": { + "docket_number": "03-20-00519-CR", + "date_judgement": "2021-12-23" + }, + "docket_number": "PD-0037-22", + "source": 2, + "blocked": false, + "case_name_short": "" + } + }, + { + "Docket": { + "OpinionCluster": { + "nature_of_suit": "PDR Case Type", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=59f22e02-f84c-4e02-8a87-75fcd0b752af&coa=coscca&DT=OPINION&MediaID=091f82d1-999f-4a6a-a384-a321fc4ccdef", + "type": "010combined" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=540d7572-1ae5-4555-9f90-b1e718dee379&coa=coscca&DT=OPINION&MediaID=0436c2a7-58e9-46d7-baee-966f2b03fc0d", + "type": "030concurrence" + } + ], + "date_filed": "2024-02-21", + "disposition": "Reversed COA & Trial CourtRemanded to Trial Court", + "source": "C", + "date_filed_is_approximate": 
false, + "blocked": false, + "precedential_status": "Published", + "case_name": "MCGUIRE, SEAN MICHAEL v. the State of Texas", + "case_name_short": "" + }, + "case_name": "MCGUIRE, SEAN MICHAEL v. the State of Texas", + "appeal_from_str": "1st Court of Appeals", + "appeal_from_id": "texapp", + "OriginatingCourtInformation": { + "docket_number": "01-18-00146-CR", + "date_judgement": "2019-08-29" + }, + "docket_number": "PD-0984-19", + "source": 2, + "blocked": false, + "case_name_short": "" + } + }, + { + "Docket": { + "OpinionCluster": { + "nature_of_suit": "11.07 HC", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=c1ee0a83-1ef5-425d-948a-e184a63c648d&coa=coscca&DT=OPINION&MediaID=4d6ae38d-1f44-402a-b40e-4b9d47acce66", + "type": "010combined" + } + ], + "date_filed": "2024-02-28", + "disposition": "HC Relief granted", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "KIRKLAND, JOSEPH v. the State of Texas", + "case_name_short": "" + }, + "case_name": "KIRKLAND, JOSEPH v. the State of Texas", + "appeal_from_str": "485TH DISTRICT COURT", + "OriginatingCourtInformation": { + "docket_number": "C-485-W012385-1724597-A" + }, + "docket_number": "WR-95,476-01", + "source": 2, + "blocked": false, + "case_name_short": "" + } } ] \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texcrimapp_example.html b/tests/examples/opinions/united_states/texcrimapp_example.html index 7997cc2bc..5dbcdb2fe 100644 --- a/tests/examples/opinions/united_states/texcrimapp_example.html +++ b/tests/examples/opinions/united_states/texcrimapp_example.html @@ -1,447 +1,15 @@ + - - - - TAMES Search - - - - - - - - - - - - - - -
- -
- - - - +
@@ -452,1133 +20,1818 @@

-
-
- -
-
-
-
-
- - -
-
-
- - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - -
-
- Document Search Criteria -
-
-
-
-
-
-
-
-
- - -
-
-
-
-
- -
-
- - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-
-
-
-
- Selection Criteria -
-
-
-
- -
-
- -
-
-
-
-   -
-
- - - Phonic    - - Stemming  - -
-
-
-
- -
-
- + + +
+ + +
+ + +
+ + + +
+ + +
+
+ Document Search Criteria +
+
+
+
+
+
+
+
+
+ +
-
-
- -
-
-
- - - - -
Open the calendar popup.
-
- to -
-
-
- - - - -
Open the calendar popup.
-
-
+
+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
-
-
- -
-
- - +
+
+
+
+
+ +
+ +
+
+ Selection Criteria +
+
+
+
+ +
+
+ +
+
+
+
+   +
+
+ + + Phonic    + + Stemming  + +
+
+
+
+ +
+
+ +
+
+
+
+ +
+
+
+ + + + + + + + + + + + + + +
+ RadDatePicker +
RadDatePicker +
+ Open + the calendar popup. + +
+
+ to +
+
+
+ + + + + + + + + + + + + + +
+ RadDatePicker +
RadDatePicker +
+ Open + the calendar popup. + +
+
+
+
+
+
+ +
+
+ + +
+
+
+
+ +
+
+ Examples: + apple pie and pear | apple pie + or pear | apple pie w/5 + pear | apple not w/12 pear + | apple and not pear | wildcard + w/5 match* | +
+
+
-
- -
-
- Examples: apple pie and pear | apple pie - or pear | apple pie w/5 - pear | apple not w/12 pear - | apple and not pear | wildcard - w/5 match* | -
+
+
+
+
-
+
-
-
-
-
-
- -
-
-
- -
-
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + +
+ Data pager +
Data pager
+
+ +
+
+ 123 +
+
+ +
+
+ +
+
+  67 items in + 3 pages +
+
+
Hits + Date + SummaryDocument + TypeCase + NumberAppellate + Court
+ + + + + + + + + + + + +
+ Data pager +
Data pager
+
+ +
+
+ 123 +
+
+ +
+
+ +
+
+  67 items in + 3 pages +
+
+
+ 0 + 02/28/2024 + + + Opinion + + + WR-95,476-01 + + + + + + + COSCCA +
+ 0 + 02/28/2024 + + + Opinion + + + WR-95,406-01 + + + + + + + COSCCA +
+ 0 + 02/21/2024 + + + Opinion + + + PD-0984-19 + + + + + + + COSCCA +
+ 0 + 02/21/2024 + + + Opinion + + + PD-0984-19 + + + + + + + COSCCA +
+ 0 + 02/14/2024 + + + Opinion + + + PD-0037-22 + + + + + + + COSCCA +
+ 0 + 02/14/2024 + + + Opinion + + + PD-0037-22 + + + + + + + COSCCA +
+ 0 + 02/14/2024 + + + Opinion + + + PD-0037-22 + + + + + + COSCCA +
+ +
+ +
+
+
+
-
- 131 - - - - - - - -
+ + + + + + + + +
- -
- - - - - - - - - - + + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texcrimapp_subexample_1.html b/tests/examples/opinions/united_states/texcrimapp_subexample_1.html new file mode 100644 index 000000000..772ff4e1c --- /dev/null +++ b/tests/examples/opinions/united_states/texcrimapp_subexample_1.html @@ -0,0 +1,1031 @@ + + + + + + Case Detail + + + + + +
+
+
+

+ Court of Criminal Appeals +

+
+ + +
+ +
+
+ +
+
+
+
+ + + +
+ +
+ + + +
+ +
+ +
+
+ + + +
+ + + +
+ + + + + +
+ + + + +
+
+ Case: + PD-0037-22 +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + PD-0037-22 + +
+
+
+
+
+ +
+
+
+ 01/28/2022 +
+
+
+
+
+ +
+
+ PDR Case Type +
+
+
+
+ +
+
+ DANIEL, BERNARD  +
+
+
+
+ +
+
+   +
+
+ +
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date Event + TypeDescription + Document
04/10/2023BRIEF FILEDAppellant +
+ + + + + + + + + +
+
+
03/13/2023BRIEF FILEDState Prosecuting Attorney +
+ + + + + + + + + +
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date + Event + TypeDisposition + Document
02/14/2024OPINION ISSDReversed COA; Affirmed the Trial Court +
+ + + + + + + + + +
+
+
02/14/2024OPINION ISSDReversed COA; Affirmed the Trial Court +
+ + + + + + + + + +
+
+
02/14/2024OPINION ISSDReversed COA; Affirmed the Trial Court +
+ + + + + + + + + +
+
+
05/31/2023SUBMITTED  +
+ +
+
05/17/2023FOR NON ARGT CASES  +
+ + + + + +
+
+
05/17/2023SET FOR SUBMIS  +
+ +
+
04/10/2023BRIEF FILED  +
+ + + + + + + + + +
+
+
03/13/2023BRIEF FILED  +
+ + + + + + + + + +
+
+
02/28/2023LTR 1.051 RESP REC  +
+ + + + + +
+
+
02/15/2023LTR 1.051 REQ RESP  +
+ + + + + +
+
+
02/15/2023PDR GRANTED  +
+ +
+
02/15/2023PDR DISPGranted +
+ + + + + +
+
+
07/05/2022MISC DOCUMENT FOR A PDR  +
+ + + + + +
+
+
01/28/2022PDR FILED  +
+ + + + + + + + + +
+
+
01/26/2022COA RECORD RECEIVED  +
+ +
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
03/11/2024MANDATEMANDATE DUE
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
STATE OF TEXASStateStacey M. Soule
Daniel, BernardAppellantErika Copeland
+ +
+ + +
+
+
+
+
+ +
+
+
+
+ Court of Appeals Information: +
+
+
+ +
+
+ +
+
+ + +
+
+
+
+ +
+
+ Reversed  +
+
+
+
+ +
+
+ 12-23-21  +
+
+
+
+ +
+
+ 3rd Court of Appeals  +
+
+ +
+ +
+
+
+
+
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 27th District Court  +
+
+
+
+ +
+
+ Bell  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+ 80234  +
+
+
+
+ +
+
+   +
+
+ +
+
+ +
+
+
+
+
+ + + + +
+
+
+
+ + + + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texcrimapp_subexample_3.html b/tests/examples/opinions/united_states/texcrimapp_subexample_3.html new file mode 100644 index 000000000..10894cef5 --- /dev/null +++ b/tests/examples/opinions/united_states/texcrimapp_subexample_3.html @@ -0,0 +1,1041 @@ + + + + + + Case Detail + + + + +
+ +
+ +
+ + + + + +
+ +
+ +
+
+ + + +
+ +
+ + + + + +
+ + + + +
+
+ Case: + PD-0984-19 +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + PD-0984-19 + +
+
+
+
+
+ +
+
+
+ 09/19/2019 +
+
+
+
+
+ +
+
+ PDR Case Type +
+
+
+
+ +
+
+ MCGUIRE, SEAN MICHAEL  +
+
+
+
+ +
+
+   +
+
+ +
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date Event + TypeDescription + Document
02/11/2020BRIEF FILEDAppellee +
+ + + + + + + + + +
+
+
01/10/2020BRIEF FILEDState Prosecuting Attorney +
+ + + + + + + + + +
+
+
10/08/2019RESPONSE FILEDAppellee +
+ + + + + + + + + +
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date + Event + TypeDisposition + Document
02/21/2024OPINION ISSDReversed COA & Trial Court;Remanded to Trial Court + +
+ + + + + + + + + +
+
+
02/21/2024OPINION ISSDReversed COA & Trial Court;Remanded to Trial Court + +
+ + + + + + + + + +
+
+
04/08/2020SUBMITTED  +
+ +
+
03/25/2020FOR NON ARGT CASES  +
+ + + + + +
+
+
03/25/2020SET FOR SUBMIS  +
+ +
+
02/11/2020BRIEF FILED  +
+ + + + + + + + + +
+
+
01/13/2020MISC DOCUMENT FOR A PDR  +
+ + + + + +
+
+
01/10/2020BRIEF FILED  +
+ + + + + + + + + +
+
+
12/11/2019PDR GRANTED  +
+ +
+
12/11/2019PDR DISPGranted +
+ + + + + +
+
+
10/08/2019RESPONSE FILED  +
+ + + + + + + + + +
+
+
09/24/2019MISC DOCUMENT FOR A PDR  +
+ + + + + +
+
+
09/19/2019PDR FILED  +
+ + + + + + + + + +
+
+
09/19/2019COA RECORD RECEIVED  +
+ +
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
03/18/2024MANDATEMANDATE DUE
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
MCGUIRE, SEAN MICHAELAppelleeKristen Jernigan
STATE OF TEXASStateStacey M. Soule
+ +
+ + +
+
+
+
+
+ +
+
+
+
+ Court of Appeals Information: +
+
+
+ +
+
+ +
+
+ + +
+
+
+
+ +
+
+ Affirmed  +
+
+
+
+ +
+
+ 08-29-19  +
+
+
+
+ +
+
+ 1st Court of Appeals  +
+
+ +
+ +
+
+
+
+
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 268th District Court  +
+
+
+
+ +
+
+ Fort Bend  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+ 10-DCR-055898  +
+
+
+
+ +
+
+   +
+
+ +
+
+ +
+
+
+ +
+
+ + + + + +
+
+
+
+ + + + \ No newline at end of file From ae96a6b14d28a864e07d0aa8a8df2a2ffc9c742c Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Fri, 8 Mar 2024 22:29:57 -0500 Subject: [PATCH 05/12] feat(texapp_1, texapp_2): update to new base class Also, update test files --- .../opinions/united_states/state/texapp_1.py | 36 +- .../opinions/united_states/state/texapp_2.py | 4 +- .../opinions/united_states/tex_example.html | 130 - .../texapp_1_example.compare.json | 165 +- .../united_states/texapp_1_example.html | 1914 +++----------- .../united_states/texapp_1_subexample_1.html | 893 +++++++ .../united_states/texapp_1_subexample_25.html | 591 +++++ .../texapp_2_example.compare.json | 274 +- .../united_states/texapp_2_example.html | 2214 +++-------------- .../united_states/texapp_2_subexample_1.html | 865 +++++++ .../united_states/texapp_2_subexample_2.html | 823 ++++++ 11 files changed, 4040 insertions(+), 3869 deletions(-) create mode 100644 tests/examples/opinions/united_states/texapp_1_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_1_subexample_25.html create mode 100644 tests/examples/opinions/united_states/texapp_2_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_2_subexample_2.html diff --git a/juriscraper/opinions/united_states/state/texapp_1.py b/juriscraper/opinions/united_states/state/texapp_1.py index 01633bda0..412cb8b7b 100644 --- a/juriscraper/opinions/united_states/state/texapp_1.py +++ b/juriscraper/opinions/united_states/state/texapp_1.py @@ -5,6 +5,7 @@ # Reviewer: # Date: 2014-07-10 +from typing import Dict, List from juriscraper.opinions.united_states.state import tex @@ -16,13 +17,32 @@ def __init__(self, *args, **kwargs): self.court_name = "capp_1" self.checkbox = 2 - def get_opinions(self, html): - """ - Cluster with 2 opinions (Court of Appeals) - https://search.txcourts.gov/Case.aspx?cn=02-22-00347-CV&coa=coa02 - Counter Examples: + def get_opinions(self, html) -> List[Dict]: + 
"""Override from tex.py. See docstring there for more info - 'opinion' in text, but it is actually a letter - https://search.txcourts.gov/Case.aspx?cn=01-23-00777-CR&coa=coa01 + :param html: page's HTML object + :return List of opinions """ - raise NotImplementedError("...") + opinions = [] + opinion_xpath = "//div[div[contains(text(), 'Case Events')]]//tr[td[contains(text(), 'pinion issued')]]" + link_xpath = ".//tr[td[1]/a and td[2][contains(text(), 'pinion')]]" + for opinion in html.xpath(opinion_xpath): + op = {} + link = opinion.xpath(link_xpath) + if not link: + continue + + op["disposition"] = opinion.xpath(".//td[3]/text()")[0] + op["download_url"] = link[0].xpath("td/a/@href")[0] + + op_type = link[0].xpath("td[2]/text()")[0].strip().lower() + if "concur" in op_type: + op["type"] = "030concurrence" + elif "diss" in op_type: + op["type"] = "040dissent" + else: + op["type"] = "010combined" + + opinions.append(op) + + return opinions diff --git a/juriscraper/opinions/united_states/state/texapp_2.py b/juriscraper/opinions/united_states/state/texapp_2.py index 47e46f79c..c19a7cea4 100644 --- a/juriscraper/opinions/united_states/state/texapp_2.py +++ b/juriscraper/opinions/united_states/state/texapp_2.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/tests/examples/opinions/united_states/tex_example.html b/tests/examples/opinions/united_states/tex_example.html index e93eb3e79..e3e31a6a0 100644 --- a/tests/examples/opinions/united_states/tex_example.html +++ b/tests/examples/opinions/united_states/tex_example.html @@ -82,114 +82,6 @@
-
- - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - @@ -1880,16 +1772,6 @@   - -   - -
Open Document by clicking the hyperlink

Copy Hyperlink - to clipboard by right clicking and selecting Copy Shortcut (Internet - Explorer), Copy Link Address (Firefox), Copy Link (Safari), or - the copy feature pertinent to your browser.
- -   -     @@ -1916,18 +1798,6 @@
-
diff --git a/tests/examples/opinions/united_states/texapp_1_example.compare.json b/tests/examples/opinions/united_states/texapp_1_example.compare.json index 08c72cb17..d5ad9aeaf 100644 --- a/tests/examples/opinions/united_states/texapp_1_example.compare.json +++ b/tests/examples/opinions/united_states/texapp_1_example.compare.json @@ -1,112 +1,65 @@ [ { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=2153&Index=***coa01%5cOrder", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-12-00215-CV", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Mandamus", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=f181b579-b431-4c2f-a2a2-87fc282e342e&coa=coa01&DT=Opinion&MediaID=82d84765-20e6-4cee-9a32-ec951e5dca36", + "type": "010combined" + } + ], + "date_filed": "2024-02-22", + "disposition": "Deny petition for writ of mandamus", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "In Re Julian Fernandez v. the State of Texas", + "case_name_short": "" + }, + "case_name": "In Re Julian Fernandez v. 
the State of Texas", + "appeal_from_str": "Co Crim Ct at Law No 13", + "OriginatingCourtInformation": { + "assigned_to_str": "County Cr Court @ 13", + "docket_number": "2355952" + }, + "docket_number": "01-24-00076-CR", + "source": 2, + "blocked": false, + "case_name_short": "" + } }, { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=2151&Index=***coa01%5cOrder", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-14-00116-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=2150&Index=***coa01%5cOrder", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-13-01027-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14492&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-14-00533-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14491&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-12-00971-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14490&Index=***coa01%5cOpinion", - 
"precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-13-00933-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14489&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-13-00581-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14488&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-14-00369-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14487&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-13-00631-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14486&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "01-10-01153-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-07-08", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=14485&Index=***coa01%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - 
"docket_numbers": "01-13-00674-CV", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Termination of parental rights or conservatorship - accelerated", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=0e464b10-bd33-4e32-af94-c2c6a99a09bd&coa=coa01&DT=Opinion&MediaID=d33e7fcc-fbc6-4a20-8e9a-7a58d2a75721", + "type": "010combined" + } + ], + "date_filed": "2024-02-27", + "disposition": "Affirm TC judgment", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "In the Interest of K.B., Child v. Department of Family and Protective Services", + "case_name_short": "" + }, + "case_name": "In the Interest of K.B., Child v. Department of Family and Protective Services", + "appeal_from_str": "314th District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "314th Court", + "docket_number": "2021-004499J", + "court_reporter": "Court Reporter 314th District Court" + }, + "docket_number": "01-23-00658-CV", + "source": 2, + "blocked": false, + "case_name_short": "" + } } ] \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_1_example.html b/tests/examples/opinions/united_states/texapp_1_example.html index de9153c40..ccee75fdd 100644 --- a/tests/examples/opinions/united_states/texapp_1_example.html +++ b/tests/examples/opinions/united_states/texapp_1_example.html @@ -1,1631 +1,397 @@ - - - TAMES SEARCH - Supreme Court - - - - - - - - - - - -
- - - - - -
- - - - - - - - - - -
-
-
-
- - + - - - - - - \ No newline at end of file + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_1_subexample_1.html b/tests/examples/opinions/united_states/texapp_1_subexample_1.html new file mode 100644 index 000000000..61a01086a --- /dev/null +++ b/tests/examples/opinions/united_states/texapp_1_subexample_1.html @@ -0,0 +1,893 @@ + + Case Detail + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+

+ First Court of Appeals +

+
+ + +
+ +
+
+ + + +
+
+
+
+ + + +
+ +
+
Case Information
+
+ Administration + +
+ + + +
+ +
+ + + + + + + + + + + + + + + + +
+
+ Case: + 01-23-00658-CV +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 01-23-00658-CV + +
+
+
+
+
+ +
+
+
+ 09/06/2023 +
+
+
+
+
+ +
+
+ Termination of parental rights or conservatorship - accelerated +
+
+
+
+ +
+
+ In the interest of K.B., child  +
+
+
+
+ +
+
+ Department of Family and Protective Services  +
+
+
+ +
+
+ +
+
+ No  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+ www.txcourts.gov/1stcoa  +
+
+ +
+
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + +
Date Event TypeDescriptionDocument
12/18/2023Brief filed - oral argument not requestedAppellee +
+ + + + + + +
[ PDF/781 KB ] Brief
[ PDF/141 KB ] Notice
+
+
11/06/2023Brief filed - oral argument not requestedAppellant +
+ + + + + + +
[ PDF/617 KB ] Brief
[ PDF/139 KB ] Notice
+
+
11/06/2023Brief filed - oral argument not requestedAppellant +
+ + + + + + +
[ PDF/290 KB ] Brief
[ PDF/139 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateEvent TypeDispositionDocument
02/27/2024Motion for extension of time to file brief disposedDismiss motion +
+ +
+
02/27/2024Judgment Issued  +
+ + + + +
[ PDF/86 KB ] Judgment
+
+
02/27/2024Memorandum opinion issuedAffirm TC judgment +
+ + + + + + +
[ PDF/325 KB ] Memorandum Opinion
[ PDF/141 KB ] Notice
+
+
01/11/2024Set for submission on briefs  +
+ + + + +
[ PDF/142 KB ] Notice
+
+
01/08/2024Reply brief due  +
+ +
+
12/18/2023Brief filed - oral argument not requested  +
+ + + + + + +
[ PDF/781 KB ] Brief
[ PDF/141 KB ] Notice
+
+
12/18/2023Case ready to be set  +
+ +
+
12/18/2023Appellees brief due  +
+ +
+
11/30/2023Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/140 KB ] Notice
+
+
11/28/2023Appearance of counsel  +
+ + + + +
[ PDF/683 KB ] NOTICE
+
+
11/27/2023Appellees brief due  +
+ +
+
11/27/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/651 KB ] Extension
+
+
11/09/2023Motion to substitute attorney disposedDENIED +
+ + + + +
[ PDF/140 KB ] Notice
+
+
11/06/2023Brief filed - oral argument not requested  +
+ + + + + + +
[ PDF/617 KB ] Brief
[ PDF/139 KB ] Notice
+
+
11/06/2023Motion to substitute attorney filed  +
+ + + + +
[ PDF/197 KB ] Motion
+
+
11/06/2023Brief filed - oral argument not requested  +
+ + + + + + +
[ PDF/290 KB ] Brief
[ PDF/139 KB ] Notice
+
+
11/06/2023Appellants brief due  +
+ +
+
10/24/2023Motion to substitute attorney disposedDENIED +
+ + + + +
[ PDF/139 KB ] Notice
+
+
10/19/2023Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/138 KB ] Notice
+
+
10/17/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/183 KB ] Extension
+
+
10/17/2023Motion to substitute attorney filed  +
+ + + + +
[ PDF/178 KB ] Motion
+
+
10/16/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/316 KB ] Extension
+
+
10/16/2023Appellants brief due  +
+ +
+
09/26/2023Reporters record filed  +
+ + + + +
[ PDF/137 KB ] Notice
+
+
09/26/2023Court fee due  +
+ +
+
09/26/2023Docketing statement due  +
+ +
+
09/25/2023Reporters record due  +
+ +
+
09/15/2023Extension of time to file reporters record disposedMotion or Writ Granted +
+ + + + +
[ PDF/139 KB ] Notice
+
+
09/14/2023Extension of time to file reporters record filed  +
+ + + + +
[ PDF/208 KB ] INFORMATION SHEET
+
+
09/14/2023Clerks record filed  +
+ + + + +
[ PDF/138 KB ] Notice
+
+
09/14/2023Record due  +
+ +
+
09/06/2023Letter issued by the court  +
+ + + + +
[ PDF/164 KB ] Notice
+
+
09/06/2023Notice of appeal received  +
+ +
+
09/06/2023Indigent status for costs of appeal confirmed  +
+ +
+
09/06/2023Case began in court of appeals  +
+ + + + +
[ PDF/169 KB ] Notice
+
+
09/06/2023Notice of appeal received  +
+ +
+
09/05/2023Notice of appeal filed in trial court  +
+ +
+
08/15/2023Judgment signed by trial court judge  +
+ +
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
03/13/2024StatusMotion for rehearing enbanc due
03/13/2024StatusMotion for rehearing due
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
Department of Family and Protective ServicesAppelleeRobert J. Hazeltine-Shedd
Marc Ritter
Minna Nashef
B., W. AppellantDonald M. Crane
Alexandra George
A, C. J.AppellantMichael Francis Craig
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Documents, Opinions, Notices: +
+
+ +
+
+
+
+ +
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 314th District Court  +
+
+
+
+ +
+
+ Harris  +
+
+
+
+ +
+
+ Honorable Judge 314th District Court  +
+
+
+
+ +
+
+ 2021-004499J  +
+
+
+
+ +
+
+ Court Reporter 314th District Court  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ + + + + + +
+
+
+ \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_1_subexample_25.html b/tests/examples/opinions/united_states/texapp_1_subexample_25.html new file mode 100644 index 000000000..2043cfb5d --- /dev/null +++ b/tests/examples/opinions/united_states/texapp_1_subexample_25.html @@ -0,0 +1,591 @@ + + Case Detail + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+

+ First Court of Appeals +

+
+ + +
+ +
+
+ + + +
+
+
+
+ + + +
+ +
+
Case Information
+
+ Administration + +
+ + + +
+ +
+ + + + + + + + + + + + + + + + +
+
+ Case: + 01-24-00076-CR +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 01-24-00076-CR + +
+
+
+
+
+ +
+
+
+ 01/29/2024 +
+
+
+
+
+ +
+
+ Mandamus +
+
+
+
+ +
+
+ In re Julian Fernandez  +
+
+
+
+ +
+
+   +
+
+
+ +
+
+ +
+
+ Yes  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+ +
+
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + +
Date Event TypeDescriptionDocument
01/29/2024Petition for writ of mandamus filedRelator +
+ + + + + + +
[ PDF/5.90 MB ] Petition
[ PDF/135 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateEvent TypeDispositionDocument
03/08/2024Motion for rehearing due  +
+ +
+
03/08/2024Motion for rehearing enbanc due  +
+ +
+
02/22/2024Memorandum opinion issuedDeny petition for writ of mandamus +
+ + + + + + +
[ PDF/194 KB ] Memorandum Opinion
[ PDF/137 KB ] Notice
+
+
02/22/2024Petition for writ of mandamus disposed  +
+ +
+
02/21/2024Submitted  +
+ +
+
01/29/2024Petition for writ of mandamus filed  +
+ + + + + + +
[ PDF/5.90 MB ] PETITION
[ PDF/135 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
03/08/2024StatusMotion for rehearing due
03/08/2024StatusMotion for rehearing enbanc due
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
The State of TexasCriminal - State of TexasThe Honorable Kim K Ogg
Jessica A. Caird
Fernandez, Julian RelatorJulian Fernandez
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Documents, Opinions, Notices: +
+
+ +
+
+
+
+ +
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ Co Crim Ct at Law No 13  +
+
+
+
+ +
+
+ Harris  +
+
+
+
+ +
+
+ Honorable Judge County CR Court @ Law #13  +
+
+
+
+ +
+
+ 2355952  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ + + + + + +
+
+
+ \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_2_example.compare.json b/tests/examples/opinions/united_states/texapp_2_example.compare.json index 481f475a4..0a91c5d2e 100644 --- a/tests/examples/opinions/united_states/texapp_2_example.compare.json +++ b/tests/examples/opinions/united_states/texapp_2_example.compare.json @@ -1,212 +1,70 @@ [ { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22795&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00317-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22792&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-12-00249-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22791&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00301-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22789&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00165-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": 
"http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22788&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-12-00301-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22782&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00253-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22770&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00106-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22761&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00091-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22760&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00558-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22750&Index=***coa02%5cOpinion", - "precedential_statuses": 
"Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-12-00301-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22734&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00253-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22728&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00558-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22718&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00091-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22715&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00301-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22708&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00317-CV", - 
"case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22699&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00165-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22692&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-12-00249-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22618&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00028-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22606&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00023-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22604&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-14-00046-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - 
"download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=22600&Index=***coa02%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "02-13-00336-CV", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Murder", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=8ea6213f-bc83-420c-a9b1-ef64b4982acb&coa=coa02&DT=Opinion&MediaID=5cbc6828-9ebe-426d-83d5-11d6badada5a", + "type": "010combined" + } + ], + "date_filed": "2024-02-29", + "disposition": "Affirmed", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "Lyneisha Marie McCuin v. the State of Texas", + "case_name_short": "" + }, + "case_name": "Lyneisha Marie McCuin v. the State of Texas", + "appeal_from_str": "372nd District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "Julie Lugo", + "docket_number": "1771194R", + "court_reporter": "Court Reporter, 372nd District Court" + }, + "docket_number": "02-23-00075-CR", + "source": 2, + "blocked": false, + "case_name_short": "" + } + }, + { + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Injunction", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=59102180-2384-4b34-89e3-e5a4a7691ded&coa=coa02&DT=Opinion&MediaID=a147d5fe-ac7b-4874-93cd-391849ff8d65", + "type": "010combined" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=665d2d13-8aa7-49b4-964e-3834b6cc3e55&coa=coa02&DT=Opinion&MediaID=483c1738-1b86-4042-8010-eb0501e3a39c", + "type": "030concurrence" + } + ], + "date_filed": "2024-02-29", + "disposition": "Affirmed", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "Texas Equal Access Fund v. 
Ashley Maxwell", + "case_name_short": "" + }, + "case_name": "Texas Equal Access Fund v. Ashley Maxwell", + "appeal_from_str": "431st District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "James S. Johnson", + "docket_number": "22-2100-431", + "court_reporter": "Court Reporter, 431st District Court" + }, + "docket_number": "02-22-00347-CV", + "source": 2, + "blocked": false, + "case_name_short": "" + } } ] \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_2_example.html b/tests/examples/opinions/united_states/texapp_2_example.html index 98fd19f92..7665a014f 100644 --- a/tests/examples/opinions/united_states/texapp_2_example.html +++ b/tests/examples/opinions/united_states/texapp_2_example.html @@ -1,1901 +1,433 @@ - - - TAMES SEARCH - Supreme Court - - - - - - - - - - -
-
- - - - - -
- - - - - - - - - - -
-
-
-
- -
+ - - - - - - \ No newline at end of file + \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_2_subexample_1.html b/tests/examples/opinions/united_states/texapp_2_subexample_1.html new file mode 100644 index 000000000..263870260 --- /dev/null +++ b/tests/examples/opinions/united_states/texapp_2_subexample_1.html @@ -0,0 +1,865 @@ + + Case Detail + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+

+ Second Court of Appeals +

+
+ + +
+ +
+
+ + + +
+
+
+
+ + + +
+ +
+
Case Information
+
+ Administration + +
+ + + +
+ +
+ + + + + + + + + + + + + + + + +
+
+ Case: + 02-22-00347-CV +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 02-22-00347-CV + +
+
+
+
+
+ +
+
+
+ 08/31/2022 +
+
+
+
+
+ +
+
+ Injunction +
+
+
+
+ +
+
+ Texas Equal Access Fund  +
+
+
+
+ +
+
+ Ashley Maxwell  +
+
+
+ +
+
+ +
+
+ No  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+ West Publishing  +
+
+ +
+
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + +
Date Event TypeDescriptionDocument
02/28/2023Reply brief filedAppellant +
+ + + + + + +
[ PDF/1.30 MB ] Brief
[ PDF/91 KB ] Notice
+
+
02/28/2023Amicus curiae brief receivedAmicus Curiae +
+ + + + +
[ PDF/259 KB ] Brief
+
+
01/19/2023Brief filed - oral argument requestedAppellee +
+ + + + + + +
[ PDF/1.75 MB ] Brief
[ PDF/92 KB ] Notice
+
+
11/30/2022Electronic brief filed - oral argument requestedAppellant +
+ + + + + + +
[ PDF/31.38 MB ] Brief
[ PDF/104 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateEvent TypeDispositionDocument
02/29/2024Memorandum opinion issuedAffirmed +
+ + + + + + + + +
[ PDF/124 KB ] Judgment
[ PDF/429 KB ] Memorandum Opinion
[ PDF/93 KB ] Notice
+
+
02/29/2024Memorandum opinion issuedAffirmed +
+ + + + + + + + +
[ PDF/124 KB ] Judgment
[ PDF/156 KB ] Concurring Memorandum Opinion
[ PDF/93 KB ] Notice
+
+
03/07/2023Submitted  +
+ +
+
03/02/2023Document Received  +
+ + + + +
[ PDF/991 KB ] Other
+
+
02/28/2023Amicus curiae brief received  +
+ + + + +
[ PDF/259 KB ] Brief
+
+
02/28/2023Reply brief filed  +
+ + + + + + +
[ PDF/1.30 MB ] Brief
[ PDF/91 KB ] Notice
+
+
02/09/2023Set for submission on briefs - oral argument denied  +
+ + + + +
[ PDF/116 KB ] Notice
+
+
01/27/2023Motion for extension of time to file reply brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/94 KB ] Notice
+
+
01/27/2023Motion for extension of time to file reply brief filed  +
+ + + + +
[ PDF/215 KB ] Motion
+
+
01/19/2023Case ready to be set  +
+ +
+
01/19/2023Brief filed - oral argument requested  +
+ + + + + + +
[ PDF/1.75 MB ] Brief
[ PDF/92 KB ] Notice
+
+
12/15/2022Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/97 KB ] Notice
+
+
12/14/2022Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/140 KB ] Motion
+
+
11/30/2022Electronic brief filed - oral argument requested  +
+ + + + + + +
[ PDF/31.38 MB ] Brief
[ PDF/104 KB ] Notice
+
+
11/16/2022Motion disposedMotion or Writ Denied +
+ + + + +
[ PDF/96 KB ] Notice
+
+
11/08/2022Response filed  +
+ + + + +
[ PDF/197 KB ] Response
+
+
11/07/2022Motion filed  +
+ + + + +
[ PDF/138 KB ] Motion
+
+
10/18/2022Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/95 KB ] Notice
+
+
10/17/2022Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/159 KB ] Motion
+
+
09/19/2022Docketing statement filed  +
+ +
+
09/15/2022Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/95 KB ] Notice
+
+
09/13/2022Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/162 KB ] Motion
+
+
09/12/2022Document filed  +
+ + + + +
[ PDF/325 KB ] Other
+
+
09/09/2022Electronic Clerks Record Filed  +
+ + + + +
[ PDF/129 KB ] Notice
+
+
09/08/2022Electronic Reporter/Recorders Record Filed  +
+ + + + +
[ PDF/91 KB ] Notice
+
+
09/07/2022Additional copies of documents received after initial filing  +
+ +
+
09/06/2022Amended notice of appeal filed  +
+ + + + + + +
[ PDF/180 KB ] Notice of Appeal
[ PDF/100 KB ] Notice
+
+
08/31/2022Fee paid  +
+ +
+
08/31/2022Notice of appeal filed in court of appeals  +
+ + + + + + +
[ PDF/40 KB ] Order
[ PDF/124 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
04/29/2024StatusRelease to publisher
05/09/2024StatusMandate to Issue
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
Maxwell, AshleyAppelleeJonathan F. Mitchell
North Texas Equal Access FundAppellantJohn Atkins
Mackenzie S. Wallace
Jennifer R. Ecklund
Marcy Hogan Greer
Alexandra W. Albright
Kirsten M. Castaneda
Hannah Roblyer
Kevin H. Dubose
Elizabeth G. Myers
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Documents, Opinions, Notices: +
+
+ +
+
+
+
+ +
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 431st District Court  +
+
+
+
+ +
+
+ Denton  +
+
+
+
+ +
+
+ Honorable James S. Johnson  +
+
+
+
+ +
+
+ 22-2100-431  +
+
+
+
+ +
+
+ Court Reporter, 431st District Court  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ + + + + + +
+
+
+ \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_2_subexample_2.html b/tests/examples/opinions/united_states/texapp_2_subexample_2.html new file mode 100644 index 000000000..6ddc14035 --- /dev/null +++ b/tests/examples/opinions/united_states/texapp_2_subexample_2.html @@ -0,0 +1,823 @@ + + Case Detail + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+

+ Second Court of Appeals +

+
+ + +
+ +
+
+ + + +
+
+
+
+ + + +
+ +
+
Case Information
+
+ Administration + +
+ + + +
+ +
+ + + + + + + + + + + + + + + + +
+
+ Case: + 02-23-00075-CR +     +
+
+ + +
+
+
+
+
+
+ +
+
+
+ + 02-23-00075-CR + +
+
+
+
+
+ +
+
+
+ 04/11/2023 +
+
+
+
+
+ +
+
+ Murder +
+
+
+
+ +
+
+ Lyneisha Marie McCuin  +
+
+
+
+ +
+
+ The State of Texas  +
+
+
+ +
+
+ +
+
+ No  +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+
+
+ +
+
+   +
+
+ +
+
+
+
+
+
+
+
+
+ Appellate Briefs +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + +
Date Event TypeDescriptionDocument
10/27/2023Brief was not filedState +
+ +
+
09/27/2023Anders brief filedAppellant +
+ + + + + + +
[ PDF/381 KB ] Anders Brief
[ PDF/114 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Events +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateEvent TypeDispositionDocument
02/29/2024Motion to withdraw attorney disposedMotion or Writ Granted +
+ +
+
02/29/2024Memorandum opinion issuedAffirmed +
+ + + + + + + + +
[ PDF/98 KB ] Judgment
[ PDF/122 KB ] Memorandum Opinion
[ PDF/92 KB ] Notice
+
+
02/22/2024Submitted  +
+ +
+
02/02/2024Letter received  +
+ + + + +
[ PDF/199 KB ] Letter
+
+
02/01/2024Set for submission on briefs  +
+ + + + +
[ PDF/119 KB ] Notice
+
+
01/22/2024Pro Se response filed  +
+ + + + +
[ PDF/1.06 MB ] Response
+
+
12/12/2023Letter issued by the court  +
+ + + + +
[ PDF/105 KB ] Notice
+
+
12/11/2023Electronic Supplemental Clerks Record Filed  +
+ +
+
11/30/2023Motion to access appellate record dispMotion or Writ Granted +
+ + + + +
[ PDF/108 KB ] Notice
+
+
11/30/2023Motion to access appellate record filed  +
+ + + + +
[ PDF/263 KB ] Response
+
+
10/27/2023Case ready to be set  +
+ +
+
10/27/2023Brief was not filed  +
+ +
+
10/27/2023Letter filed  +
+ + + + +
[ PDF/186 KB ] Letter
+
+
10/26/2023Response was not filed  +
+ + + + +
[ PDF/116 KB ] Notice
+
+
09/27/2023Motion to withdraw attorney filed  +
+ + + + +
[ PDF/183 KB ] Motion
+
+
09/27/2023Anders brief filed  +
+ + + + + + +
[ PDF/381 KB ] Anders Brief
[ PDF/114 KB ] Notice
+
+
09/12/2023Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/99 KB ] Notice
+
+
09/12/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/251 KB ] Motion
+
+
08/28/2023Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/96 KB ] Notice
+
+
08/28/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/256 KB ] Motion
+
+
07/12/2023Motion for extension of time to file brief disposedMotion or Writ Granted +
+ + + + +
[ PDF/101 KB ] Notice
+
+
07/12/2023Motion for extension of time to file brief filed  +
+ + + + +
[ PDF/289 KB ] Motion
+
+
06/12/2023Electronic Reporter/Recorders Record Filed  +
+ +
+
06/12/2023Electronic Reporter/Recorders Record Filed  +
+ +
+
06/12/2023Electronic Reporter/Recorders Record Filed  +
+ + + + +
[ PDF/129 KB ] Notice
+
+
05/31/2023Extension of time to file reporters record disposedMotion or Writ Granted +
+ + + + +
[ PDF/94 KB ] Notice
+
+
05/30/2023Extension of time to file reporters record filed  +
+ +
+
05/30/2023Electronic Clerks Record Filed  +
+ + + + +
[ PDF/88 KB ] Notice
+
+
04/17/2023Docketing statement filed  +
+ + + + +
[ PDF/740 KB ] Docketing Statement
+
+
04/11/2023Notice of appeal filed in court of appeals  +
+ + + + +
[ PDF/119 KB ] Notice
+
+
+ +
+ + +
+
+
+
+
+
+
+
+
+ Calendars +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
Set DateCalendar TypeReason Set
04/15/2024StatusRelease to publisher
04/24/2024StatusMandate to Issue
+ +
+ + +
+
+
+
+
+
+
+
+
+ Parties +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
PartyPartyTypeRepresentative
The State of TexasCriminal - State of TexasSteven W. Conder
McCuin, Lyneisha MarieCriminal - AppellantLyneisha Marie McCuin
Kevin C. Smith
Joshua Stewart Graham
+ +
+ + +
+
+
+
+
+
+
+
+
+ Case Documents, Opinions, Notices: +
+
+ +
+
+
+
+ +
+
+
+ +
+
+ Trial Court Information +
+
+
+
+ +
+
+ 372nd District Court  +
+
+
+
+ +
+
+ Tarrant  +
+
+
+
+ +
+
+ Honorable Julie Lugo  +
+
+
+
+ +
+
+ 1771194R  +
+
+
+
+ +
+
+ Court Reporter, 372nd District Court  +
+
+
+
+ +
+
+ 35 yrs TDCJ  +
+
+
+
+ +
+
+
+
+

+ To view or print PDF files you must have the Adobe Acrobat® reader. This software + may be obtained without charge from Adobe. Download the reader from the Adobe Web site +

+
+
+ + +
+ +
+
+
+ + + + + + +
+
+
+ \ No newline at end of file From d51223912d4f652b6b6ea495068c58b52d35f884 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 11 Mar 2024 21:47:56 -0500 Subject: [PATCH 06/12] feat(texapp): Change to NewOpinionSite class Update texapp_1 through texapp_14 classes, and example files --- .../opinions/united_states/state/texapp_1.py | 24 +- .../opinions/united_states/state/texapp_10.py | 4 +- .../opinions/united_states/state/texapp_11.py | 4 +- .../opinions/united_states/state/texapp_12.py | 4 +- .../opinions/united_states/state/texapp_13.py | 4 +- .../opinions/united_states/state/texapp_14.py | 4 +- .../opinions/united_states/state/texapp_3.py | 4 +- .../opinions/united_states/state/texapp_4.py | 4 +- .../opinions/united_states/state/texapp_5.py | 4 +- .../opinions/united_states/state/texapp_6.py | 4 +- .../opinions/united_states/state/texapp_7.py | 4 +- .../opinions/united_states/state/texapp_8.py | 4 +- .../opinions/united_states/state/texapp_9.py | 4 +- .../texapp_10_example.compare.json | 188 +- .../united_states/texapp_10_example.html | 1970 +++------------ .../united_states/texapp_10_subexample_1.html | 695 ++++++ .../united_states/texapp_10_subexample_2.html | 740 ++++++ .../united_states/texapp_10_subexample_3.html | 905 +++++++ .../texapp_11_example.compare.json | 250 +- .../united_states/texapp_11_example.html | 2141 +++-------------- .../united_states/texapp_11_subexample_1.html | 746 ++++++ .../united_states/texapp_11_subexample_2.html | 591 +++++ .../texapp_12_example.compare.json | 175 +- .../united_states/texapp_12_example.html | 1976 +++------------ .../united_states/texapp_12_subexample_1.html | 675 ++++++ .../united_states/texapp_12_subexample_2.html | 644 +++++ .../texapp_13_example.compare.json | 106 +- .../united_states/texapp_13_example.html | 1763 +++----------- .../united_states/texapp_13_subexample_1.html | 828 +++++++ .../united_states/texapp_13_subexample_2.html | 651 +++++ .../texapp_14_example.compare.json | 250 +- 
.../united_states/texapp_14_example.html | 2141 +++-------------- .../united_states/texapp_14_subexample_1.html | 878 +++++++ .../united_states/texapp_14_subexample_2.html | 693 ++++++ .../texapp_3_example.compare.json | 116 +- .../united_states/texapp_3_example.html | 1840 +++----------- .../united_states/texapp_3_subexample_1.html | 1025 ++++++++ .../united_states/texapp_3_subexample_2.html | 628 +++++ .../texapp_4_example.compare.json | 135 +- .../united_states/texapp_4_example.html | 1868 +++----------- .../united_states/texapp_4_subexample_1.html | 1175 +++++++++ .../united_states/texapp_4_subexample_2.html | 627 +++++ .../texapp_5_example.compare.json | 166 +- .../united_states/texapp_5_example.html | 1926 +++------------ .../united_states/texapp_5_subexample_1.html | 495 ++++ .../united_states/texapp_5_subexample_2.html | 936 +++++++ .../texapp_6_example.compare.json | 96 +- .../united_states/texapp_6_example.html | 1784 +++----------- .../united_states/texapp_6_subexample_1.html | 616 +++++ .../united_states/texapp_6_subexample_2.html | 803 +++++++ .../texapp_7_example.compare.json | 100 +- .../united_states/texapp_7_example.html | 1760 +++----------- .../united_states/texapp_7_subexample_1.html | 644 +++++ .../united_states/texapp_7_subexample_2.html | 885 +++++++ .../texapp_8_example.compare.json | 116 +- .../united_states/texapp_8_example.html | 1789 +++----------- .../united_states/texapp_8_subexample_1.html | 597 +++++ .../united_states/texapp_8_subexample_2.html | 958 ++++++++ .../texapp_9_example.compare.json | 106 +- .../united_states/texapp_9_example.html | 1763 +++----------- .../united_states/texapp_9_subexample_1.html | 744 ++++++ .../united_states/texapp_9_subexample_2.html | 650 +++++ .../texcrimapp_subexample_2.html | 709 ++++++ .../local/test_ScraperExtractFromTextTest.py | 3 +- 64 files changed, 24318 insertions(+), 19820 deletions(-) create mode 100644 tests/examples/opinions/united_states/texapp_10_subexample_1.html create mode 100644 
tests/examples/opinions/united_states/texapp_10_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_10_subexample_3.html create mode 100644 tests/examples/opinions/united_states/texapp_11_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_11_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_12_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_12_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_13_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_13_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_14_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_14_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_3_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_3_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_4_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_4_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_5_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_5_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_6_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_6_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_7_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_7_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_8_subexample_1.html create mode 100644 tests/examples/opinions/united_states/texapp_8_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texapp_9_subexample_1.html create mode 100644 
tests/examples/opinions/united_states/texapp_9_subexample_2.html create mode 100644 tests/examples/opinions/united_states/texcrimapp_subexample_2.html diff --git a/juriscraper/opinions/united_states/state/texapp_1.py b/juriscraper/opinions/united_states/state/texapp_1.py index 412cb8b7b..f955f0d4a 100644 --- a/juriscraper/opinions/united_states/state/texapp_1.py +++ b/juriscraper/opinions/united_states/state/texapp_1.py @@ -5,6 +5,7 @@ # Reviewer: # Date: 2014-07-10 +from datetime import datetime from typing import Dict, List from juriscraper.opinions.united_states.state import tex @@ -20,18 +21,39 @@ def __init__(self, *args, **kwargs): def get_opinions(self, html) -> List[Dict]: """Override from tex.py. See docstring there for more info + Some texapp courts mark the 'Judgement' document + as having a 'Opinion' type. For example, texapp 4 and 6. + These are skipped + + On some case pages, the Court of Criminal Appeals opinion appears + in the lower court. See texapp_12_subexample_2 + + Some cases have been re-heard in the same court, or remanded, + and their pages have multiple opinions that do not belong + to the same cluster. 
See texapp_10_subexample_3 + :param html: page's HTML object :return List of opinions """ + first_opinion_date = None opinions = [] opinion_xpath = "//div[div[contains(text(), 'Case Events')]]//tr[td[contains(text(), 'pinion issued')]]" - link_xpath = ".//tr[td[1]/a and td[2][contains(text(), 'pinion')]]" + link_xpath = ".//tr[td[1]/a and td[2][contains(text(), 'pinion') or normalize-space(text())='CCA']]" for opinion in html.xpath(opinion_xpath): op = {} link = opinion.xpath(link_xpath) if not link: continue + opinion_date = datetime.strptime( + opinion.xpath(".//td[1]/text()")[0], "%m/%d/%Y" + ).date() + if not first_opinion_date: + first_opinion_date = opinion_date + elif (first_opinion_date - opinion_date).days > 10: + # Older opinion cluster + continue + op["disposition"] = opinion.xpath(".//td[3]/text()")[0] op["download_url"] = link[0].xpath("td/a/@href")[0] diff --git a/juriscraper/opinions/united_states/state/texapp_10.py b/juriscraper/opinions/united_states/state/texapp_10.py index 3d42dd569..ad398f016 100644 --- a/juriscraper/opinions/united_states/state/texapp_10.py +++ b/juriscraper/opinions/united_states/state/texapp_10.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_11.py b/juriscraper/opinions/united_states/state/texapp_11.py index 2a4681a45..0ff163a9b 100644 --- a/juriscraper/opinions/united_states/state/texapp_11.py +++ b/juriscraper/opinions/united_states/state/texapp_11.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): 
super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_12.py b/juriscraper/opinions/united_states/state/texapp_12.py index f771fe306..9ee38de49 100644 --- a/juriscraper/opinions/united_states/state/texapp_12.py +++ b/juriscraper/opinions/united_states/state/texapp_12.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_13.py b/juriscraper/opinions/united_states/state/texapp_13.py index 5e5e55fbb..b797e641a 100644 --- a/juriscraper/opinions/united_states/state/texapp_13.py +++ b/juriscraper/opinions/united_states/state/texapp_13.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_14.py b/juriscraper/opinions/united_states/state/texapp_14.py index 0cd9181a4..f1728db48 100644 --- a/juriscraper/opinions/united_states/state/texapp_14.py +++ b/juriscraper/opinions/united_states/state/texapp_14.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_3.py b/juriscraper/opinions/united_states/state/texapp_3.py index 9568f9af8..3f978a128 100644 --- 
a/juriscraper/opinions/united_states/state/texapp_3.py +++ b/juriscraper/opinions/united_states/state/texapp_3.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_4.py b/juriscraper/opinions/united_states/state/texapp_4.py index 031aa4f7f..c55372ee1 100644 --- a/juriscraper/opinions/united_states/state/texapp_4.py +++ b/juriscraper/opinions/united_states/state/texapp_4.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_5.py b/juriscraper/opinions/united_states/state/texapp_5.py index 6b71c3c65..f7ae23a9a 100644 --- a/juriscraper/opinions/united_states/state/texapp_5.py +++ b/juriscraper/opinions/united_states/state/texapp_5.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_6.py b/juriscraper/opinions/united_states/state/texapp_6.py index 093708967..7673bbda4 100644 --- a/juriscraper/opinions/united_states/state/texapp_6.py +++ b/juriscraper/opinions/united_states/state/texapp_6.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from 
juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_7.py b/juriscraper/opinions/united_states/state/texapp_7.py index daec4023b..8fd2bd286 100644 --- a/juriscraper/opinions/united_states/state/texapp_7.py +++ b/juriscraper/opinions/united_states/state/texapp_7.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_8.py b/juriscraper/opinions/united_states/state/texapp_8.py index d84f9b114..a91e0413f 100644 --- a/juriscraper/opinions/united_states/state/texapp_8.py +++ b/juriscraper/opinions/united_states/state/texapp_8.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/texapp_9.py b/juriscraper/opinions/united_states/state/texapp_9.py index b4f9681e9..5b2763093 100644 --- a/juriscraper/opinions/united_states/state/texapp_9.py +++ b/juriscraper/opinions/united_states/state/texapp_9.py @@ -6,10 +6,10 @@ # Date: 2014-07-10 -from juriscraper.opinions.united_states.state import tex +from juriscraper.opinions.united_states.state import texapp_1 -class Site(tex.Site): +class Site(texapp_1.Site): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.court_id = self.__module__ diff --git 
a/tests/examples/opinions/united_states/texapp_10_example.compare.json b/tests/examples/opinions/united_states/texapp_10_example.compare.json index 07d67c735..0bb64e706 100644 --- a/tests/examples/opinions/united_states/texapp_10_example.compare.json +++ b/tests/examples/opinions/united_states/texapp_10_example.compare.json @@ -1,102 +1,102 @@ [ { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7045&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-12-00179-CV", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Real Property", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=ebebfa77-e9a1-4cf8-bb0c-949c3a7e5c9b&coa=coa10&DT=Opinion&MediaID=fc63048e-aac0-4d23-915f-9612e9b8b56f", + "type": "010combined" + } + ], + "date_filed": "2024-02-16", + "disposition": "Aff/Rev & Remanded to App Ct", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "Fort Worth & Western Railroad Company v. Nathan D. Albert and Chisholm Trail Redi-Mix, LLC", + "case_name_short": "" + }, + "case_name": "Fort Worth & Western Railroad Company v. Nathan D. Albert and Chisholm Trail Redi-Mix, LLC", + "appeal_from_str": "18th District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "Kenneth C. Curry", + "docket_number": "DC-C201600307", + "court_reporter": "Robin S. 
Howe" + }, + "docket_number": "10-18-00219-CV", + "source": 2, + "blocked": false, + "case_name_short": "" + } }, { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7040&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-12-00308-CR", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Evading Arrest", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=48b90aa3-c8fa-4f0d-bc69-a15600ce7a2d&coa=coa10&DT=Opinion&MediaID=27d9a514-f1f8-4bd8-8193-315e3ed3f311", + "type": "040dissent" + }, + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=3b480ec6-987e-4679-9016-149cbec3de63&coa=coa10&DT=Opinion&MediaID=ad4a1d9e-20c0-499d-9ac7-2dabc401f124", + "type": "010combined" + } + ], + "date_filed": "2024-02-29", + "disposition": "Modified/Reformed and Affirmed", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "Hollis Lane Willingham v. the State of Texas", + "case_name_short": "" + }, + "case_name": "Hollis Lane Willingham v. the State of Texas", + "appeal_from_str": "369th District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "Charles Michael Davis", + "docket_number": "19-0032CR", + "court_reporter": "Nancy K. 
Adams" + }, + "docket_number": "10-21-00158-CR", + "source": 2, + "blocked": false, + "case_name_short": "" + } }, { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7039&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00136-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7038&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00127-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7025&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00131-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7022&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00153-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7020&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00390-CR", - "case_name_shorts": "" - }, - { - 
"case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=7006&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-12-00287-CR", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=6983&Index=***coa10%5cOpinion", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-13-00275-CV", - "case_name_shorts": "" - }, - { - "case_dates": "2014-06-26", - "case_names": "No case names fetched during tests.", - "download_urls": "http://www.search.txcourts.gov/RetrieveDocument.aspx?DocId=142&Index=***coa10%5cOrder", - "precedential_statuses": "Published", - "blocked_statuses": false, - "date_filed_is_approximate": false, - "docket_numbers": "10-14-00162-CV", - "case_name_shorts": "" + "Docket": { + "OpinionCluster": { + "nature_of_suit": "Miscellaneous/Other Criminal including Misdemeanor or Felony", + "Opinions": [ + { + "download_url": "tests/examples/opinions/united_states/SearchMedia.aspx?MediaVersionID=27f7bbd8-6204-4734-b6d9-3eae1973bbc6&coa=coa10&DT=Opinion&MediaID=7a64ebd4-b434-457a-8c3b-b1d9712b3111", + "type": "010combined" + } + ], + "date_filed": "2024-03-04", + "disposition": "Dismissed", + "source": "C", + "date_filed_is_approximate": false, + "blocked": false, + "precedential_status": "Published", + "case_name": "James Gambrell v. the State of Texas", + "case_name_short": "" + }, + "case_name": "James Gambrell v. the State of Texas", + "appeal_from_str": "12th District Court", + "OriginatingCourtInformation": { + "assigned_to_str": "David W. Moorman", + "docket_number": "30096", + "court_reporter": "Jacqueline A. 
Mills" + }, + "docket_number": "10-23-00176-CR", + "source": 2, + "blocked": false, + "case_name_short": "" + } } ] \ No newline at end of file diff --git a/tests/examples/opinions/united_states/texapp_10_example.html b/tests/examples/opinions/united_states/texapp_10_example.html index e9e6d7ffe..55b18820b 100644 --- a/tests/examples/opinions/united_states/texapp_10_example.html +++ b/tests/examples/opinions/united_states/texapp_10_example.html @@ -1,1604 +1,398 @@ - - - TAMES SEARCH - Supreme Court - - - - - - - - - - -
-
- - - - - -
- - - - - - - - - - -
-
-
-
- -