From 7a40ce8b1a0475f8ea47a10c5a6c3ad51c2329a9 Mon Sep 17 00:00:00 2001 From: Denis Smetannikov Date: Sun, 7 Apr 2024 00:56:27 +0400 Subject: [PATCH] Implement schema presets (#146) --- README.md | 588 +++++++++++++- action.yml | 28 +- schema-examples/full.json | 11 +- schema-examples/full.php | 12 +- schema-examples/full.yml | 15 +- schema-examples/full_clean.yml | 12 +- schema-examples/preset_database.yml | 34 + schema-examples/preset_features.yml | 68 ++ schema-examples/preset_usage.yml | 44 + schema-examples/preset_users.yml | 114 +++ src/Commands/AbstractValidate.php | 23 + src/Commands/ValidateCsv.php | 6 +- src/Commands/ValidateSchema.php | 4 +- src/Csv/Column.php | 17 +- src/Rules/Cell/ContainsNone.php | 4 +- src/Schema.php | 122 +-- src/SchemaDataPrep.php | 251 ++++++ src/Utils.php | 66 +- src/Validators/ValidatorSchema.php | 18 +- tests/Commands/ValidateCsvBasicTest.php | 4 +- tests/Commands/ValidateCsvBatchSchemaTest.php | 2 +- tests/Commands/ValidateCsvReportsTest.php | 16 +- tests/ExampleSchemasTest.php | 79 +- tests/GithubActionsTest.php | 21 +- tests/ReadmeTest.php | 77 +- tests/Rules/Cell/ContainsNoneTest.php | 4 +- tests/SchemaPresetTest.php | 758 ++++++++++++++++++ tests/SchemaTest.php | 8 +- tests/Tools.php | 30 +- tests/UtilsTest.php | 19 +- tests/schemas/preset/child-of-child.yml | 36 + tests/schemas/preset/child.yml | 71 ++ tests/schemas/preset/parent.yml | 53 ++ tests/schemas/todo.yml | 16 +- 34 files changed, 2434 insertions(+), 197 deletions(-) create mode 100644 schema-examples/preset_database.yml create mode 100644 schema-examples/preset_features.yml create mode 100644 schema-examples/preset_usage.yml create mode 100644 schema-examples/preset_users.yml create mode 100644 src/SchemaDataPrep.php create mode 100644 tests/SchemaPresetTest.php create mode 100644 tests/schemas/preset/child-of-child.yml create mode 100644 tests/schemas/preset/child.yml create mode 100644 tests/schemas/preset/parent.yml diff --git a/README.md b/README.md index 
5b53b052..efd0e0f5 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ [![Static Badge](https://img.shields.io/badge/Rules-118-green?label=Cell%20rules&labelColor=blue&color=gray)](src/Rules/Cell) [![Static Badge](https://img.shields.io/badge/Rules-206-green?label=Aggregate%20rules&labelColor=blue&color=gray)](src/Rules/Aggregate) [![Static Badge](https://img.shields.io/badge/Rules-8-green?label=Extra%20checks&labelColor=blue&color=gray)](#extra-checks) -[![Static Badge](https://img.shields.io/badge/Rules-17/11/25-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml) +[![Static Badge](https://img.shields.io/badge/Rules-20/11/20-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml) A console utility designed for validating CSV files against a strictly defined schema and validation rules outlined @@ -28,6 +28,7 @@ specifications, making it invaluable in scenarios where data quality and consist - [Introduction](#introduction) - [Usage](#usage) - [Schema definition](#schema-definition) +- [Presets and reusable schemas](#presets-and-reusable-schemas) - [Complete CLI help message](#complete-cli-help-message) - [Report examples](#report-examples) - [Benchmarks](#benchmarks) @@ -125,48 +126,48 @@ You can find launch examples in the [workflow demo](https://github.com/JBZoo/Csv ```yml -- uses: jbzoo/csv-blueprint@master # See the specific version on releases page +- uses: jbzoo/csv-blueprint@master # See the specific version on releases page. `@master` is latest. with: - # Path(s) to validate. You can specify path in which CSV files will be searched. Feel free to use glob pattrens. Usage examples: /full/path/file.csv, p/file.csv, p/*.csv, p/**/*.csv, p/**/name-*.csv, **/*.csv, etc. + # Specify the path(s) to the CSV files you want to validate. + # This can include a direct path to a file or a directory to search with a maximum depth of 10 levels. 
+ # Examples: /full/path/name.csv; p/file.csv; p/*.csv; p/**/*.csv; p/**/name-*.csv; **/*.csv # Required: true csv: './tests/**/*.csv' - # Schema filepath. It can be a YAML, JSON or PHP. See examples on GitHub. + # Specify the path(s) to the schema file(s), supporting YAML, JSON, or PHP formats. + # Similar to CSV paths, you can direct to specific files or search directories with glob patterns. + # Examples: /full/path/name.yml; p/file.yml; p/*.yml; p/**/*.yml; p/**/name-*.yml; **/*.yml # Required: true schema: './tests/**/*.yml' # Report format. Available options: text, table, github, gitlab, teamcity, junit. - # Default value: table - # You can skip it - report: table + # Default value: 'table' + # Required: true + report: 'table' # Quick mode. It will not validate all rows. It will stop after the first error. - # Default value: no - # You can skip it - quick: no + # Default value: 'no' + # Required: true + quick: 'no' # Skip schema validation. If you are sure that the schema is correct, you can skip this check. - # Default value: no - # You can skip it - skip-schema: no + # Default value: 'no' + # Required: true + skip-schema: 'no' + + # Extra options for the CSV Blueprint. Only for debbuging and profiling. + # Available options: + # ANSI output. You can disable ANSI colors if you want with `--no-ansi`. + # Verbosity level: Available options: `-v`, `-vv`, `-vvv`. + # Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. + # Add flag `--debug` if you want to see more really deep details. + # Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance. + # Default value: 'options: --ansi' + # You can skip it. + extra: 'options: --ansi' ``` -To see user-friendly error outputs in your pull requests (PRs), specify `report: github`. 
This -utilizes [annotations](https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message) -to highlight bugs directly within the GitHub interface at the PR level. This feature allows errors to be displayed in -the exact location within the CSV file, right in the diff of your Pull Requests. For a practical example, -view [this live demo PR](https://github.com/JBZoo/Csv-Blueprint-Demo/pull/1/files). - -![GitHub Actions - PR](.github/assets/github-actions-pr.png) - -
- Click to see example in GitHub Actions terminal - -![GitHub Actions - Terminal](.github/assets/github-actions-termintal.png) - -
- ### Docker container Ensure you have Docker installed on your machine. @@ -194,7 +195,7 @@ make docker-build # local tag is "jbzoo/csv-blueprint:local" ### Phar binary
- Click to see using PHAR file + CLICK to see using PHAR file Ensure you have PHP installed on your machine. @@ -307,15 +308,20 @@ description: | # Any description of the CSV file. Not u supporting a wide range of data validation rules from basic type checks to complex regex validations. This example serves as a comprehensive guide for creating robust CSV file validations. +presets: # Include another schema and define an alias for it. + my-preset: ./preset_users.yml # Define preset alias "my-preset". See README.md for details. + # Regular expression to match the file name. If not set, then no pattern check. # This allows you to pre-validate the file name before processing its contents. # Feel free to check parent directories as well. # See: https://www.php.net/manual/en/reference.pcre.pattern.syntax.php filename_pattern: /demo(-\d+)?\.csv$/i +# preset: my-preset # See README.md for details. # Here are default values to parse CSV file. # You can skip this section if you don't need to override the default values. csv: + preset: my-preset # See README.md for details. header: true # If the first row is a header. If true, name of each column is required. delimiter: , # Delimiter character in CSV file. quote_char: \ # Quote character in CSV file. @@ -327,6 +333,7 @@ csv: # They are not(!) related to the data in the columns. # You can skip this section if you don't need to override the default values. structural_rules: # Here are default values. + preset: my-preset # See README.md for details. strict_column_order: true # Ensure columns in CSV follow the same order as defined in this YML schema. It works only if "csv.header" is true. allow_extra_columns: false # Allow CSV files to have more columns than specified in this YML schema. @@ -335,7 +342,8 @@ structural_rules: # Here are default values. # This will not affect the validator, but will make it easier for you to navigate. # For convenience, use the first line as a header (if possible). 
columns: - - name: Column Name (header) # Any custom name of the column in the CSV file (first row). Required if "csv.header" is true. + - preset: my-preset/login # Add preset rules for the column. See README.md for details. + name: Column Name (header) # Any custom name of the column in the CSV file (first row). Required if "csv.header" is true. description: Lorem ipsum # Description of the column. Not used in the validation process. example: Some example # Example of the column value. Schema will also check this value on its own. @@ -348,6 +356,8 @@ columns: # Data validation for each(!) value in the column. Please, see notes in README.md # Every rule is optional. rules: + preset: my-preset/login # Add preset rules for the column. See README.md for details. + # General rules not_empty: true # Value is not an empty string. Actually checks if the string length is not 0. exact_value: Some string # Exact value for string in the column. @@ -513,9 +523,9 @@ columns: # Identifications phone: ALL # Validates if the input is a phone number. Specify the country code to validate the phone number for a specific country. Example: "ALL", "US", "BR".". + postal_code: US # Validate postal code by country code (alpha-2). Example: "02179". Extracted from https://www.geonames.org is_iban: true # IBAN - International Bank Account Number. See: https://en.wikipedia.org/wiki/International_Bank_Account_Number is_bic: true # Validates a Bank Identifier Code (BIC) according to ISO 9362 standards. See: https://en.wikipedia.org/wiki/ISO_9362 - postal_code: US # Validate postal code by country code (alpha-2). Example: "02179". Extracted from https://www.geonames.org is_imei: true # Validates an International Mobile Equipment Identity (IMEI). See: https://en.wikipedia.org/wiki/International_Mobile_Station_Equipment_Identity is_isbn: true # Validates an International Standard Book Number (ISBN). 
See: https://www.isbn-international.org/content/what-isbn @@ -556,6 +566,8 @@ columns: # Data validation for the entire(!) column using different data aggregation methods. # Every rule is optional. aggregate_rules: + preset: my-preset/login # Add preset aggregate rules for the column. See README.md for details. + is_unique: true # All values in the column are unique. # Check if the column is sorted in a specific order. @@ -905,6 +917,479 @@ ensure thorough validation of your CSV files. These additional checks further secure the integrity and consistency of your CSV data against the defined validation schema. +## Presets and reusable schemas + +Presets enhance the efficiency and reusability of schema definitions for CSV file validation, streamlining the +validation process across various files and schemas. Their benefits include: + +- **Consistency Across Schemas:** Presets guarantee uniform validation rules for common fields like user IDs, email + addresses, and phone numbers across different CSV files. This consistency is crucial for maintaining data integrity + and reliability. +- **Ease of Maintenance:** Centralized updates to presets automatically propagate changes to all schemas using them. + This approach eliminates the need to manually update each schema, significantly reducing maintenance efforts. +- **Flexibility and Customization:** While offering a foundational set of validation rules, presets also allow for + field-specific rule overrides to meet the unique requirements of individual schemas. This ensures a balance between + consistency and customization. +- **Rapid Development:** Presets facilitate quick schema setup for new CSV files by reusing established + validation rules. This allows for a faster development cycle, focusing on unique fields without redefining common + rules. 
+- **Error Reduction:** Utilizing consistent and tested presets reduces the likelihood of errors in manual schema + definitions, leading to improved data quality and reliability. +- **Efficiency in Large-scale Projects:** In large projects with extensive data volumes, presets provide a standardized + approach to applying common validation logic, simplifying data management and validation tasks. + +Overall, presets offer a compelling solution for anyone involved in CSV file validation, enhancing consistency, maintenance, flexibility, development speed, error minimization, and project efficiency. + + +### Example with presets + +Let's look at a real-life example. Suppose you have a "library" of different user profile validation rules that can be +used in a wide variety of CSV files. + +To keep rules consistent and avoid copy-pasting, you can reuse ANY(!) existing schema. +In fact, this can be considered as partial inheritance. + +**Important notes** + - You can make the chain of inheritance infinitely long. + I.e. make chains of the form `grandparent.yml` -> `parent.yml` -> `child.yml` -> `grandchild.yml` -> etc. + Of course, if you like to take risks ;). + - Any(!) of the schema files can be used alone or as a library. The syntax is the same. + - Schemas with presets validate themselves and if there are any obvious issues, you will see them when you try to use + the schema. But logical conflicts between rules are not checked (It's almost impossible from a code perspective). + As mentioned above, rules work in isolation and are not aware of each other. So the set of rules is your responsibility as always. + - Alias in presets must match the regex pattern `/^[a-z0-9-_]+$/i`. Otherwise, it might break the syntax. + +**If something went wrong** + +If you're having trouble working with presets and don't understand how CSV Blueprint interprets them under the hood, +just add `--dump-schema` to see the final schema.
Also, there is a separate CLI command for validating schema: + +```shell +./csv-blueprint validate:schema --dump-schema --schema=./your/schema.yml +``` + + +Let's take a look at what this looks like in code. +- Define a couple of basic rules for [database columns](schema-examples/preset_database.yml). +- Also, one of the files will contain rules specific only to the [users profile](schema-examples/preset_users.yml). +- And of course, let's [make a schema](schema-examples/preset_usage.yml) that will simultaneously reuse the rules from these two files. + +As a result, you don't just get a bunch of schemas for validation, which is difficult to manage, but something like a +framework(!) that will be targeted to the specifics of your project, especially when there are dozens or even hundreds +of CSV files and rules. It will be much easier to achieve consistency. Very often it's quite important. + +[Database preset](schema-examples/preset_database.yml) +
+ CLICK to see source code + + +```yml +name: Presets for database columns +description: This schema contains basic rules for database user data. + +columns: + - name: id + description: Unique identifier, usually used to denote a primary key in databases. + example: 12345 + rules: + not_empty: true + is_trimmed: true + is_int: true + num_min: 1 + aggregate_rules: + is_unique: true + sorted: [ asc, numeric ] + + - name: status + description: Status in database + example: active + rules: + not_empty: true + allow_values: [ active, inactive, pending, deleted ] +``` + + +
+ + +[User data preset](schema-examples/preset_users.yml) + +
+ CLICK to see source code + + +```yml +name: Common presets for user data +description: > + This schema contains common presets for user data. + It can be used as a base for other schemas. + +filename_pattern: /users-.*\.csv$/i + +csv: + delimiter: ';' + +columns: + - name: login + description: User's login name + example: johndoe + rules: + not_empty: true + is_trimmed: true + is_lowercase: true + is_slug: true + length_min: 3 + length_max: 20 + is_alnum: true + aggregate_rules: + is_unique: true + + - name: password + description: User's password + example: '9RfzENKD' + rules: + not_empty: true + is_trimmed: true + regex: /^[a-zA-Z\d!@#$%^&*()_+\-=\[\]{};':"\|,.<>\/?~]{6,}$/ # Safe list of special characters for passwords. + contains_none: [ "password", "123456", "qwerty", " " ] + charset: UTF-8 + length_min: 6 + length_max: 20 + + - name: full_name + description: User's full name + example: 'John Doe Smith' + rules: + not_empty: true + is_trimmed: true + charset: UTF-8 + contains: " " + word_count_min: 2 + word_count_max: 8 + is_capitalize: true + aggregate_rules: + is_unique: true + + - name: email + description: User's email address + example: user@example.com + rules: + not_empty: true + is_trimmed: true + is_email: true + is_lowercase: true + aggregate_rules: + is_unique: true + + - name: birthday + description: Validates the user's birthday. + example: '1990-01-01' + rules: + not_empty: true # The birthday field must not be empty. + is_trimmed: true # Trims the value before validation. + date_format: Y-m-d # Checks if the date matches the YYYY-MM-DD format. + is_date: true # Validates if the value is a valid date. + date_age_greater: 0 # Ensures the date is in the past. + date_age_less: 150 # Ensures the user is not older than 150 years. + date_max: now # Ensures the date is not in the future. 
+ + - name: phone_number + description: User's phone number in US + example: '+1 650 253 00 00' + rules: + not_empty: true + is_trimmed: true + starts_with: '+1' + phone: US + + - name: balance + description: User's balance in USD + example: '100.00' + rules: + not_empty: true + is_trimmed: true + is_float: true + num_min: 0.00 + num_max: 1000000000.00 # 1 billion is max amount in our system. + precision: 2 + + - name: short_description + description: A brief description of the item + example: 'Lorem ipsum dolor sit amet' + rules: + not_empty: true + contains: " " + length_max: 255 + is_trimmed: true +``` + + +
+ +[Usage of presets](schema-examples/preset_usage.yml) Under the hood, this short and clear YAML expands to roughly the schema shown in the collapsed block below. As you can see, it simplifies your work a lot. + + +```yml +name: Schema uses presets and add new columns + specific rules. +description: This schema uses presets. Also, it demonstrates how to override preset values. + +presets: # Include any other schemas and defined for each alias. + users: ./preset_users.yml # Include the schema with common user data. + db: ./preset_database.yml # Include the schema with basic database columns. + +csv: + preset: users # Take the CSV settings from the preset. + enclosure: '|' # Overridden enclosure only for this schema. + +columns: + # Grap only needed columns from the preset in specific order. + - preset: db/id + - preset: db/status + - preset: users/login + - preset: users/email + - preset: users/full_name + - preset: users/birthday + - preset: users/phone_number # Rename the column. "phone_number" => "phone". + name: phone + - preset: users/password # Overridden value to force a strong password. + rules: { length_min: 10 } + - name: admin_note # New column specific only this schema. + description: Admin note + rules: + not_empty: true + length_min: 1 + length_max: 10 + aggregate_rules: # In practice this will be a rare case, but the opportunity is there. + preset: db/id # Take only aggregate rules from the preset. + is_unique: true # Added new specific aggregate rule. +``` + + + +
+ CLICK to see what it looks like in memory. + + +```yml +name: 'Schema uses presets and add new columns + specific rules.' +description: 'This schema uses presets. Also, it demonstrates how to override preset values.' +presets: + users: ./schema-examples/preset_users.yml + db: ./schema-examples/preset_database.yml +filename_pattern: '' +csv: + header: true + delimiter: ; + quote_char: \ + enclosure: '|' + encoding: utf-8 + bom: false +structural_rules: + strict_column_order: true + allow_extra_columns: false +columns: + - + name: id + description: 'Unique identifier, usually used to denote a primary key in databases.' + example: 12345 + required: true + rules: + not_empty: true + is_trimmed: true + is_int: true + num_min: 1 + aggregate_rules: + is_unique: true + sorted: + - asc + - numeric + - + name: status + description: 'Status in database' + example: active + required: true + rules: + not_empty: true + allow_values: + - active + - inactive + - pending + - deleted + aggregate_rules: [] + - + name: login + description: "User's login name" + example: johndoe + required: true + rules: + not_empty: true + is_trimmed: true + is_lowercase: true + is_slug: true + length_min: 3 + length_max: 20 + is_alnum: true + aggregate_rules: + is_unique: true + - + name: email + description: "User's email address" + example: user@example.com + required: true + rules: + not_empty: true + is_trimmed: true + is_email: true + is_lowercase: true + aggregate_rules: + is_unique: true + - + name: full_name + description: "User's full name" + example: 'John Doe Smith' + required: true + rules: + not_empty: true + is_trimmed: true + charset: UTF-8 + contains: ' ' + word_count_min: 2 + word_count_max: 8 + is_capitalize: true + aggregate_rules: + is_unique: true + - + name: birthday + description: "Validates the user's birthday." 
+ example: '1990-01-01' + required: true + rules: + not_empty: true + is_trimmed: true + date_format: Y-m-d + is_date: true + date_age_greater: 0 + date_age_less: 150 + date_max: now + aggregate_rules: [] + - + name: phone + description: "User's phone number in US" + example: '+1 650 253 00 00' + required: true + rules: + not_empty: true + is_trimmed: true + starts_with: '+1' + phone: US + aggregate_rules: [] + - + name: password + description: "User's password" + example: 9RfzENKD + required: true + rules: + not_empty: true + is_trimmed: true + regex: '/^[a-zA-Z\d!@#$%^&*()_+\-=\[\]{};'':"\|,.<>\/?~]{6,}$/' + contains_none: + - password + - '123456' + - qwerty + - ' ' + charset: UTF-8 + length_min: 10 + length_max: 20 + aggregate_rules: [] + - + name: admin_note + description: 'Admin note' + example: ~ + required: true + rules: + not_empty: true + length_min: 1 + length_max: 10 + aggregate_rules: + is_unique: true + sorted: + - asc + - numeric +``` + + +
+ +As a result, readability and maintainability improve dramatically. You can easily add new rules, change existing ones, and so on. + + +### Complete example with all available syntax + + +```yml +name: Complite list of preset features +description: This schema contains all the features of the presets. + +presets: + # The basepath for the preset is `.` (current directory of the current schema file). + # Define alias "db" for schema in `./preset_database.yml`. + db: preset_database.yml # Or `db: ./preset_database.yml`. It's up to you. + + # For example, you can use a relative path. + users: ./../schema-examples/preset_users.yml + + # Or you can use an absolute path. + # db: /full/path/preset_database.yml + +filename_pattern: { preset: users } # Take the filename pattern from the preset. +structural_rules: { preset: users } # Take the global rules from the preset. +csv: { preset: users } # Take the CSV settings from the preset. + +columns: + # Use name of column from the preset. + # "db" is alias. "id" is column `name` in `preset_database.yml`. + - preset: 'db/id' + + # Use column index. "db" is alias. "0" is column index in `preset_database.yml`. + - preset: 'db/0' + - preset: 'db/0:' + + # Use column index and column name. It useful if column name is not unique. + - preset: 'db/0:id' + + # Use only `rules` of "status" column from the preset. + - name: My column + rules: + preset: 'db/status' + + # Override only `aggregate_rules` from the preset. + # Use only `aggregate_rules` of "id" column from the preset. + # We strictly take only the very first column (index = 0). + - name: My column + aggregate_rules: + preset: 'db/0:id' + + # Combo!!! If you're a risk-taker or have a high level of inner zen. :) + # Creating a column from three other columns. + # In fact, it will merge all three at once with key replacement. + - name: Crazy combo! + description: > # Just a great advice. + I like to take risks, too. + Be careful. Use your power wisely.
+ example: ~ # Ignore inherited "example" value. Set it `null`. + preset: 'users/login' + rules: + preset: 'users/email' + not_empty: true # Disable the rule from the preset. + aggregate_rules: + preset: 'db/0' +``` + + +**Note:** All provided YAML examples pass built-in validation, yet they may not make practical sense. +These are intended solely for demonstration and to illustrate potential configurations and features. + + ## Complete CLI help message This section outlines all available options and commands provided by the tool, leveraging the JBZoo/Cli package for its @@ -948,6 +1433,7 @@ Options: Returns a non-zero exit code if any error is detected. Enable by setting to any non-empty value or "yes". [default: "no"] + --dump-schema Dumps the schema of the CSV file if you want to see the final schema after inheritance. --debug Intended solely for debugging and advanced profiling purposes. Activating this option provides detailed process insights, useful for troubleshooting and performance analysis. @@ -1003,6 +1489,7 @@ Options: Returns a non-zero exit code if any error is detected. Enable by setting to any non-empty value or "yes". [default: "no"] + --dump-schema Dumps the schema of the CSV file if you want to see the final schema after inheritance. --debug Intended solely for debugging and advanced profiling purposes. Activating this option provides detailed process insights, useful for troubleshooting and performance analysis. @@ -1037,6 +1524,8 @@ The validation process culminates in a human-readable report detailing any error the default report format is a table, the tool supports various output formats, including text, GitHub, GitLab, TeamCity, JUnit, among others, to best suit your project's needs and your personal or team preferences. +### Table format + When using the `table` format (default), the output is organized in a clear, easily interpretable table that lists all discovered errors. 
This format is ideal for quick reviews and sharing with team members for further action. @@ -1088,12 +1577,30 @@ Summary: +### GitHub Action format + +To see user-friendly error outputs in your pull requests (PRs), specify `report: github`. This +utilizes [annotations](https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message) +to highlight bugs directly within the GitHub interface at the PR level. This feature allows errors to be displayed in +the exact location within the CSV file, right in the diff of your Pull Requests. For a practical example, +view [this live demo PR](https://github.com/JBZoo/Csv-Blueprint-Demo/pull/1/files). + +![GitHub Actions - PR](.github/assets/github-actions-pr.png) + +
+ CLICK to see example in GitHub Actions terminal + +![GitHub Actions - Terminal](.github/assets/github-actions-termintal.png) + +
+ + +### Text format Optional format `text` with highlited keywords: ```sh ./csv-blueprint validate:csv --report=text ``` - ![Report - Text](.github/assets/output-text.png) @@ -1102,6 +1609,7 @@ Optional format `text` with highlited keywords: * Tools uses [JBZoo/CI-Report-Converter](https://github.com/JBZoo/CI-Report-Converter) as SDK to convert reports to different formats. So you can easily integrate it with any CI system. + ## Benchmarks Understanding the performance of this tool is crucial, but it's important to note that its efficiency is influenced by @@ -1135,12 +1643,10 @@ However, to gain a general understanding of performance, refer to the table belo Profiles: -- **[Quickest:](tests/Benchmarks/bench_0_quickest_combo.yml)** Focuses on the fastest rules, either cell or aggregation, providing a baseline for maximum throughput. -- **[Minimum:](tests/Benchmarks/bench_1_mini_combo.yml)** Uses a set of normal performance rules, with two instances of each, to simulate a lightweight validation - scenario. -- **[Realistic:](tests/Benchmarks/bench_2_realistic_combo.yml)** Represents a mix of rules likely encountered in typical use cases, offering a balanced view of - performance. -- **[All Aggregations:](tests/Benchmarks/bench_3_all_agg.yml)** Tests all aggregation rules simultaneously, illustrating the tool's behavior under maximum load. +- **[Quickest:](tests/Benchmarks/bench_0_quickest_combo.yml)** Focuses on the fastest rules, either cell or aggregation, providing a baseline. +- **[Minimum:](tests/Benchmarks/bench_1_mini_combo.yml)** Uses a set of normal performance rules, with two instances of each. +- **[Realistic:](tests/Benchmarks/bench_2_realistic_combo.yml)** Represents a mix of rules likely encountered in typical use cases. +- **[All Aggregations:](tests/Benchmarks/bench_3_all_agg.yml)** Tests all aggregation rules simultaneously, illustrating maximum load. 
Divisions: @@ -1347,19 +1853,19 @@ In summary, the tool is developed with the highest standards of modern PHP pract It's random ideas and plans. No promises and deadlines. Feel free to [help me!](#contributing).
- Click to see the roadmap + CLICK to see the roadmap * **Batch processing** * If option `--csv` is not specified, then the STDIN is used. To build a pipeline in Unix-like systems. * Flag to ignore file name pattern. It's useful when you have a lot of files, and you don't want to validate the file name. * **Validation** + * Multi `filename_pattern`. Support list of regexs. * Multi values in one cell. * Custom cell rule as a callback. It's useful when you have a complex rule that can't be described in the schema file. * Custom agregate rule as a callback. It's useful when you have a complex rule that can't be described in the schema file. * Configurable keyword for null/empty values. By default, it's an empty string. But you will use `null`, `nil`, `none`, `empty`, etc. Overridable on the column level. * Handle empty files and files with only a header row, or only with one line of data. One column wthout header is also possible. - * Inheritance of schemas, rules and columns. Define parent schema and override some rules in the child schemas. Make it DRY and easy to maintain. * If option `--schema` is not specified, then validate only super base level things (like "is it a CSV file?"). * Complex rules (like "if field `A` is not empty, then field `B` should be not empty too"). * Extending with custom rules and custom report formats. Plugins? @@ -1442,7 +1948,7 @@ make codestyle - [Retry](https://github.com/JBZoo/Retry) - Tiny PHP library providing retry/backoff functionality with strategies and jitter.
- Click to see interesting fact + CLICK to see interesting fact I've achieved a personal milestone. The [initial release](https://github.com/JBZoo/Csv-Blueprint/releases/tag/0.1) of the project was crafted from the ground up in approximately 3 days, interspersed with regular breaks to care for a diff --git a/action.yml b/action.yml index 61b89c99..eff44d56 100644 --- a/action.yml +++ b/action.yml @@ -20,13 +20,16 @@ branding: inputs: csv: - description: > - Path(s) to validate. You can specify path in which CSV files will be searched. - Feel free to use glob pattrens. Usage examples: - /full/path/file.csv, p/file.csv, p/*.csv, p/**/*.csv, p/**/name-*.csv, **/*.csv, etc. + description: | + Specify the path(s) to the CSV files you want to validate. + This can include a direct path to a file or a directory to search with a maximum depth of 10 levels. + Examples: /full/path/name.csv; p/file.csv; p/*.csv; p/**/*.csv; p/**/name-*.csv; **/*.csv required: true schema: - description: 'Schema filepath. It can be a YAML, JSON or PHP. See examples on GitHub.' + description: | + Specify the path(s) to the schema file(s), supporting YAML, JSON, or PHP formats. + Similar to CSV paths, you can direct to specific files or search directories with glob patterns. + Examples: /full/path/name.yml; p/file.yml; p/*.yml; p/**/*.yml; p/**/name-*.yml; **/*.yml required: true report: description: 'Report format. Available options: text, table, github, gitlab, teamcity, junit.' @@ -43,12 +46,15 @@ inputs: # Only for debbuging and profiling extra: - description: > - ANSI output. You can disable ANSI colors if you want with `--no-ansi`. - Verbosity level: Available options: `-v`, `-vv`, `-vvv` - Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. - Add flag `--debug` if you want to see more really deep details. - default: 'extra: --ansi' + description: | + Extra options for the CSV Blueprint. Only for debbuging and profiling. + Available options: + ANSI output. 
You can disable ANSI colors if you want with `--no-ansi`. + Verbosity level: Available options: `-v`, `-vv`, `-vvv`. + Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. + Add flag `--debug` if you want to see more really deep details. + Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance. + default: 'options: --ansi' runs: using: 'docker' diff --git a/schema-examples/full.json b/schema-examples/full.json index 232d52be..8ea8fc5f 100644 --- a/schema-examples/full.json +++ b/schema-examples/full.json @@ -2,9 +2,14 @@ "name" : "CSV Blueprint Schema Example", "description" : "This YAML file provides a detailed description and validation rules for CSV files\nto be processed by CSV Blueprint tool. It includes specifications for file name patterns,\nCSV formatting options, and extensive validation criteria for individual columns and their values,\nsupporting a wide range of data validation rules from basic type checks to complex regex validations.\nThis example serves as a comprehensive guide for creating robust CSV file validations.\n", + "presets" : { + "my-preset" : ".\/preset_users.yml" + }, + "filename_pattern" : "\/demo(-\\d+)?\\.csv$\/i", "csv" : { + "preset" : "my-preset", "header" : true, "delimiter" : ",", "quote_char" : "\\", @@ -14,18 +19,21 @@ }, "structural_rules" : { + "preset" : "my-preset", "strict_column_order" : true, "allow_extra_columns" : false }, "columns" : [ { + "preset" : "my-preset/login", "name" : "Column Name (header)", "description" : "Lorem ipsum", "example" : "Some example", "required" : true, "rules" : { + "preset" : "my-preset/login", "not_empty" : true, "exact_value" : "Some string", "allow_values" : ["y", "n", ""], @@ -147,9 +155,9 @@ "is_luhn" : true, "phone" : "ALL", + "postal_code" : "US", "is_iban" : true, "is_bic" : true, - "postal_code" : "US", "is_imei" : true, "is_isbn" : true, @@ -166,6 +174,7 @@ "credit_card" : "Any" }, "aggregate_rules" : { + "preset" 
: "my-preset/login", "is_unique" : true, "sorted" : ["asc", "natural"], diff --git a/schema-examples/full.php b/schema-examples/full.php index 78c6adce..95703d45 100644 --- a/schema-examples/full.php +++ b/schema-examples/full.php @@ -23,9 +23,14 @@ This example serves as a comprehensive guide for creating robust CSV file validations. ', + 'presets' => [ + 'my-preset' => './preset_users.yml', + ], + 'filename_pattern' => '/demo(-\\d+)?\\.csv$/i', 'csv' => [ + 'preset' => 'my-preset', 'header' => true, 'delimiter' => ',', 'quote_char' => '\\', @@ -35,18 +40,21 @@ ], 'structural_rules' => [ + 'preset' => 'my-preset', 'strict_column_order' => true, 'allow_extra_columns' => false, ], 'columns' => [ [ + 'preset' => 'my-preset/login', 'name' => 'Column Name (header)', 'description' => 'Lorem ipsum', 'example' => 'Some example', 'required' => true, 'rules' => [ + 'preset' => 'my-preset/login', 'not_empty' => true, 'exact_value' => 'Some string', 'allow_values' => ['y', 'n', ''], @@ -167,9 +175,9 @@ 'is_luhn' => true, 'phone' => 'ALL', + 'postal_code' => 'US', 'is_iban' => true, 'is_bic' => true, - 'postal_code' => 'US', 'is_imei' => true, 'is_isbn' => true, @@ -188,6 +196,8 @@ ], 'aggregate_rules' => [ + 'preset' => 'my-preset/login', + 'is_unique' => true, 'sorted' => ['asc', 'natural'], diff --git a/schema-examples/full.yml b/schema-examples/full.yml index 4ab1f58f..b3c29bd4 100644 --- a/schema-examples/full.yml +++ b/schema-examples/full.yml @@ -22,15 +22,20 @@ description: | # Any description of the CSV file. Not u supporting a wide range of data validation rules from basic type checks to complex regex validations. This example serves as a comprehensive guide for creating robust CSV file validations. +presets: # Include another schema and define an alias for it. + my-preset: ./preset_users.yml # Define preset alias "my-preset". See README.md for details. + # Regular expression to match the file name. If not set, then no pattern check. 
# This allows you to pre-validate the file name before processing its contents. # Feel free to check parent directories as well. # See: https://www.php.net/manual/en/reference.pcre.pattern.syntax.php filename_pattern: /demo(-\d+)?\.csv$/i +# preset: my-preset # See README.md for details. # Here are default values to parse CSV file. # You can skip this section if you don't need to override the default values. csv: + preset: my-preset # See README.md for details. header: true # If the first row is a header. If true, name of each column is required. delimiter: , # Delimiter character in CSV file. quote_char: \ # Quote character in CSV file. @@ -42,6 +47,7 @@ csv: # They are not(!) related to the data in the columns. # You can skip this section if you don't need to override the default values. structural_rules: # Here are default values. + preset: my-preset # See README.md for details. strict_column_order: true # Ensure columns in CSV follow the same order as defined in this YML schema. It works only if "csv.header" is true. allow_extra_columns: false # Allow CSV files to have more columns than specified in this YML schema. @@ -50,7 +56,8 @@ structural_rules: # Here are default values. # This will not affect the validator, but will make it easier for you to navigate. # For convenience, use the first line as a header (if possible). columns: - - name: Column Name (header) # Any custom name of the column in the CSV file (first row). Required if "csv.header" is true. + - preset: my-preset/login # Add preset rules for the column. See README.md for details. + name: Column Name (header) # Any custom name of the column in the CSV file (first row). Required if "csv.header" is true. description: Lorem ipsum # Description of the column. Not used in the validation process. example: Some example # Example of the column value. Schema will also check this value on its own. @@ -63,6 +70,8 @@ columns: # Data validation for each(!) value in the column. 
Please, see notes in README.md # Every rule is optional. rules: + preset: my-preset/login # Add preset rules for the column. See README.md for details. + # General rules not_empty: true # Value is not an empty string. Actually checks if the string length is not 0. exact_value: Some string # Exact value for string in the column. @@ -228,9 +237,9 @@ columns: # Identifications phone: ALL # Validates if the input is a phone number. Specify the country code to validate the phone number for a specific country. Example: "ALL", "US", "BR".". + postal_code: US # Validate postal code by country code (alpha-2). Example: "02179". Extracted from https://www.geonames.org is_iban: true # IBAN - International Bank Account Number. See: https://en.wikipedia.org/wiki/International_Bank_Account_Number is_bic: true # Validates a Bank Identifier Code (BIC) according to ISO 9362 standards. See: https://en.wikipedia.org/wiki/ISO_9362 - postal_code: US # Validate postal code by country code (alpha-2). Example: "02179". Extracted from https://www.geonames.org is_imei: true # Validates an International Mobile Equipment Identity (IMEI). See: https://en.wikipedia.org/wiki/International_Mobile_Station_Equipment_Identity is_isbn: true # Validates an International Standard Book Number (ISBN). See: https://www.isbn-international.org/content/what-isbn @@ -271,6 +280,8 @@ columns: # Data validation for the entire(!) column using different data aggregation methods. # Every rule is optional. aggregate_rules: + preset: my-preset/login # Add preset aggregate rules for the column. See README.md for details. + is_unique: true # All values in the column are unique. # Check if the column is sorted in a specific order. 
diff --git a/schema-examples/full_clean.yml b/schema-examples/full_clean.yml index aba54a55..fc278628 100644 --- a/schema-examples/full_clean.yml +++ b/schema-examples/full_clean.yml @@ -21,9 +21,13 @@ description: | supporting a wide range of data validation rules from basic type checks to complex regex validations. This example serves as a comprehensive guide for creating robust CSV file validations. +presets: + my-preset: ./preset_users.yml + filename_pattern: '/demo(-\d+)?\.csv$/i' csv: + preset: my-preset header: true delimiter: ',' quote_char: \ @@ -32,16 +36,19 @@ csv: bom: false structural_rules: + preset: my-preset strict_column_order: true allow_extra_columns: false columns: - - name: 'Column Name (header)' + - preset: my-preset/login + name: 'Column Name (header)' description: 'Lorem ipsum' example: 'Some example' required: true rules: + preset: my-preset/login not_empty: true exact_value: 'Some string' allow_values: [ 'y', 'n', '' ] @@ -161,9 +168,9 @@ columns: is_luhn: true phone: ALL + postal_code: US is_iban: true is_bic: true - postal_code: US is_imei: true is_isbn: true @@ -182,6 +189,7 @@ columns: credit_card: Any aggregate_rules: + preset: my-preset/login is_unique: true sorted: [ asc, natural ] first_num_min: 1.0 diff --git a/schema-examples/preset_database.yml b/schema-examples/preset_database.yml new file mode 100644 index 00000000..ec7c8555 --- /dev/null +++ b/schema-examples/preset_database.yml @@ -0,0 +1,34 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +name: Presets for database columns +description: This schema contains basic rules for database user data. 
+ +columns: + - name: id + description: Unique identifier, usually used to denote a primary key in databases. + example: 12345 + rules: + not_empty: true + is_trimmed: true + is_int: true + num_min: 1 + aggregate_rules: + is_unique: true + sorted: [ asc, numeric ] + + - name: status + description: Status in database + example: active + rules: + not_empty: true + allow_values: [ active, inactive, pending, deleted ] diff --git a/schema-examples/preset_features.yml b/schema-examples/preset_features.yml new file mode 100644 index 00000000..baedf9aa --- /dev/null +++ b/schema-examples/preset_features.yml @@ -0,0 +1,68 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +name: Complite list of preset features +description: This schema contains all the features of the presets. + +presets: + # The basepath for the preset is `.` (current directory of the current schema file). + # Define alias "db" for schema in `./preset_database.yml`. + db: preset_database.yml # Or `db: ./preset_database.yml`. It's up to you. + + # For example, you can use a relative path. + users: ./../schema-examples/preset_users.yml + + # Or you can use an absolute path. + # db: /full/path/preset_database.yml + +filename_pattern: { preset: users } # Take the filename pattern from the preset. +structural_rules: { preset: users } # Take the global rules from the preset. +csv: { preset: users } # Take the CSV settings from the preset. + +columns: + # Use name of column from the preset. + # "db" is alias. "id" is column `name` in `preset_database.yml`. + - preset: 'db/id' + + # Use column index. "db" is alias. "0" is column index in `preset_database.yml`. 
+ - preset: 'db/0' + - preset: 'db/0:' + + # Use column index and column name. It's useful if the column name is not unique. + - preset: 'db/0:id' + + # Use only `rules` of "status" column from the preset. + - name: My column + rules: + preset: 'db/status' + + # Override only `aggregate_rules` from the preset. + # Use only `aggregate_rules` of "id" column from the preset. + # We strictly take only the very first column (index = 0). + - name: My column + aggregate_rules: + preset: 'db/0:id' + + # Combo!!! If you're a risk-taker or have a high level of inner zen. :) + # Creating a column from three other columns. + # In fact, it will merge all three at once with key replacement. + - name: Crazy combo! + description: > # Just a great advice. + I like to take risks, too. + Be careful. Use your power wisely. + example: ~ # Ignore inherited "example" value. Set it to `null`. + preset: 'users/login' + rules: + preset: 'users/email' + not_empty: true # Disable the rule from the preset. + aggregate_rules: + preset: 'db/0' diff --git a/schema-examples/preset_usage.yml b/schema-examples/preset_usage.yml new file mode 100644 index 00000000..48539c9a --- /dev/null +++ b/schema-examples/preset_usage.yml @@ -0,0 +1,44 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +name: Schema uses presets and add new columns + specific rules. +description: This schema uses presets. Also, it demonstrates how to override preset values. + +presets: # Include any other schemas and define an alias for each. + users: ./preset_users.yml # Include the schema with common user data. + db: ./preset_database.yml # Include the schema with basic database columns.
+ +csv: + preset: users # Take the CSV settings from the preset. + enclosure: '|' # Overridden enclosure only for this schema. + +columns: + # Grab only the needed columns from the preset, in a specific order. + - preset: db/id + - preset: db/status + - preset: users/login + - preset: users/email + - preset: users/full_name + - preset: users/birthday + - preset: users/phone_number # Rename the column. "phone_number" => "phone". + name: phone + - preset: users/password # Overridden value to force a strong password. + rules: { length_min: 10 } + - name: admin_note # New column specific only to this schema. + description: Admin note + rules: + not_empty: true + length_min: 1 + length_max: 10 + aggregate_rules: # In practice this will be a rare case, but the opportunity is there. + preset: db/id # Take only aggregate rules from the preset. + is_unique: true # Added new specific aggregate rule. diff --git a/schema-examples/preset_users.yml b/schema-examples/preset_users.yml new file mode 100644 index 00000000..87227d74 --- /dev/null +++ b/schema-examples/preset_users.yml @@ -0,0 +1,114 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +name: Common presets for user data +description: > + This schema contains common presets for user data. + It can be used as a base for other schemas.
+ +filename_pattern: /users-.*\.csv$/i + +csv: + delimiter: ';' + +columns: + - name: login + description: User's login name + example: johndoe + rules: + not_empty: true + is_trimmed: true + is_lowercase: true + is_slug: true + length_min: 3 + length_max: 20 + is_alnum: true + aggregate_rules: + is_unique: true + + - name: password + description: User's password + example: '9RfzE$8NKD' + rules: + not_empty: true + is_trimmed: true + regex: /^[a-zA-Z\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?~]{6,}$/ # Safe list of special characters for passwords. + contains_none: [ "password", "123456", "qwerty", " " ] + charset: UTF-8 + length_min: 6 + length_max: 20 + + - name: full_name + description: User's full name + example: 'John Doe Smith' + rules: + not_empty: true + is_trimmed: true + charset: UTF-8 + contains: " " + word_count_min: 2 + word_count_max: 8 + is_capitalize: true + aggregate_rules: + is_unique: true + + - name: email + description: User's email address + example: user@example.com + rules: + not_empty: true + is_trimmed: true + is_email: true + is_lowercase: true + aggregate_rules: + is_unique: true + + - name: birthday + description: Validates the user's birthday. + example: '1990-01-01' + rules: + not_empty: true # The birthday field must not be empty. + is_trimmed: true # Trims the value before validation. + date_format: Y-m-d # Checks if the date matches the YYYY-MM-DD format. + is_date: true # Validates if the value is a valid date. + date_age_greater: 0 # Ensures the date is in the past. + date_age_less: 150 # Ensures the user is not older than 150 years. + date_max: now # Ensures the date is not in the future. 
+ + - name: phone_number + description: User's phone number in US + example: '+1 650 253 00 00' + rules: + not_empty: true + is_trimmed: true + starts_with: '+1' + phone: US + + - name: balance + description: User's balance in USD + example: '100.00' + rules: + not_empty: true + is_trimmed: true + is_float: true + num_min: 0.00 + num_max: 1000000000.00 # 1 billion is max amount in our system. + precision: 2 + + - name: short_description + description: A brief description of the item + example: 'Lorem ipsum dolor sit amet' + rules: + not_empty: true + contains: " " + length_max: 255 + is_trimmed: true diff --git a/src/Commands/AbstractValidate.php b/src/Commands/AbstractValidate.php index 79c7f94f..7b5a8bb7 100644 --- a/src/Commands/AbstractValidate.php +++ b/src/Commands/AbstractValidate.php @@ -18,6 +18,7 @@ use JBZoo\Cli\CliCommand; use JBZoo\CsvBlueprint\Exception; +use JBZoo\CsvBlueprint\Schema; use JBZoo\CsvBlueprint\Utils; use JBZoo\CsvBlueprint\Validators\ErrorSuite; use Symfony\Component\Console\Input\InputOption; @@ -60,6 +61,12 @@ protected function configure(): void ]), 'no', ) + ->addOption( + 'dump-schema', + null, + InputOption::VALUE_NONE, + 'Dumps the schema of the CSV file if you want to see the final schema after inheritance.', + ) ->addOption( 'debug', null, @@ -153,6 +160,22 @@ protected function renderIssues(string $prefix, int $number, string $filepath, i $this->out("{$prefix}{$number} {$issues} in {$filepath}", $indent); } + protected function printDumpOfSchema(?Schema $schema): void + { + if ($schema === null) { + return; + } + $dump = $schema->dumpAsYamlString(); + $dump = \preg_replace('/^([ \t]*)([^:\n]+:)/m', '$1$2', $dump); + + if ($this->getOptBool('dump-schema')) { + $this->_('```yaml'); + $this->_("# File: {$schema->getFilename()}"); + $this->_($dump); + $this->_('```'); + } + } + protected static function renderPrefix(int $index, int $totalFiles): string { if ($totalFiles <= 1) { diff --git a/src/Commands/ValidateCsv.php 
b/src/Commands/ValidateCsv.php index 62bb7e7a..538bf7d2 100644 --- a/src/Commands/ValidateCsv.php +++ b/src/Commands/ValidateCsv.php @@ -142,8 +142,11 @@ private function validateSchemas(array $schemaFilenames): int continue; } + $schema = null; + try { - $schemaErrors = (new Schema($schemaFilename->getPathname()))->validate($quickCheck); + $schema = new Schema($schemaFilename->getPathname()); + $schemaErrors = $schema->validate($quickCheck); if ($schemaErrors->count() > 0) { $this->renderIssues($prefix, $schemaErrors->count(), $schemaPath, 2); $this->outReport($schemaErrors, 4); @@ -158,6 +161,7 @@ private function validateSchemas(array $schemaFilenames): int "{$prefix}Exception: {$e->getMessage()}", ], 2); } + $this->printDumpOfSchema($schema); } $this->out(''); diff --git a/src/Commands/ValidateSchema.php b/src/Commands/ValidateSchema.php index 80255dc0..834c4458 100644 --- a/src/Commands/ValidateSchema.php +++ b/src/Commands/ValidateSchema.php @@ -75,7 +75,9 @@ protected function executeAction(): int $schemaErrors = new ErrorSuite($filename); try { - $schemaErrors = (new Schema($filename))->validate($this->isQuickMode()); + $schema = new Schema($filename); + $schemaErrors = $schema->validate($this->isQuickMode()); + $this->printDumpOfSchema(new Schema($filename)); } catch (ParseException $e) { $schemaErrors->addError(new Error('schema.syntax', $e->getMessage(), '', $e->getParsedLine())); } catch (\Throwable $e) { diff --git a/src/Csv/Column.php b/src/Csv/Column.php index 32f6aacd..e3dfaf72 100644 --- a/src/Csv/Column.php +++ b/src/Csv/Column.php @@ -33,21 +33,21 @@ final class Column private ?int $csvOffset = null; private int $schemaId; - private Data $column; + private Data $data; private array $rules; private array $aggRules; public function __construct(int $schemaId, array $config) { $this->schemaId = $schemaId; - $this->column = new Data($config); + $this->data = new Data($config); $this->rules = $this->prepareRuleSet('rules'); $this->aggRules = 
$this->prepareRuleSet('aggregate_rules'); } public function getName(): string { - return $this->column->getString('name', self::FALLBACK_VALUES['name']); + return $this->data->getString('name', self::FALLBACK_VALUES['name']); } public function getCsvOffset(): ?int @@ -62,7 +62,7 @@ public function getSchemaId(): int public function getDescription(): string { - return $this->column->getString('description', self::FALLBACK_VALUES['description']); + return $this->data->getString('description', self::FALLBACK_VALUES['description']); } public function getHumanName(): string @@ -78,7 +78,7 @@ public function getHumanName(): string public function isRequired(): bool { - return $this->column->getBool('required', self::FALLBACK_VALUES['required']); + return $this->data->getBool('required', self::FALLBACK_VALUES['required']); } public function getRules(): array @@ -106,11 +106,16 @@ public function setCsvOffset(int $csvOffset): void $this->csvOffset = $csvOffset; } + public function getData(): Data + { + return clone $this->data; + } + private function prepareRuleSet(string $schemaKey): array { $rules = []; - $ruleSetConfig = $this->column->getSelf($schemaKey, [])->getArrayCopy(); + $ruleSetConfig = $this->data->getSelf($schemaKey, [])->getArrayCopy(); foreach ($ruleSetConfig as $ruleName => $ruleValue) { $rules[$ruleName] = $ruleValue; } diff --git a/src/Rules/Cell/ContainsNone.php b/src/Rules/Cell/ContainsNone.php index 43a50af6..b8c9cdd5 100644 --- a/src/Rules/Cell/ContainsNone.php +++ b/src/Rules/Cell/ContainsNone.php @@ -41,8 +41,8 @@ public function validateRule(string $cellValue): ?string foreach ($exclusions as $exclusion) { if (\strpos($cellValue, $exclusion) !== false) { - return "Value \"{$cellValue}\" must not contain any of the following: " . - Utils::printList($exclusions, 'green'); + return "Value \"{$cellValue}\" must not contain the string: " . 
+ Utils::printList($exclusion, 'green'); } } diff --git a/src/Schema.php b/src/Schema.php index ed33deb4..501fdcd1 100644 --- a/src/Schema.php +++ b/src/Schema.php @@ -21,6 +21,7 @@ use JBZoo\CsvBlueprint\Validators\ValidatorSchema; use JBZoo\Data\AbstractData; use JBZoo\Data\Data; +use Symfony\Component\Yaml\Yaml; use function JBZoo\Data\json; use function JBZoo\Data\phpArray; @@ -32,26 +33,8 @@ final class Schema public const ENCODING_UTF16 = 'utf-16'; public const ENCODING_UTF32 = 'utf-32'; - private const FALLBACK_VALUES = [ - 'csv' => [ - 'inherit' => null, - 'header' => true, - 'delimiter' => ',', - 'quote_char' => '\\', - 'enclosure' => '"', - 'encoding' => 'utf-8', - 'bom' => false, - ], - - 'structural_rules' => [ - 'strict_column_order' => true, - 'allow_extra_columns' => false, - ], - ]; - /** @var Column[] */ private array $columns; - private string $basepath = '.'; private ?string $filename; private AbstractData $data; @@ -59,22 +42,21 @@ public function __construct(null|array|string $csvSchemaFilenameOrArray = null) { if (\is_array($csvSchemaFilenameOrArray)) { $this->filename = '_custom_array_'; - $this->data = new Data($csvSchemaFilenameOrArray); + $data = new Data($csvSchemaFilenameOrArray); } elseif ( \is_string($csvSchemaFilenameOrArray) && $csvSchemaFilenameOrArray !== '' && \file_exists($csvSchemaFilenameOrArray) ) { $this->filename = $csvSchemaFilenameOrArray; - $this->data = new Data(); $fileExtension = \pathinfo($csvSchemaFilenameOrArray, \PATHINFO_EXTENSION); if ($fileExtension === 'yml' || $fileExtension === 'yaml') { - $this->data = yml($csvSchemaFilenameOrArray); + $data = yml($csvSchemaFilenameOrArray); } elseif ($fileExtension === 'json') { - $this->data = json($csvSchemaFilenameOrArray); + $data = json($csvSchemaFilenameOrArray); } elseif ($fileExtension === 'php') { - $this->data = phpArray($csvSchemaFilenameOrArray); + $data = phpArray($csvSchemaFilenameOrArray); } else { throw new \InvalidArgumentException("Unsupported file 
extension: {$fileExtension}"); } @@ -82,11 +64,22 @@ public function __construct(null|array|string $csvSchemaFilenameOrArray = null) throw new \InvalidArgumentException("Invalid schema data: {$csvSchemaFilenameOrArray}"); } else { $this->filename = null; - $this->data = new Data(); + $data = new Data(); + } + + $basepath = '.'; + $filename = (string)$this->filename; + if ($filename !== '' && \file_exists($filename)) { + $this->filename = (string)\realpath($filename); + $basepath = \dirname($filename); } - if ((string)$this->filename !== '') { - $this->basepath = \dirname((string)$this->filename); + try { + $this->data = (new SchemaDataPrep($data, $basepath))->buildData(); + } catch (\Exception $e) { + throw new \InvalidArgumentException( + "Invalid schema \"{$this->filename}\" data.\nUnexpected error: \"{$e->getMessage()}\"", + ); } $this->columns = $this->prepareColumns(); @@ -105,15 +98,39 @@ public function getColumns(): array return $this->columns; } - public function getColumn(int|string $columNameOrId): ?Column + public function getColumn(int|string $columNameOrId, ?string $forceName = null): ?Column { - if (\is_int($columNameOrId)) { + // By "index" + if (\is_numeric($columNameOrId)) { return \array_values($this->getColumns())[$columNameOrId] ?? 
null; } - foreach ($this->getColumns() as $schemaColumn) { - if ($schemaColumn->getName() === $columNameOrId) { - return $schemaColumn; + // by "index:" + if (\preg_match('/^(\d+):$/', $columNameOrId, $matches) !== 0) { + return $this->getColumn((int)$matches[1]); + } + + // by "index:name" + if (\preg_match('/^(\d+):(.*)$/', $columNameOrId, $matches) !== 0) { + return $this->getColumn((int)$matches[1], $matches[2]); + } + + if ($forceName !== null) { + // by "index:name" (real) + foreach ($this->getColumns() as $columnIndex => $schemaColumn) { + if ( + $columnIndex === (int)$columNameOrId + && $schemaColumn->getName() === $forceName + ) { + return $schemaColumn; + } + } + } else { + // by "name" + foreach ($this->getColumns() as $schemaColumn) { + if ($schemaColumn->getName() === $columNameOrId) { + return $schemaColumn; + } } } @@ -125,23 +142,6 @@ public function getFilenamePattern(): ?string return Utils::prepareRegex($this->data->getStringNull('filename_pattern')); } - public function getIncludes(): array - { - $result = []; - - foreach ($this->data->getArray('includes') as $alias => $includedPath) { - if (\file_exists($includedPath)) { - $path = $includedPath; - } else { - $path = $this->basepath . \DIRECTORY_SEPARATOR . 
$includedPath; - } - - $result[$alias] = new self($path); - } - - return $result; - } - public function validate(bool $quickStop = false): ErrorSuite { return (new ValidatorSchema($this))->validate($quickStop); @@ -173,12 +173,12 @@ public function isAllowExtraColumns(): bool public function csvHasBOM(): bool { - return $this->data->findBool('csv.bom', self::FALLBACK_VALUES['csv']['bom']); + return $this->data->findBool('csv.bom'); } public function getCsvDelimiter(): string { - $value = $this->data->findString('csv.delimiter', self::FALLBACK_VALUES['csv']['delimiter']); + $value = $this->data->findString('csv.delimiter'); if (\strlen($value) === 1) { return $value; } @@ -188,7 +188,7 @@ public function getCsvDelimiter(): string public function getCsvQuoteChar(): string { - $value = $this->data->findString('csv.quote_char', self::FALLBACK_VALUES['csv']['quote_char']); + $value = $this->data->findString('csv.quote_char'); if (\strlen($value) === 1) { return $value; } @@ -198,7 +198,7 @@ public function getCsvQuoteChar(): string public function getCsvEnclosure(): string { - $value = $this->data->findString('csv.enclosure', self::FALLBACK_VALUES['csv']['enclosure']); + $value = $this->data->findString('csv.enclosure'); if (\strlen($value) === 1) { return $value; @@ -210,7 +210,7 @@ public function getCsvEnclosure(): string public function getCsvEncoding(): string { $encoding = \strtolower( - \trim($this->data->findString('csv.encoding', self::FALLBACK_VALUES['csv']['encoding'])), + \trim($this->data->findString('csv.encoding')), ); $availableOptions = [ // TODO: add flexible handler for this @@ -229,7 +229,7 @@ public function getCsvEncoding(): string public function csvHasHeader(): bool { - return $this->data->findBool('csv.header', self::FALLBACK_VALUES['csv']['header']); + return $this->data->findBool('csv.header'); } public function getCsvParams(): array @@ -252,6 +252,20 @@ public function getStructuralRulesParams(): array ]; } + public function 
dumpAsYamlString(): string + { + return Yaml::dump( + $this->getData()->getArrayCopy(), + 10, + 2, + Yaml::DUMP_NULL_AS_TILDE + | Yaml::DUMP_NUMERIC_KEY_AS_STRING + | Yaml::DUMP_MULTI_LINE_LITERAL_BLOCK + | Yaml::DUMP_EMPTY_ARRAY_AS_SEQUENCE + | Yaml::DUMP_EXCEPTION_ON_INVALID_TYPE, + ); + } + /** * @return Column[] */ diff --git a/src/SchemaDataPrep.php b/src/SchemaDataPrep.php new file mode 100644 index 00000000..62827bd5 --- /dev/null +++ b/src/SchemaDataPrep.php @@ -0,0 +1,251 @@ + '', + 'description' => '', + 'filename_pattern' => '', + + 'inlcudes' => [], + + 'csv' => [ + 'header' => true, + 'delimiter' => ',', + 'quote_char' => '\\', + 'enclosure' => '"', + 'encoding' => Schema::ENCODING_UTF8, + 'bom' => false, + ], + + 'structural_rules' => [ + 'strict_column_order' => true, + 'allow_extra_columns' => false, + ], + + 'column' => [ + 'name' => '', + 'description' => '', + 'example' => null, + 'required' => true, + 'rules' => [], + 'aggregate_rules' => [], + ], + + 'rules' => [], + 'aggregate_rules' => [], + ]; + + private AbstractData $data; + private string $basepath; + + /** @var Schema[] */ + private array $aliases; + + public function __construct(AbstractData $data, string $basepath) + { + $this->data = $data; + $this->basepath = $basepath; + $this->aliases = $this->prepareAliases($data); + } + + public function buildData(): Data + { + $result = [ + 'name' => $this->buildName(), + 'description' => $this->buildDescription(), + 'presets' => $this->buildPresets(), + 'filename_pattern' => $this->buildByKey('filename_pattern')[0], + 'csv' => $this->buildByKey('csv'), + 'structural_rules' => $this->buildByKey('structural_rules'), + 'columns' => $this->buildColumns(), + ]; + + // Any extra keys to see schema validation errors + foreach ($this->data->getArrayCopy() as $key => $value) { + if (!isset($result[$key])) { + $result[$key] = $value; + } + } + + return new Data($result); + } + + public static function getAliasRegex(): string + { + return '/^' . 
self::ALIAS_REGEX . '$/i'; + } + + public static function validateAlias(string $alias): void + { + if ($alias === '') { + throw new \InvalidArgumentException('Empty alias'); + } + + $regex = self::getAliasRegex(); + if ($regex !== '' && \preg_match($regex, $alias) === 0) { + throw new \InvalidArgumentException("Invalid alias: \"{$alias}\""); + } + } + + /** + * @return Schema[] + */ + private function prepareAliases(AbstractData $data): array + { + $presets = []; + + foreach ($data->getArray('presets') as $alias => $includedPathOrArray) { + $alias = (string)$alias; + + self::validateAlias($alias); + + if (\is_array($includedPathOrArray)) { + $presets[$alias] = new Schema($includedPathOrArray); + } elseif (\file_exists($includedPathOrArray)) { + $presets[$alias] = (new Schema($includedPathOrArray)); + } elseif (\file_exists("{$this->basepath}/{$includedPathOrArray}")) { + $presets[$alias] = (new Schema("{$this->basepath}/{$includedPathOrArray}")); + } else { + throw new \InvalidArgumentException("Unknown included file: \"{$includedPathOrArray}\""); + } + } + + return $presets; + } + + private function getParentSchema(string $alias): Schema + { + if (isset($this->aliases[$alias])) { + return $this->aliases[$alias]; + } + + throw new \InvalidArgumentException("Unknown included alias: \"{$alias}\""); + } + + private function buildPresets(): array + { + $result = []; + foreach ($this->aliases as $alias => $schema) { + $result[$alias] = $schema->getFilename(); + } + + return $result; + } + + private function buildName(): string + { + return $this->data->getString('name', self::DEFAULTS['name']); + } + + private function buildDescription(): string + { + return $this->data->getString('description', self::DEFAULTS['description']); + } + + private function buildByKey(string $key = 'structural_rules'): array + { + $preset = $this->data->findString("{$key}.preset"); + + $parentConfig = []; + if ($preset !== '') { + $presetParts = self::parseAliasParts($preset); + $parent = 
$this->getParentSchema($presetParts['alias']); + $parentConfig = $parent->getData()->getArray($key); + } + + $result = Utils::mergeConfigs((array)self::DEFAULTS[$key], $parentConfig, $this->data->getArray($key)); + unset($result['preset']); + + return $result; + } + + private function buildColumns(): array + { + $columns = []; + + foreach ($this->data->getArray('columns') as $columnIndex => $column) { + $columnData = new Data($column); + $columnpreset = $columnData->getString('preset'); + + $parentConfig = []; + if ($columnpreset !== '') { + $presetParts = self::parseAliasParts($columnpreset); + $parent = $this->getParentSchema($presetParts['alias']); + $parentColumn = $parent->getColumn($presetParts['column']); + if ($parentColumn === null) { + throw new \InvalidArgumentException( + "Unknown column: \"{$presetParts['column']}\" by alias: \"{$presetParts['alias']}\"", + ); + } + + $parentConfig = $parentColumn->getData()->getArrayCopy(); + } + + $actualColumn = Utils::mergeConfigs(self::DEFAULTS['column'], $parentConfig, $columnData->getArrayCopy()); + $actualColumn['rules'] = $this->buildRules($actualColumn['rules'], 'rules'); + $actualColumn['aggregate_rules'] = $this->buildRules($actualColumn['aggregate_rules'], 'aggregate_rules'); + + unset($actualColumn['preset']); + + $columns[$columnIndex] = $actualColumn; + } + + return $columns; + } + + private function buildRules(array $rules, string $typeOfRules): array + { + $preset = $rules['preset'] ?? 
''; + + $parentConfig = []; + if ($preset !== '') { + $presetParts = self::parseAliasParts($preset); + $parent = $this->getParentSchema($presetParts['alias']); + $parentColumn = $parent->getColumn($presetParts['column']); + if ($parentColumn === null) { + throw new \InvalidArgumentException("Unknown column: \"{$presetParts['column']}\""); + } + + $parentConfig = $parentColumn->getData()->getArray($typeOfRules); + } + + $actualRules = Utils::mergeConfigs((array)self::DEFAULTS[$typeOfRules], $parentConfig, $rules); + unset($actualRules['preset']); + + return $actualRules; + } + + private static function parseAliasParts(string $preset): array + { + $parts = \explode('/', $preset); + self::validateAlias($parts[0]); + + if (\count($parts) === 1) { + return ['alias' => $parts[0]]; + } + + return ['alias' => $parts[0], 'column' => $parts[1]]; + } +} diff --git a/src/Utils.php b/src/Utils.php index 116f1be2..d94e2a69 100644 --- a/src/Utils.php +++ b/src/Utils.php @@ -204,9 +204,33 @@ public static function compareArray( ): array { $differences = []; + // Exclude array params for some rules because it's not necessary to compare them. + // They have random values, and it's hard to predict them. + $excludeArrayParamsFor = [ + 'rules.contains_none', + 'rules.allow_values', + 'rules.not_allow_values', + 'rules.contains_none', + 'rules.contains_one', + 'rules.contains_any', + 'rules.contains_all', + 'rules.ip_v4_range', + ]; + foreach ($actualSchema as $key => $value) { $curPath = $path === '' ? (string)$key : "{$path}.{$key}"; + if (\in_array($curPath, $excludeArrayParamsFor, true)) { + if (!\is_array($value)) { + $differences[$columnId . '/' . $curPath] = [ + $columnId, + 'Expected type "array", actual "' . \gettype($value) . '" in ' . 
+ ".{$keyPrefix}.{$curPath}", + ]; + } + continue; + } + if (!\array_key_exists($key, $expectedSchema)) { if (\strlen($keyPrefix) <= 1) { $message = "Unknown key: .{$curPath}"; @@ -246,12 +270,12 @@ public static function matchTypes( $actualType = \gettype($actual); $mapOfValidConvertions = [ - 'NULL' => [], + 'NULL' => ['string', 'integer', 'double', 'boolean'], 'array' => [], 'boolean' => [], - 'double' => ['string', 'integer'], - 'integer' => [], - 'string' => ['double', 'integer'], + 'double' => ['NULL', 'string', 'integer'], + 'integer' => ['NULL'], + 'string' => ['NULL', 'double', 'integer'], ]; if ($expectedType === $actualType) { @@ -412,8 +436,8 @@ public static function fixArgv(array $originalArgs): array continue; } - if (\str_starts_with($argValue, 'extra:')) { - $extraArgs = \str_replace('extra:', '', $argValue); + if (\str_starts_with($argValue, 'extra:') || \str_starts_with($argValue, 'options:')) { + $extraArgs = \str_replace(['extra:', 'options:'], '', $argValue); $flags = \array_filter( \array_map('trim', \explode(' ', $extraArgs)), static fn ($flag): bool => $flag !== '', @@ -430,6 +454,36 @@ public static function fixArgv(array $originalArgs): array return $newArgumens; } + /** + * @param array|int[]|string[] ...$configs + */ + public static function mergeConfigs(array ...$configs): array + { + $merged = (array)\array_shift($configs); // Start with the first array + + foreach ($configs as $config) { + foreach ($config as $key => $value) { + // If both values are arrays + if (isset($merged[$key]) && \is_array($merged[$key]) && \is_array($value)) { + // Check if arrays are associative (assuming keys are consistent across values for simplicity) + $isAssoc = \array_keys($value) !== \range(0, \count($value) - 1); + if ($isAssoc) { + // Merge associative arrays recursively + $merged[$key] = self::mergeConfigs($merged[$key], $value); + } else { + // Replace non-associative arrays entirely + $merged[$key] = $value; + } + } else { + // Replace the value 
entirely + $merged[$key] = $value; + } + } + } + + return $merged; + } + /** * @param SplFileInfo[] $files */ diff --git a/src/Validators/ValidatorSchema.php b/src/Validators/ValidatorSchema.php index b4709a02..3b551e5a 100644 --- a/src/Validators/ValidatorSchema.php +++ b/src/Validators/ValidatorSchema.php @@ -129,6 +129,7 @@ private static function validateColumnExample(array $actualColumn, int $schemaCo { $exclude = [ 'Some example', // I.e. this value is taken from full.yml, then it will be invalid in advance. + null, ]; if (isset($actualColumn['example']) && !\in_array($actualColumn['example'], $exclude, true)) { @@ -144,7 +145,22 @@ private static function validateMeta( bool $quickStop = false, ): ErrorSuite { $errors = new ErrorSuite(); - $metaErrors = Utils::compareArray($expectedMeta, $actualMeta->getArrayCopy(), 'meta', '.'); + + $actualMetaAsArray = $actualMeta->getArrayCopy(); + $actualPresets = $actualMetaAsArray['presets'] ?? []; + unset($expectedMeta['presets'], $actualMetaAsArray['presets']); + + $metaErrors = Utils::compareArray($expectedMeta, $actualMetaAsArray, 'meta', '.'); + + foreach ($actualPresets as $alias => $includedFile) { + if ($alias === '') { + $errors->addError(new Error('presets', 'Defined alias is empty')); + } + + if (!\is_string($includedFile)) { + $errors->addError(new Error('presets', 'Included filepath must be a string')); + } + } foreach ($metaErrors as $metaError) { $errors->addError(new Error('schema', $metaError[1], $metaError[0])); diff --git a/tests/Commands/ValidateCsvBasicTest.php b/tests/Commands/ValidateCsvBasicTest.php index f2c51045..f7eab1b8 100644 --- a/tests/Commands/ValidateCsvBasicTest.php +++ b/tests/Commands/ValidateCsvBasicTest.php @@ -173,8 +173,8 @@ public function testInvalidSchemaNotMatched(): void +-------+------------+--------+-------------------------------------------------------------------------+ | Line | id:Column | Rule | Message | 
+-------+------------+--------+-------------------------------------------------------------------------+ - | undef | meta | schema | Unknown key: .unknow_root_option | | undef | meta | schema | Unknown key: .csv.unknow_csv_param | + | undef | meta | schema | Unknown key: .unknow_root_option | | undef | 0:Name | schema | Unknown key: .columns.0.rules.unknow_rule | | undef | 1:City | schema | Unknown key: .columns.1.unknow_colum_option | | undef | 3:Birthday | schema | Expected type "string", actual "boolean" in .columns.3.rules.date_max | @@ -221,8 +221,8 @@ public function testInvalidSchemaAndNotFoundCSV(): void +-------+------------+--------+-------------------------------------------------------------------------+ | Line | id:Column | Rule | Message | +-------+------------+--------+-------------------------------------------------------------------------+ - | undef | meta | schema | Unknown key: .unknow_root_option | | undef | meta | schema | Unknown key: .csv.unknow_csv_param | + | undef | meta | schema | Unknown key: .unknow_root_option | | undef | 0:Name | schema | Unknown key: .columns.0.rules.unknow_rule | | undef | 1:City | schema | Unknown key: .columns.1.unknow_colum_option | | undef | 3:Birthday | schema | Expected type "string", actual "boolean" in .columns.3.rules.date_max | diff --git a/tests/Commands/ValidateCsvBatchSchemaTest.php b/tests/Commands/ValidateCsvBatchSchemaTest.php index e254eeef..829854b2 100644 --- a/tests/Commands/ValidateCsvBatchSchemaTest.php +++ b/tests/Commands/ValidateCsvBatchSchemaTest.php @@ -55,8 +55,8 @@ public function testMultiSchemaDiscovery(): void +-------+------------+--------+-------------------------------------------------------------------------+ | Line | id:Column | Rule | Message | +-------+------------+--------+-------------------------------------------------------------------------+ - | undef | meta | schema | Unknown key: .unknow_root_option | | undef | meta | schema | Unknown key: .csv.unknow_csv_param | + | 
undef | meta | schema | Unknown key: .unknow_root_option | | undef | 0:Name | schema | Unknown key: .columns.0.rules.unknow_rule | | undef | 1:City | schema | Unknown key: .columns.1.unknow_colum_option | | undef | 3:Birthday | schema | Expected type "string", actual "boolean" in .columns.3.rules.date_max | diff --git a/tests/Commands/ValidateCsvReportsTest.php b/tests/Commands/ValidateCsvReportsTest.php index e3409e48..cc3aef57 100644 --- a/tests/Commands/ValidateCsvReportsTest.php +++ b/tests/Commands/ValidateCsvReportsTest.php @@ -123,9 +123,9 @@ public function testGithub(): void Check schema syntax: 1 2 issues in ./tests/schemas/demo_invalid.yml - ::error file=./tests/schemas/demo_invalid.yml::is_float at column 2:Float%0A"is_float", column "2:Float". Value "Qwerty" is not a float number. + ::error file=/tests/schemas/demo_invalid.yml::is_float at column 2:Float%0A"is_float", column "2:Float". Value "Qwerty" is not a float number. - ::error file=./tests/schemas/demo_invalid.yml::allow_values at column 4:Favorite color%0A"allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"]. + ::error file=/tests/schemas/demo_invalid.yml::allow_values at column 4:Favorite color%0A"allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"]. CSV file validation: 1 @@ -171,11 +171,11 @@ public function testTeamcity(): void ##teamcity[testSuiteStarted name='tests/schemas/demo_invalid.yml' flowId='42'] - ##teamcity[testStarted name='is_float at column 2:Float' locationHint='php_qn://./tests/schemas/demo_invalid.yml' flowId='42'] + ##teamcity[testStarted name='is_float at column 2:Float' locationHint='php_qn:///tests/schemas/demo_invalid.yml' flowId='42'] "is_float", column "2:Float". Value "Qwerty" is not a float number. 
##teamcity[testFinished name='is_float at column 2:Float' flowId='42'] - ##teamcity[testStarted name='allow_values at column 4:Favorite color' locationHint='php_qn://./tests/schemas/demo_invalid.yml' flowId='42'] + ##teamcity[testStarted name='allow_values at column 4:Favorite color' locationHint='php_qn:///tests/schemas/demo_invalid.yml' flowId='42'] "allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"]. ##teamcity[testFinished name='allow_values at column 4:Favorite color' flowId='42'] @@ -241,10 +241,10 @@ public function testJunit(): void - + "is_float", column "2:Float". Value "Qwerty" is not a float number. - + "allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"]. @@ -302,7 +302,7 @@ public function testGitlab(): void "fingerprint": "_replaced_", "severity": "major", "location": { - "path": ".\/tests\/schemas\/demo_invalid.yml", + "path": "\/tests\/schemas\/demo_invalid.yml", "lines": { "begin": 0 } @@ -313,7 +313,7 @@ public function testGitlab(): void "fingerprint": "_replaced_", "severity": "major", "location": { - "path": ".\/tests\/schemas\/demo_invalid.yml", + "path": "\/tests\/schemas\/demo_invalid.yml", "lines": { "begin": 0 } diff --git a/tests/ExampleSchemasTest.php b/tests/ExampleSchemasTest.php index 26720b19..82b951f7 100644 --- a/tests/ExampleSchemasTest.php +++ b/tests/ExampleSchemasTest.php @@ -29,6 +29,7 @@ final class ExampleSchemasTest extends TestCase public function testFullListOfRules(): void { $rulesInConfig = yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.rules'); + unset($rulesInConfig['preset']); $rulesInConfig = \array_keys($rulesInConfig); \sort($rulesInConfig, \SORT_NATURAL); @@ -82,17 +83,75 @@ public function testFullListOfRules(): void ); } + public function testFullListOfAggregateRules(): void + { + $rulesInConfig = yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.aggregate_rules'); + 
unset($rulesInConfig['preset']); + $rulesInConfig = \array_keys($rulesInConfig); + \sort($rulesInConfig, \SORT_NATURAL); + + $finder = (new Finder()) + ->files() + ->in(PROJECT_ROOT . '/src/Rules/Aggregate') + ->ignoreDotFiles(false) + ->ignoreVCS(true) + ->name('/\\.php$/') + ->sortByName(true); + + foreach ($finder as $file) { + $ruleName = Utils::camelToKebabCase($file->getFilenameWithoutExtension()); + + if (\str_contains($ruleName, 'abstract')) { + continue; + } + + if (\str_contains($ruleName, 'combo_')) { + $ruleName = \str_replace('combo_', '', $ruleName); + $rulesInCode[] = $ruleName; + $rulesInCode[] = "{$ruleName}_min"; + $rulesInCode[] = "{$ruleName}_greater"; + $rulesInCode[] = "{$ruleName}_not"; + $rulesInCode[] = "{$ruleName}_less"; + $rulesInCode[] = "{$ruleName}_max"; + } else { + $rulesInCode[] = $ruleName; + } + } + \sort($rulesInCode, \SORT_NATURAL); + + isSame( + $rulesInCode, + $rulesInConfig, + "New: \n" . \array_reduce( + \array_diff($rulesInConfig, $rulesInCode), + static fn (string $carry, string $item) => $carry . "{$item}: NEW\n", + '', + ), + ); + + isSame( + $rulesInCode, + $rulesInConfig, + "Not exists: \n" . \array_reduce( + \array_diff($rulesInCode, $rulesInConfig), + static fn (string $carry, string $item) => $carry . 
"{$item}: FIXME\n", + '', + ), + ); + } + public function testCsvDefaultValues(): void { - isSame(yml(Tools::SCHEMA_FULL_YML)->findArray('csv'), (new Schema([]))->getCsvParams()); + $full = yml(Tools::SCHEMA_FULL_YML)->findArray('csv'); + unset($full['preset']); + isSame($full, (new Schema([]))->getCsvParams()); } public function testStructuralRules(): void { - isSame( - yml(Tools::SCHEMA_FULL_YML)->findArray('structural_rules'), - (new Schema([]))->getStructuralRulesParams(), - ); + $full = yml(Tools::SCHEMA_FULL_YML)->findArray('structural_rules'); + unset($full['preset']); + isSame($full, (new Schema([]))->getStructuralRulesParams()); } public function testCheckPhpExample(): void @@ -123,8 +182,14 @@ public function testUniqueNameOfRules(): void { $yml = yml(Tools::SCHEMA_FULL_YML); - $rules = \array_keys($yml->findArray('columns.0.rules')); - $agRules = \array_keys($yml->findArray('columns.0.aggregate_rules')); + $rules = $yml->findArray('columns.0.rules'); + unset($rules['preset']); + $rules = \array_keys($rules); + + $agRules = $yml->findArray('columns.0.aggregate_rules'); + unset($agRules['preset']); + $agRules = \array_keys($agRules); + $notUnique = \array_intersect($rules, $agRules); isSame([], $notUnique, 'Rules names should be unique: ' . \implode(', ', $notUnique)); diff --git a/tests/GithubActionsTest.php b/tests/GithubActionsTest.php index 960113ed..6ff9a28d 100644 --- a/tests/GithubActionsTest.php +++ b/tests/GithubActionsTest.php @@ -51,32 +51,31 @@ public function testGitHubActionsReadMe(): void $examples = [ 'csv' => './tests/**/*.csv', 'schema' => './tests/**/*.yml', - 'report' => ErrorSuite::REPORT_DEFAULT, - 'quick' => 'no', - 'skip-schema' => 'no', + 'report' => "'" . ErrorSuite::REPORT_DEFAULT . 
"'", + 'quick' => "'no'", + 'skip-schema' => "'no'", + 'extra' => "'options: --ansi'", ]; $expectedMessage = [ '```yml', - '- uses: jbzoo/csv-blueprint@master # See the specific version on releases page', + '- uses: jbzoo/csv-blueprint@master # See the specific version on releases page. `@master` is latest.', ' with:', ]; foreach ($inputs as $key => $input) { - if ($key === 'extra') { - continue; - } - - $expectedMessage[] = ' # ' . \trim($input['description']); + $expectedMessage[] = ' # ' . \trim(\str_replace("\n", "\n # ", \trim($input['description']))); if (isset($input['default'])) { - $expectedMessage[] = " # Default value: {$input['default']}"; + $expectedMessage[] = " # Default value: '{$input['default']}'"; } if (isset($input['default']) && $examples[$key] === $input['default']) { - $expectedMessage[] = ' # You can skip it'; + $expectedMessage[] = ' # You can skip it.'; } elseif (isset($input['required']) && $input['required']) { $expectedMessage[] = ' # Required: true'; + } elseif ($key === 'extra') { + $expectedMessage[] = ' # You can skip it.'; } if ($key === 'csv' || $key === 'schema') { diff --git a/tests/ReadmeTest.php b/tests/ReadmeTest.php index a1080e44..5e09d647 100644 --- a/tests/ReadmeTest.php +++ b/tests/ReadmeTest.php @@ -16,6 +16,8 @@ namespace JBZoo\PHPUnit; +use JBZoo\CsvBlueprint\Schema; +use JBZoo\CsvBlueprint\SchemaDataPrep; use JBZoo\Utils\Cli; use JBZoo\Utils\Str; use Symfony\Component\Console\Input\StringInput; @@ -92,8 +94,8 @@ public function testTableOutputExample(): void public function testBadgeOfRules(): void { - $cellRules = \count(yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.rules')); - $aggRules = \count(yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.aggregate_rules')); + $cellRules = \count(yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.rules')) - 1; + $aggRules = \count(yml(Tools::SCHEMA_FULL_YML)->findArray('columns.0.aggregate_rules')) - 1; $extraRules = \count(self::EXTRA_RULES); $totalRules = $cellRules + 
$aggRules + $extraRules; @@ -105,15 +107,10 @@ public function testBadgeOfRules(): void 'csv.auto_detect', 'csv.end_of_line', 'csv.null_values', + 'filename_pattern - multiple', 'column.faker', 'column.null_values', 'column.multiple + column.multiple_separator', - 'inherit.', - 'inherit.csv', - 'inherit.structural_rules', - 'inherit.rules', - 'inherit.aggregate_rules', - 'inherit.complex_rules', ]) + \count($todoYml->findArray('structural_rules')) + \count($todoYml->findArray('complex_rules')), ]); @@ -173,6 +170,70 @@ public function testCheckSimpleYmlSchemaExampleInReadme(): void Tools::insertInReadme('readme-sample-yml', $text); } + public function testCheckPresetUsersExampleInReadme(): void + { + $ymlContent = \implode( + "\n", + \array_slice(\explode("\n", \file_get_contents('./schema-examples/preset_users.yml')), 12), + ); + + $text = \implode("\n", ['```yml', \trim($ymlContent), '```']); + + Tools::insertInReadme('preset-users-yml', $text); + } + + public function testCheckPresetFeaturesExampleInReadme(): void + { + $ymlContent = \implode( + "\n", + \array_slice(\explode("\n", \file_get_contents('./schema-examples/preset_features.yml')), 12), + ); + + $text = \implode("\n", ['```yml', \trim($ymlContent), '```']); + + Tools::insertInReadme('preset-features-yml', $text); + } + + public function testCheckPresetRegexInReadme(): void + { + $text = '`' . SchemaDataPrep::getAliasRegex() . '`'; + isFileContains($text, PROJECT_ROOT . 
'/README.md'); + } + + public function testCheckPresetDatabaseExampleInReadme(): void + { + $ymlContent = \implode( + "\n", + \array_slice(\explode("\n", \file_get_contents('./schema-examples/preset_database.yml')), 12), + ); + + $text = \implode("\n", ['```yml', \trim($ymlContent), '```']); + + Tools::insertInReadme('preset-database-yml', $text); + } + + public function testCheckPresetUsageExampleInReadme(): void + { + $ymlContent = \implode( + "\n", + \array_slice(\explode("\n", \file_get_contents('./schema-examples/preset_usage.yml')), 12), + ); + + $text = \implode("\n", ['```yml', \trim($ymlContent), '```']); + + Tools::insertInReadme('preset-usage-yml', $text); + } + + public function testCheckPresetUsageRealExampleInReadme(): void + { + $schema = new Schema('./schema-examples/preset_usage.yml'); + + $text = \implode("\n", ['```yml', \trim($schema->dumpAsYamlString()), '```']); + $text = \str_replace(PROJECT_ROOT, '.', $text); + + Tools::insertInReadme('preset-usage-real-yml', $text); + } + public function testAdditionalValidationRules(): void { $list[] = ''; diff --git a/tests/Rules/Cell/ContainsNoneTest.php b/tests/Rules/Cell/ContainsNoneTest.php index 6e83d742..b34f011c 100644 --- a/tests/Rules/Cell/ContainsNoneTest.php +++ b/tests/Rules/Cell/ContainsNoneTest.php @@ -45,13 +45,13 @@ public function testNegative(): void $rule = $this->create(['a', 'b', 'c']); isSame( - 'Value "a" must not contain any of the following: ["a", "b", "c"]', + 'Value "a" must not contain the string: "a"', $rule->test('a'), ); $rule = $this->create(['a', 'b', 'c']); isSame( - 'Value "ddddb" must not contain any of the following: ["a", "b", "c"]', + 'Value "ddddb" must not contain the string: "b"', $rule->test('ddddb'), ); } diff --git a/tests/SchemaPresetTest.php b/tests/SchemaPresetTest.php new file mode 100644 index 00000000..0bd36deb --- /dev/null +++ b/tests/SchemaPresetTest.php @@ -0,0 +1,758 @@ + '', + 'description' => '', + 'presets' => [], + 'filename_pattern' => '', + 
'csv' => [ + 'header' => true, + 'delimiter' => ',', + 'quote_char' => '\\', + 'enclosure' => '"', + 'encoding' => 'utf-8', + 'bom' => false, + ], + 'structural_rules' => [ + 'strict_column_order' => true, + 'allow_extra_columns' => false, + ], + 'columns' => [], + ], $schema->getData()->getArrayCopy()); + + isSame('', (string)$schema->validate()); + } + + public function testOverideDefaults(): void + { + $schema = new Schema([ + 'name' => 'Qwerty', + 'description' => 'Some description.', + 'presets' => [], + 'filename_pattern' => '/.*/i', + 'csv' => [ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], + 'structural_rules' => [ + 'strict_column_order' => false, + 'allow_extra_columns' => true, + ], + 'columns' => [ + ['name' => 'Name', 'required' => true], + ['name' => 'Second Column', 'required' => false], + ], + ]); + + isSame([ + 'name' => 'Qwerty', + 'description' => 'Some description.', + 'presets' => [], + 'filename_pattern' => '/.*/i', + 'csv' => [ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], + 'structural_rules' => [ + 'strict_column_order' => false, + 'allow_extra_columns' => true, + ], + 'columns' => [ + [ + 'name' => 'Name', + 'description' => '', + 'example' => null, + 'required' => true, + 'rules' => [], + 'aggregate_rules' => [], + ], + [ + 'name' => 'Second Column', + 'description' => '', + 'example' => null, + 'required' => false, + 'rules' => [], + 'aggregate_rules' => [], + ], + ], + ], $schema->getData()->getArrayCopy()); + + isSame('', (string)$schema->validate()); + } + + public function testOverideFilenamePattern(): void + { + $schema = new Schema([ + 'presets' => [ + 'parent' => ['filename_pattern' => '/.*/i'], + ], + 'filename_pattern' => [ + 'preset' => 'parent', + ], + ]); + + isSame('/.*/i', $schema->getData()->getString('filename_pattern')); + isSame('', 
(string)$schema->validate()); + } + + public function testOverideCsvFull(): void + { + $schema = new Schema([ + 'presets' => [ + 'parent' => [ + 'csv' => [ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], + ], + ], + 'csv' => ['preset' => 'parent'], + ]); + + isSame([ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], $schema->getData()->getArray('csv')); + + isSame('', (string)$schema->validate()); + } + + public function testOverideCsvPartial(): void + { + $schema = new Schema([ + 'presets' => [ + 'parent' => [ + 'csv' => [ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'bom' => true, + ], + ], + ], + 'csv' => [ + 'preset' => 'parent', + 'encoding' => 'utf-32', + ], + ]); + + isSame([ + 'header' => false, // parent value + 'delimiter' => 'd', // parent value + 'quote_char' => 'q', // parent value + 'enclosure' => '"', // default value + 'encoding' => 'utf-32', // child value + 'bom' => true, // parent value + ], $schema->getData()->getArray('csv')); + + isSame('', (string)$schema->validate()); + } + + public function testOverideStructuralRulesFull(): void + { + $schema = new Schema([ + 'presets' => [ + 'parent' => [ + 'structural_rules' => [ + 'strict_column_order' => false, + 'allow_extra_columns' => true, + ], + ], + ], + 'structural_rules' => [ + 'preset' => 'parent', + ], + ]); + + isSame([ + 'strict_column_order' => false, + 'allow_extra_columns' => true, + ], $schema->getData()->getArray('structural_rules')); + + isSame('', (string)$schema->validate()); + } + + public function testOverideStructuralRulesPartial1(): void + { + $schema = new Schema([ + 'presets' => [ + 'parent' => [ + 'structural_rules' => [ + 'strict_column_order' => true, + 'allow_extra_columns' => false, + ], + ], + ], + 'structural_rules' => [ + 'preset' => 'parent', + 'allow_extra_columns' => true, 
+ ], + ]); + + isSame([ + 'strict_column_order' => true, // parent value + 'allow_extra_columns' => true, // child value + ], $schema->getData()->getArray('structural_rules')); + isSame('', (string)$schema->validate()); + } + + public function testOverideStructuralRulesPartial2(): void + { + $schema = new Schema([ + 'presets' => ['parent' => ['structural_rules' => []]], + 'structural_rules' => [ + 'preset' => 'parent', + 'allow_extra_columns' => true, + ], + ]); + + isSame([ + 'strict_column_order' => true, // default value + 'allow_extra_columns' => true, // parent value + ], $schema->getData()->getArray('structural_rules')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnFull(): void + { + $parentColum0 = [ + 'name' => 'Name', + 'description' => 'Description', + 'example' => '123', + 'required' => false, + 'rules' => ['not_empty' => true], + 'aggregate_rules' => ['sum' => 10], + ]; + + $parentColum1 = [ + 'name' => 'Name', + 'description' => 'Another Description', + 'example' => '234', + 'required' => false, + 'rules' => ['is_int' => true], + 'aggregate_rules' => ['sum_max' => 100], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => ['columns' => [$parentColum0, $parentColum1]]], + 'columns' => [ + ['preset' => 'parent/0'], + ['preset' => 'parent/1'], + ['preset' => 'parent/0:'], + ['preset' => 'parent/1:'], + ['preset' => 'parent/Name'], + ['preset' => 'parent/0:Name'], + ['preset' => 'parent/1:Name'], + ], + ]); + + isSame([ + $parentColum0, + $parentColum1, + $parentColum0, + $parentColum1, + $parentColum0, + $parentColum0, + $parentColum1, + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnPartial(): void + { + $parentColum = [ + 'name' => 'Name', + 'description' => 'Description', + 'rules' => [ + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => ['sum_max' => 
42], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => ['columns' => [$parentColum]]], + 'columns' => [ + [ + 'preset' => 'parent/Name', + 'name' => 'Child name', + 'rules' => [ + 'is_int' => true, + 'length_min' => 2, + 'length' => 5, + 'allow_values' => ['c'], + ], + ], + ], + ]); + + isSame([ + [ + 'name' => 'Child name', // Child + 'description' => 'Description', // Parent + 'example' => null, // Default + 'required' => true, // Default + 'rules' => [ + 'allow_values' => ['c'], // Child + 'length_min' => 2, // Child + 'length' => 5, // Parent + 'length_max' => 10, // Parent + 'is_int' => true, // Child + ], + 'aggregate_rules' => ['sum_max' => 42], // Parent + ], + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnRulesFull(): void + { + $parentColum = [ + 'rules' => [ + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => [ + 'sum_max' => 42, + 'is_unique' => true, + ], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => ['columns' => [$parentColum]]], + 'columns' => [ + [ + 'name' => 'Child name', + 'rules' => ['preset' => 'parent/0:'], + ], + ], + ]); + + isSame([ + [ + 'name' => 'Child name', // Child + 'description' => '', // Default + 'example' => null, // Default + 'required' => true, // Default + 'rules' => [ // Parent All + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => [], // Default + ], + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnRulesPartial(): void + { + $parentColum = [ + 'rules' => [ + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => [ + 'sum_max' => 42, + 'is_unique' => true, + ], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => 
['columns' => [$parentColum]]], + 'columns' => [ + [ + 'name' => 'Child name', + 'rules' => [ + 'preset' => 'parent/0:', + 'allow_values' => ['d', 'c'], + 'length_max' => 100, + ], + ], + ], + ]); + + isSame([ + [ + 'name' => 'Child name', // Child + 'description' => '', // Default + 'example' => null, // Default + 'required' => true, // Default + 'rules' => [ + 'allow_values' => ['d', 'c'], // Child + 'length_min' => 1, // Parent + 'length' => 5, // Parent + 'length_max' => 100, // Child + ], + 'aggregate_rules' => [], // Default + ], + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnAggregateRulesFull(): void + { + $parentColum = [ + 'rules' => [ + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => [ + 'sum_max' => 42, + 'is_unique' => true, + ], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => ['columns' => [$parentColum]]], + 'columns' => [ + [ + 'name' => 'Child name', + 'aggregate_rules' => ['preset' => 'parent/0:'], + ], + ], + ]); + + isSame([ + [ + 'name' => 'Child name', // Child + 'description' => '', // Default + 'example' => null, // Default + 'required' => true, // Default + 'rules' => [], // default + 'aggregate_rules' => [ // Parent All + 'sum_max' => 42, + 'is_unique' => true, + ], + ], + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testOverideColumnAggregateRulesPartial(): void + { + $parentColum = [ + 'rules' => [ + 'allow_values' => ['a', 'b', 'c'], + 'length_min' => 1, + 'length' => 5, + 'length_max' => 10, + ], + 'aggregate_rules' => [ + 'sum_max' => 42, + 'is_unique' => true, + ], + ]; + + $schema = new Schema([ + 'presets' => ['parent' => ['columns' => [$parentColum]]], + 'columns' => [ + [ + 'name' => 'Child name', + 'aggregate_rules' => [ + 'preset' => 'parent/0:', + 'sum_max' => 4200, + 'sum_min' => 1, + ], + ], 
+ ], + ]); + + isSame([ + [ + 'name' => 'Child name', // Child + 'description' => '', // Default + 'example' => null, // Default + 'required' => true, // Default + 'rules' => [], // default + 'aggregate_rules' => [ + 'sum_max' => 4200, // Child + 'is_unique' => true, // Parent + 'sum_min' => 1, // Child + ], + ], + ], $schema->getData()->getArray('columns')); + isSame('', (string)$schema->validate()); + } + + public function testRealParent(): void + { + $schema = new Schema('./tests/schemas/preset/parent.yml'); + isSame([ + 'name' => 'Parent schema', + 'description' => '', + 'presets' => [], + 'filename_pattern' => '/preset-\d.csv$/i', + 'csv' => [ + 'header' => false, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], + 'structural_rules' => [ + 'strict_column_order' => false, + 'allow_extra_columns' => true, + ], + 'columns' => [ + [ + 'name' => 'Name', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => [ + 'nth_num' => [4, 0.001], + ], + ], + [ + 'name' => 'Second Column', + 'description' => 'Some number.', + 'example' => 123, + 'required' => false, + 'rules' => [ + 'length_min' => 1, + 'length_max' => 4, + ], + 'aggregate_rules' => [ + 'sum' => 1000, + ], + ], + ], + ], $schema->getData()->getArrayCopy()); + isSame('', (string)$schema->validate()); + } + + public function testRealChild(): void + { + $schema = new Schema('./tests/schemas/preset/child.yml'); + isSame([ + 'name' => 'Child schema', + 'description' => '', + 'presets' => [ + 'preset' => PROJECT_ROOT . 
'/tests/schemas/preset/parent.yml', + ], + 'filename_pattern' => '/preset-\d.csv$/i', + 'csv' => [ + 'header' => true, + 'delimiter' => 'd', + 'quote_char' => 'q', + 'enclosure' => 'e', + 'encoding' => 'utf-16', + 'bom' => true, + ], + 'structural_rules' => [ + 'strict_column_order' => true, + 'allow_extra_columns' => true, + ], + 'columns' => [ + 0 => [ + 'name' => 'Name', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 1 => [ + 'name' => 'Overridden name by column name', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 2 => [ + 'name' => 'Overridden name by column index', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 3 => [ + 'name' => 'Overridden name by column index and column name', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 4 => [ + 'name' => 'Overridden name by column index and column name + added rules', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 1, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 5 => [ + 'name' => 'Overridden name by column index and column name + added aggregate rules', + 'description' => 'Full name of the person.', + 'example' => 'John D', + 'required' => 
true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => ['nth_num' => [10, 0.05]], + ], + 6 => [ + 'name' => 'Overridden only rules', + 'description' => '', + 'example' => null, + 'required' => true, + 'rules' => [ + 'not_empty' => true, + 'length_min' => 5, + 'length_max' => 7, + ], + 'aggregate_rules' => [], + ], + 7 => [ + 'name' => 'Overridden only aggregation rules', + 'description' => '', + 'example' => null, + 'required' => true, + 'rules' => [], + 'aggregate_rules' => ['nth_num' => [4, 0.001]], + ], + 8 => [ + 'name' => 'Second Column', + 'description' => 'Some number.', + 'example' => 123, + 'required' => false, + 'rules' => [ + 'length_min' => 1, + 'length_max' => 4, + ], + 'aggregate_rules' => ['sum' => 1000], + ], + ], + ], $schema->getData()->getArrayCopy()); + isSame('', (string)$schema->validate()); + } + + public function testRealChildOfChild(): void + { + $schema = new Schema('./tests/schemas/preset/child-of-child.yml'); + isSame([ + 'name' => 'Child of child schema', + 'description' => '', + 'presets' => [ + 'preset-1' => PROJECT_ROOT . '/tests/schemas/preset/child.yml', + ], + 'filename_pattern' => '/child-of-child-\d.csv$/i', + 'csv' => [ + 'header' => true, + 'delimiter' => 'dd', + 'quote_char' => 'qq', + 'enclosure' => 'ee', + 'encoding' => 'utf-32', + 'bom' => false, + ], + 'structural_rules' => [ + 'strict_column_order' => true, + 'allow_extra_columns' => false, + ], + 'columns' => [ + [ + 'name' => 'Second Column', + 'description' => 'Some number.', + 'example' => 123, + 'required' => false, + 'rules' => [ + 'length_min' => 1, + 'length_max' => 4, + ], + 'aggregate_rules' => ['sum' => 1000], + ], + ], + ], $schema->getData()->getArrayCopy()); + isSame('', (string)$schema->validate()); + } + + public function testInvalidPresetFile(): void + { + $this->expectExceptionMessage( + "Invalid schema \"_custom_array_\" data.\n" . 
+ 'Unexpected error: "Unknown included file: "invalid.yml""', + ); + + $schema = new Schema(['presets' => ['alias' => 'invalid.yml']]); + } +} diff --git a/tests/SchemaTest.php b/tests/SchemaTest.php index 3878fb9d..308102a5 100644 --- a/tests/SchemaTest.php +++ b/tests/SchemaTest.php @@ -195,6 +195,7 @@ public function testValidateValidSchemaFixtures(): void { $schemas = (new Finder()) ->in(PROJECT_ROOT . '/tests/schemas') + ->in(PROJECT_ROOT . '/tests/schemas/preset') ->in(PROJECT_ROOT . '/tests/Benchmarks') ->in(PROJECT_ROOT . '/schema-examples') ->name('*.yml') @@ -208,7 +209,8 @@ public function testValidateValidSchemaFixtures(): void foreach ($schemas as $schemaFile) { $filepath = $schemaFile->getPathname(); - isSame('', (string)(new Schema($filepath))->validate(), $filepath); + $validated = (new Schema($filepath))->validate()->render(ErrorSuite::RENDER_TABLE); + isSame('', (string)$validated, "{$filepath}\n----------\n{$validated}"); } } @@ -220,8 +222,8 @@ public function testValidateInvalidSchema(): void +-------+------------+--------+-------------------------------------------------------------------------+ | Line | id:Column | Rule | Message | +-------+------------+--------+-------------------------------------------------------------------------+ - | undef | meta | schema | Unknown key: .unknow_root_option | | undef | meta | schema | Unknown key: .csv.unknow_csv_param | + | undef | meta | schema | Unknown key: .unknow_root_option | | undef | 0:Name | schema | Unknown key: .columns.0.rules.unknow_rule | | undef | 1:City | schema | Unknown key: .columns.1.unknow_colum_option | | undef | 3:Birthday | schema | Expected type "string", actual "boolean" in .columns.3.rules.date_max | @@ -235,8 +237,8 @@ public function testValidateInvalidSchema(): void isSame( <<<'TEXT' - "schema", column "meta". Unknown key: .unknow_root_option. "schema", column "meta". Unknown key: .csv.unknow_csv_param. + "schema", column "meta". Unknown key: .unknow_root_option. 
"schema", column "0:Name". Unknown key: .columns.0.rules.unknow_rule. "schema", column "1:City". Unknown key: .columns.1.unknow_colum_option. "schema", column "3:Birthday". Expected type "string", actual "boolean" in .columns.3.rules.date_max. diff --git a/tests/Tools.php b/tests/Tools.php index 876c21f6..8fa18d28 100644 --- a/tests/Tools.php +++ b/tests/Tools.php @@ -34,9 +34,9 @@ final class Tools public const SCHEMA_SIMPLE_NO_HEADER = './tests/schemas/simple_no_header.yml'; public const SCHEMA_SIMPLE_HEADER_PHP = './tests/schemas/simple_header.php'; public const SCHEMA_SIMPLE_HEADER_JSON = './tests/schemas/simple_header.json'; - public const SCHEMA_EXAMPLE_EMPTY = './tests/schemas/example_empty.yml'; + public const SCHEMA_EXAMPLE_EMPTY = PROJECT_ROOT . '/tests/schemas/example_empty.yml'; - public const SCHEMA_FULL_YML = './schema-examples/full.yml'; + public const SCHEMA_FULL_YML = PROJECT_ROOT . '/schema-examples/full.yml'; public const SCHEMA_FULL_YML_CLEAN = './schema-examples/full_clean.yml'; public const SCHEMA_FULL_JSON = './schema-examples/full.json'; public const SCHEMA_FULL_PHP = './schema-examples/full.php'; @@ -108,18 +108,26 @@ public static function getAggregateRule( return ['columns' => [['name' => $columnName, 'aggregate_rules' => [$ruleName => $options]]]]; } - public static function insertInReadme(string $code, string $content): void + public static function insertInReadme(string $code, string $content, bool $isInline = false): void { isFile(self::README); $prefix = 'auto-update:'; isFileContains("", self::README); isFileContains("", self::README); - $replacement = \implode("\n", [ - "", - \trim($content), - "", - ]); + if ($isInline) { + $replacement = \implode('', [ + "", + $content, + "", + ]); + } else { + $replacement = \implode("\n", [ + "", + \trim($content), + "", + ]); + } $result = \preg_replace( "/<\\!-- {$prefix}{$code} -->(.*?)<\\!-- {$prefix}\\/{$code} -->/s", @@ -132,7 +140,11 @@ public static function insertInReadme(string $code, 
string $content): void isTrue(\file_put_contents(self::README, $result) > 0); $hashAfter = \hash_file('md5', self::README); - isSame($hashAfter, $hashBefore, "README.md was not updated. Code: {$code}"); + isSame( + $hashAfter, + $hashBefore, + "README.md was not updated. Code: {$code}\n\n---------\n{$replacement}\n---------", + ); isFileContains($result, self::README); } diff --git a/tests/UtilsTest.php b/tests/UtilsTest.php index 88c364aa..45546924 100644 --- a/tests/UtilsTest.php +++ b/tests/UtilsTest.php @@ -125,10 +125,11 @@ public function testColorsTags(): void $tags = \explode( '|', - '|i|c|q|e' . + 'i|c|q|e' . '|comment|info|error|question' . '|black|red|green|yellow|blue|magenta|cyan|white|default' . - '|bl|b|u|r|bg', + '|bl|b|u|r|bg' . + '|details|summary', ); foreach ($packs as $files) { @@ -157,20 +158,26 @@ public function testFixCliArguments(): void isSame( ['cmd', '-h', '--ansi'], - Utils::fixArgv(['cmd', '', ' -h ', 'extra: --ansi']), + Utils::fixArgv(['cmd', '', ' -h ', 'options: --ansi']), ); isSame( ['cmd', '-h'], - Utils::fixArgv(['cmd', '', ' -h ', 'extra:']), + Utils::fixArgv(['cmd', '', ' -h ', 'options:']), ); isSame( ['cmd', '-h'], - Utils::fixArgv(['cmd', '', ' -h ', ' extra: ']), + Utils::fixArgv(['cmd', '', ' -h ', ' options: ']), ); isSame( ['cmd', '-h', '--ansi', '--no'], - Utils::fixArgv(['cmd', '', ' -h ', 'extra: --ansi --no']), + Utils::fixArgv(['cmd', '', ' -h ', 'options: --ansi --no']), ); + isSame( + ['cmd', '-h', '--ansi', '--no'], + Utils::fixArgv(['cmd', '', ' -h ', 'options: --ansi --no ']), + ); + + // Test legacy "extra:" isSame( ['cmd', '-h', '--ansi', '--no'], Utils::fixArgv(['cmd', '', ' -h ', 'extra: --ansi --no ']), diff --git a/tests/schemas/preset/child-of-child.yml b/tests/schemas/preset/child-of-child.yml new file mode 100644 index 00000000..eb816933 --- /dev/null +++ b/tests/schemas/preset/child-of-child.yml @@ -0,0 +1,36 @@ +# +# JBZoo Toolbox - Csv-Blueprint. 
+# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +# This schema is invalid because it does not match the CSV file (tests/fixtures/demo.csv). + + +name: Child of child schema + +presets: + preset-1: child.yml + +filename_pattern: /child-of-child-\d.csv$/i + +csv: + preset: preset-1 + delimiter: dd + quote_char: qq + enclosure: ee + encoding: utf-32 + bom: false + +structural_rules: + preset: preset-1 + allow_extra_columns: false + +columns: + - preset: preset-1/Second Column diff --git a/tests/schemas/preset/child.yml b/tests/schemas/preset/child.yml new file mode 100644 index 00000000..91553e0d --- /dev/null +++ b/tests/schemas/preset/child.yml @@ -0,0 +1,71 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +# This schema is invalid because it does not match the CSV file (tests/fixtures/demo.csv).
+ + +name: Child schema + +presets: + preset: ./../preset/parent.yml + +filename_pattern: + preset: preset + +csv: + preset: preset + header: true + +structural_rules: + preset: preset + strict_column_order: true + +columns: + # 0 + - preset: preset/Name + + # 1 + - preset: preset/Name + name: Overridden name by column name + + # 2 + - preset: 'preset/0:' + name: Overridden name by column index + + # 3 + - preset: preset/0:Name + name: Overridden name by column index and column name + + # 4 + - preset: preset/0:Name + name: Overridden name by column index and column name + added rules + rules: + length_min: 1 + + # 5 + - preset: preset/0:Name + name: Overridden name by column index and column name + added aggregate rules + aggregate_rules: + nth_num: [ 10, 0.05 ] + + # 6 + - name: Overridden only rules + rules: + preset: preset/0:Name + + # 7 + - name: Overridden only aggregation rules + aggregate_rules: + preset: preset/0:Name + + # 8 + - preset: preset/Second Column diff --git a/tests/schemas/preset/parent.yml b/tests/schemas/preset/parent.yml new file mode 100644 index 00000000..37bca64f --- /dev/null +++ b/tests/schemas/preset/parent.yml @@ -0,0 +1,53 @@ +# +# JBZoo Toolbox - Csv-Blueprint. +# +# This file is part of the JBZoo Toolbox project. +# For the full copyright and license information, please view the LICENSE +# file that was distributed with this source code. +# +# @license MIT +# @copyright Copyright (C) JBZoo.com, All rights reserved. +# @see https://github.com/JBZoo/Csv-Blueprint +# + +# This schema is invalid because it does not match the CSV file (tests/fixtures/demo.csv). + +name: Parent schema + +filename_pattern: /preset-\d.csv$/i + +csv: + header: false + delimiter: d + quote_char: q + enclosure: e + encoding: utf-16 + bom: true + + +structural_rules: + strict_column_order: false + allow_extra_columns: true + + +columns: + - name: Name + required: true + example: John D + description: Full name of the person.
+ rules: + not_empty: true + length_min: 5 + length_max: 7 + aggregate_rules: + nth_num: [ 4, 0.001 ] + + - name: Second Column + required: false + example: 123 + description: Some number. + rules: + length_min: 1 + length_max: 4 + aggregate_rules: + sum: 1000 diff --git a/tests/schemas/todo.yml b/tests/schemas/todo.yml index 83312538..1a4f68fc 100644 --- a/tests/schemas/todo.yml +++ b/tests/schemas/todo.yml @@ -12,17 +12,9 @@ # File contains just ideas. It's invalid! -# Include another schemas -includes: # Alias is always required - - /path/schema_1.yml as alias_1 # Full path to another schema. - - ./path/schema_2.yml as alias_2 # Relative path based on the current schema path. - - ../path/schema_3.yml as alias_3 # Relative path based on the current schema path. Go up one level. - csv: # How to parse file before validation - inherit: alias_1 # Inherited from another schema. Options above will overwrite inherited options. - auto_detect: false # If true, then the cintrol chars will be detected automatically. - end_of_line: LF # End of line character. LF => \n, CRLF => \r\n, CR => \r + auto_detect: false # If true, then the control chars will be detected automatically. empty_values: # List of values that will be treated as empty - "" # By default, only empty string is treated as empty (string length = 0). - null @@ -41,8 +33,7 @@ structural_rules: columns: - - inherit: alias_1\Column Name - empty_values: [''] # Override csv.empty_values. List of values that will be treated as empty. + - empty_values: [''] # Override csv.empty_values. List of values that will be treated as empty. # Multi prop multiple: true @@ -51,6 +42,9 @@ columns: rules: is_null: true # see csv.empty_values and column.empty_values + password_strength: 3 # 0-4 + is_password: true # /^[a-zA-Z\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?~]{8,}$/ + _list: true # Example: starts_with_list: [ 'a', 'b', 'c' ] # identifier is_bsn: true # Validates a Dutch citizen service number (BSN).