From 9e9f13d5a238fac41ae526da726a35c9774ccff2 Mon Sep 17 00:00:00 2001 From: Denis Smetannikov Date: Sun, 31 Mar 2024 01:36:17 +0400 Subject: [PATCH] Optimize CSV validation and header mapping logic if header is enabled (#115) --- Makefile | 30 ++++++-- src/Csv/Column.php | 5 ++ src/Csv/CsvFile.php | 36 ++++++++- src/Schema.php | 28 ++----- src/Utils.php | 6 +- src/Validators/ValidatorCsv.php | 33 ++++---- tests/Benchmarks/bench_0_quickest.yml | 6 +- tests/Benchmarks/bench_0_quickest_agg.yml | 6 +- tests/Benchmarks/bench_0_quickest_combo.yml | 6 +- tests/Benchmarks/bench_1_mini.yml | 6 +- tests/Benchmarks/bench_1_mini_agg.yml | 6 +- tests/Benchmarks/bench_1_mini_combo.yml | 6 +- tests/Benchmarks/bench_2_realistic.yml | 6 +- tests/Benchmarks/bench_2_realistic_agg.yml | 6 +- tests/Benchmarks/bench_2_realistic_combo.yml | 6 +- tests/Benchmarks/bench_3_all_agg.yml | 6 +- tests/Csv/CsvFileTest.php | 13 ++-- tests/SchemaTest.php | 6 -- tests/Validators/CsvValidatorTest.php | 81 ++++++++++++++++++-- 19 files changed, 188 insertions(+), 110 deletions(-) diff --git a/Makefile b/Makefile index 3b9e1fea..50ceceff 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,11 @@ BENCH_ROWS_SRC ?= 2000 BENCH_CSV_PATH := ./build/bench/$(BENCH_COLS)_$(BENCH_ROWS_SRC)_000.csv BENCH_CSV := --csv='$(BENCH_CSV_PATH)' BENCH_FLAGS := --debug --profile --report=text -vvv -BENCH_SCHEMAS := --schema='./tests/Benchmarks/bench_*.yml' +BENCH_SCHEMAS_ALL := --schema='./tests/Benchmarks/bench_*.yml' +BENCH_SCHEMAS_0 := --schema='./tests/Benchmarks/bench_0_*.yml' +BENCH_SCHEMAS_1 := --schema='./tests/Benchmarks/bench_1_*.yml' +BENCH_SCHEMAS_2 := --schema='./tests/Benchmarks/bench_2_*.yml' +BENCH_SCHEMAS_3 := --schema='./tests/Benchmarks/bench_3_*.yml' bench: ##@Benchmarks Run all benchmarks @@ -109,23 +113,37 @@ bench-create-csv: ##@Benchmarks Create CSV file bench-docker: ##@Benchmarks Run CSV file with Docker + @docker run --rm $(DOCKER_IMAGE) --ansi --version @echo "::group::Quickest" - -$(BLUEPRINT_DOCKER) $(BENCH_CSV) --schema='./tests/Benchmarks/bench_0_*.yml' $(BENCH_FLAGS) + -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) @echo "::endgroup::" @echo "::group::Minimum" - -$(BLUEPRINT_DOCKER) $(BENCH_CSV) --schema='./tests/Benchmarks/bench_1_*.yml' $(BENCH_FLAGS) + -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_1) $(BENCH_FLAGS) @echo "::endgroup::" @echo "::group::Realistic" - -$(BLUEPRINT_DOCKER) $(BENCH_CSV) --schema='./tests/Benchmarks/bench_2_*.yml' $(BENCH_FLAGS) + -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_2) $(BENCH_FLAGS) @echo "::endgroup::" @echo "::group::All aggregations at once" - -$(BLUEPRINT_DOCKER) $(BENCH_CSV) --schema='./tests/Benchmarks/bench_3_*.yml' $(BENCH_FLAGS) + -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_3) $(BENCH_FLAGS) @echo "::endgroup::" bench-phar: ##@Benchmarks Run CSV file with Phar - -$(BLUEPRINT_PHAR) $(BENCH_CSV) $(BENCH_SCHEMAS) $(BENCH_FLAGS) + ./build/csv-blueprint.phar --ansi --version + @echo "::group::Quickest" + -$(BLUEPRINT_PHAR) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) + @echo "::endgroup::" + @echo "::group::Minimum" + -$(BLUEPRINT_PHAR) $(BENCH_CSV) $(BENCH_SCHEMAS_1) $(BENCH_FLAGS) + @echo "::endgroup::" + @echo "::group::Realistic" + -$(BLUEPRINT_PHAR) $(BENCH_CSV) $(BENCH_SCHEMAS_2) $(BENCH_FLAGS) + @echo "::endgroup::" + @echo "::group::All aggregations at once" + -$(BLUEPRINT_PHAR) $(BENCH_CSV) $(BENCH_SCHEMAS_3) $(BENCH_FLAGS) + @echo "::endgroup::" bench-php: ##@Benchmarks Run CSV file with classic PHP binary + $(PHP_BIN) ./csv-blueprint --ansi --version -$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMAS) $(BENCH_FLAGS) diff --git a/src/Csv/Column.php b/src/Csv/Column.php index 40265ca9..f3e361b9 100644 --- a/src/Csv/Column.php +++ b/src/Csv/Column.php @@ -107,6 +107,11 @@ public function validateCell(string $cellValue, int $line = Error::UNDEFINED_LIN return $this->getValidator()->validateCell($cellValue, $line); } + public function setId(int $realIndex): void + { + $this->id = $realIndex; + } + private function prepareRuleSet(string $schemaKey): array { $rules = []; diff --git a/src/Csv/CsvFile.php b/src/Csv/CsvFile.php index 96320a43..24ba0a60 100644 --- a/src/Csv/CsvFile.php +++ b/src/Csv/CsvFile.php @@ -65,7 +65,9 @@ public function getHeader(): array if ($this->structure->isHeader() && !$this->isEmpty) { // TODO: add handler for empty file // League\Csv\SyntaxError : The header record does not exist or is empty at offset: `0 - $this->header = $this->reader->getHeader(); + $this->header = $this->getRecordsChunk(0, 1)->first(); + } else { + $this->header = \range(0, \count($this->getRecordsChunk(0, 1)->first()) - 1); } } @@ -74,12 +76,12 @@ public function getHeader(): array public function getRecords(): \Iterator { - return $this->reader->getRecords($this->getHeader()); + return $this->reader->getRecords([]); } public function getRecordsChunk(int $offset = 0, int $limit = -1): TabularDataReader { - return Statement::create(null, $offset, $limit)->process($this->reader, $this->getHeader()); + return Statement::create(null, $offset, $limit)->process($this->reader, []); // No headers is required! } public function validate(bool $quickStop = false): ErrorSuite @@ -92,13 +94,39 @@ public function getRealColumNumber(): int return \count($this->getRecordsChunk(0, 1)->first()); } + public function getSchema(): Schema + { + return $this->schema; + } + + /** + * @return Column[] + */ + public function getColumnsMappedByHeader(): array + { + $map = []; + + $realHeader = $this->getHeader(); + foreach ($realHeader as $realIndex => $realColumn) { + $realIndex = (int)$realIndex; + $schemaColumn = $this->schema->getColumn($realColumn); + + if ($schemaColumn !== null) { + $schemaColumn->setId($realIndex); + $map[$realIndex] = $schemaColumn; + } + } + + return $map; + } + private function prepareReader(): LeagueReader { $reader = LeagueReader::createFromPath($this->csvFilename) ->setDelimiter($this->structure->getDelimiter()) ->setEnclosure($this->structure->getEnclosure()) ->setEscape($this->structure->getQuoteChar()) - ->setHeaderOffset($this->structure->isHeader() ? 0 : null); + ->setHeaderOffset(null); // It's important to set it to null to optimize memory usage! if ($this->structure->isBom()) { $reader->includeInputBOM(); diff --git a/src/Schema.php b/src/Schema.php index a105d83d..17bcf9d2 100644 --- a/src/Schema.php +++ b/src/Schema.php @@ -86,25 +86,6 @@ public function getColumns(): array return $this->columns; } - /** - * @return Column[]|null[] - * @phan-suppress PhanPartialTypeMismatchReturn - */ - public function getColumnsMappedByHeader(array $header): array - { - $map = []; - - if ($this->getCsvStructure()->isHeader()) { - foreach ($header as $headerName) { - $map[$headerName] = $this->columns[$headerName] ?? null; - } - } else { - return $this->getColumns(); - } - - return $map; - } - public function getColumn(int|string $columNameOrId): ?Column { if (\is_int($columNameOrId)) { @@ -113,10 +94,6 @@ public function getColumn(int|string $columNameOrId): ?Column $column = $this->getColumns()[$columNameOrId] ?? null; } - if ($column === null) { - throw new Exception("Column \"{$columNameOrId}\" not found in schema \"{$this->filename}\""); - } - return $column; } @@ -154,6 +131,11 @@ public function getData(): AbstractData return clone $this->data; } + public function getSchemaHeader(): array + { + return \array_keys($this->getColumns()); + } + /** * @return Column[] */ diff --git a/src/Utils.php b/src/Utils.php index 7c624892..df65e85d 100644 --- a/src/Utils.php +++ b/src/Utils.php @@ -63,8 +63,10 @@ public static function debug(int|string $message): void public static function debugSpeed(string $messPrefix, int $lines, float $startTimer): void { - $kiloLines = \round(($lines / (\microtime(true) - $startTimer)) / 1000); - self::debug("{$messPrefix} " . \number_format($kiloLines) . 'K lines/sec'); + if (\defined('DEBUG_MODE')) { + $kiloLines = \round(($lines / (\microtime(true) - $startTimer)) / 1000); + self::debug("{$messPrefix} " . \number_format($kiloLines) . 'K lines/sec'); + } } public static function kebabToCamelCase(string $input): string diff --git a/src/Validators/ValidatorCsv.php b/src/Validators/ValidatorCsv.php index adaf67f8..5a0a941f 100644 --- a/src/Validators/ValidatorCsv.php +++ b/src/Validators/ValidatorCsv.php @@ -106,15 +106,13 @@ private function validateHeader(bool $quickStop = false): ErrorSuite private function validateLines(bool $quickStop = false): ErrorSuite { $errors = new ErrorSuite(); - $realColumns = $this->schema->getColumnsMappedByHeader($this->csv->getHeader()); + $mappedColumns = $this->csv->getColumnsMappedByHeader(); + $isHeaderEnabled = $this->schema->getCsvStructure()->isHeader(); - foreach ($realColumns as $column) { - $columValues = []; - if ($column === null) { - continue; - } + foreach ($mappedColumns as $columnIndex => $column) { + $messPrefix = "Column \"{$column->getHumanName()}\" -"; // System message prefix. Debug only! - $messPrefix = "Column \"{$column->getHumanName()}\" -"; + $columValues = []; Utils::debug("{$messPrefix} Column start"); $colValidator = $column->getValidator(); @@ -138,21 +136,25 @@ private function validateLines(bool $quickStop = false): ErrorSuite $lineCounter = 0; $startTimer = \microtime(true); foreach ($this->csv->getRecords() as $line => $record) { + if ($isHeaderEnabled && $line === 0) { + continue; + } + $lineCounter++; $lineNum = (int)$line + 1; if ($isRules) { // Time optimization - if (!isset($record[$column->getKey()])) { + if (!isset($record[$columnIndex])) { $errors->addError( new Error( 'csv.column', - "Column index:{$column->getKey()} not found", + "Column index:{$columnIndex} not found", $column->getHumanName(), $lineNum, ), ); } else { - $errors->addErrorSuit($colValidator->validateCell($record[$column->getKey()], $lineNum)); + $errors->addErrorSuit($colValidator->validateCell($record[$columnIndex], $lineNum)); } if ($quickStop && $errors->count() > 0) { @@ -160,8 +162,8 @@ private function validateLines(bool $quickStop = false): ErrorSuite } } - if ($isAggRules && isset($record[$column->getKey()])) { // Time & memory optimization - $columValues[] = ValidatorColumn::prepareValue($record[$column->getKey()], $aggInputType); + if ($isAggRules && isset($record[$columnIndex])) { // Time & memory optimization + $columValues[] = ValidatorColumn::prepareValue($record[$columnIndex], $aggInputType); } } Utils::debug("{$messPrefix} Lines " . \number_format($lineCounter) . ''); @@ -213,10 +215,9 @@ private function validateColumn(bool $quickStop): ErrorSuite $errors = new ErrorSuite(); if ($this->schema->getCsvStructure()->isHeader()) { - $realColumns = $this->schema->getColumnsMappedByHeader($this->csv->getHeader()); - $schemaColumns = $this->schema->getColumns(); - - $notFoundColums = \array_diff(\array_keys($schemaColumns), \array_keys($realColumns)); + $realColumns = $this->csv->getHeader(); + $schemaColumns = $this->schema->getSchemaHeader(); + $notFoundColums = \array_diff($schemaColumns, $realColumns); if (\count($notFoundColums) > 0) { $error = new Error( diff --git a/tests/Benchmarks/bench_0_quickest.yml b/tests/Benchmarks/bench_0_quickest.yml index 1059ac99..0f24877d 100644 --- a/tests/Benchmarks/bench_0_quickest.yml +++ b/tests/Benchmarks/bench_0_quickest.yml @@ -12,9 +12,7 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true diff --git a/tests/Benchmarks/bench_0_quickest_agg.yml b/tests/Benchmarks/bench_0_quickest_agg.yml index 03823cc9..15175902 100644 --- a/tests/Benchmarks/bench_0_quickest_agg.yml +++ b/tests/Benchmarks/bench_0_quickest_agg.yml @@ -12,9 +12,7 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - aggregate_rules: + - name: id + aggregate_rules: count: 0 diff --git a/tests/Benchmarks/bench_0_quickest_combo.yml b/tests/Benchmarks/bench_0_quickest_combo.yml index 689aa30e..d4dc5d5d 100644 --- a/tests/Benchmarks/bench_0_quickest_combo.yml +++ b/tests/Benchmarks/bench_0_quickest_combo.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true aggregate_rules: count: 0 diff --git a/tests/Benchmarks/bench_1_mini.yml b/tests/Benchmarks/bench_1_mini.yml index 44539480..e1b85544 100644 --- a/tests/Benchmarks/bench_1_mini.yml +++ b/tests/Benchmarks/bench_1_mini.yml @@ -12,10 +12,8 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true is_int: true diff --git a/tests/Benchmarks/bench_1_mini_agg.yml b/tests/Benchmarks/bench_1_mini_agg.yml index 6a97b6a9..9d1f2bcb 100644 --- a/tests/Benchmarks/bench_1_mini_agg.yml +++ b/tests/Benchmarks/bench_1_mini_agg.yml @@ -12,10 +12,8 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - aggregate_rules: + - name: id + aggregate_rules: average: 0 count: 0 diff --git a/tests/Benchmarks/bench_1_mini_combo.yml b/tests/Benchmarks/bench_1_mini_combo.yml index bafda794..62f5ac78 100644 --- a/tests/Benchmarks/bench_1_mini_combo.yml +++ b/tests/Benchmarks/bench_1_mini_combo.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true is_int: true aggregate_rules: diff --git a/tests/Benchmarks/bench_2_realistic.yml b/tests/Benchmarks/bench_2_realistic.yml index 8fb3830a..6e3bfd65 100644 --- a/tests/Benchmarks/bench_2_realistic.yml +++ b/tests/Benchmarks/bench_2_realistic.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true length_max: 100 is_int: true diff --git a/tests/Benchmarks/bench_2_realistic_agg.yml b/tests/Benchmarks/bench_2_realistic_agg.yml index eea74fc3..61e5dc34 100644 --- a/tests/Benchmarks/bench_2_realistic_agg.yml +++ b/tests/Benchmarks/bench_2_realistic_agg.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - aggregate_rules: + - name: id + aggregate_rules: is_unique: true sorted: [ desc, natural ] count: 0 diff --git a/tests/Benchmarks/bench_2_realistic_combo.yml b/tests/Benchmarks/bench_2_realistic_combo.yml index b8701af0..1ce88766 100644 --- a/tests/Benchmarks/bench_2_realistic_combo.yml +++ b/tests/Benchmarks/bench_2_realistic_combo.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true length_max: 100 is_int: true diff --git a/tests/Benchmarks/bench_3_all_agg.yml b/tests/Benchmarks/bench_3_all_agg.yml index dffc5f16..67c02a1d 100644 --- a/tests/Benchmarks/bench_3_all_agg.yml +++ b/tests/Benchmarks/bench_3_all_agg.yml @@ -12,11 +12,9 @@ filename_pattern: /.csv$/i -csv: - header: false - columns: - - rules: + - name: id + rules: not_empty: true length_max: 100 is_int: true diff --git a/tests/Csv/CsvFileTest.php b/tests/Csv/CsvFileTest.php index aa4c46ef..d8a824f2 100644 --- a/tests/Csv/CsvFileTest.php +++ b/tests/Csv/CsvFileTest.php @@ -29,7 +29,7 @@ public function testReadCsvFileWithoutHeader(): void $csv = new CsvFile(Tools::CSV_SIMPLE_NO_HEADER, Tools::SCHEMA_SIMPLE_NO_HEADER); isSame(Tools::CSV_SIMPLE_NO_HEADER, $csv->getCsvFilename()); - isSame([], $csv->getHeader()); + isSame([0, 1], $csv->getHeader()); isSame([ ['1', 'true'], @@ -50,18 +50,19 @@ public function testReadCsvFileWithHeader(): void isSame(['seq', 'bool', 'exact'], $csv->getHeader()); isSame([ - ['seq' => '1', 'bool' => 'true', 'exact' => '1'], - ['seq' => '2', 'bool' => 'true', 'exact' => '1'], - ['seq' => '3', 'bool' => 'false', 'exact' => '1'], + ['seq', 'bool', 'exact'], + ['1', 'true', '1'], + ['2', 'true', '1'], + ['3', 'false', '1'], ], $this->fetchRows($csv->getRecords())); isSame( - [['seq' => '2', 'bool' => 'true', 'exact' => '1']], + [['1', 'true', '1']], $this->fetchRows($csv->getRecordsChunk(1, 1)), ); isSame( - [['seq' => '2', 'bool' => 'true', 'exact' => '1'], ['seq' => '3', 'bool' => 'false', 'exact' => '1']], + [['1', 'true', '1'], ['2', 'true', '1']], $this->fetchRows($csv->getRecordsChunk(1, 2)), ); } diff --git a/tests/SchemaTest.php b/tests/SchemaTest.php index 20426e68..e43391da 100644 --- a/tests/SchemaTest.php +++ b/tests/SchemaTest.php @@ -115,18 +115,12 @@ public function testColumnByNameAndId(): void public function testGetUndefinedColumnById(): void { - $this->expectExceptionMessage( - 'Column "1000" not found in schema "' . Tools::SCHEMA_EXAMPLE_EMPTY . '"', - ); $schemaFull = new Schema(Tools::SCHEMA_EXAMPLE_EMPTY); isNull($schemaFull->getColumn(1000)); } public function testGetUndefinedColumnByName(): void { - $this->expectExceptionMessage( - 'Column "undefined_column" not found in schema "' . Tools::SCHEMA_EXAMPLE_EMPTY . '"', - ); $schemaFull = new Schema(Tools::SCHEMA_EXAMPLE_EMPTY); isNull($schemaFull->getColumn('undefined_column')); } diff --git a/tests/Validators/CsvValidatorTest.php b/tests/Validators/CsvValidatorTest.php index 15e6e97f..ceec7d3e 100644 --- a/tests/Validators/CsvValidatorTest.php +++ b/tests/Validators/CsvValidatorTest.php @@ -42,9 +42,6 @@ public function testInvalidWithoutHeader(): void isSame( <<<'TEXT' "csv.header" at line 1. Real number of columns is less than schema: 2 < 3. - "csv.column" at line 1, column "2:". Column index:2 not found. - "csv.column" at line 2, column "2:". Column index:2 not found. - "csv.column" at line 3, column "2:". Column index:2 not found. TEXT, \strip_tags((string)$csv->validate()), @@ -82,7 +79,7 @@ public function testCellRule(): void $csv = new CsvFile(Tools::CSV_COMPLEX, Tools::getRule('integer', 'not_empty', true)); isSame( - '"not_empty" at line 19, column "0:integer". Value is empty.' . "\n", + '"not_empty" at line 19, column "3:integer". Value is empty.' . "\n", \strip_tags((string)$csv->validate()), ); } @@ -94,7 +91,7 @@ public function testAggregateRule(): void $csv = new CsvFile(Tools::DEMO_CSV, Tools::getAggregateRule('City', 'is_unique', true)); isSame( - '"ag:is_unique" at line 1, column "0:City". Column has non-unique values. Unique: 9, total: 10.' . "\n", + '"ag:is_unique" at line 1, column "1:City". Column has non-unique values. Unique: 9, total: 10.' . "\n", \strip_tags((string)$csv->validate()), ); @@ -109,7 +106,7 @@ public function testAggregateRuleCombo(): void $csv = new CsvFile(Tools::DEMO_CSV, Tools::getAggregateRule('Float', 'sum', 20)); isSame( - '"ag:sum" at line 1, column "0:Float". The sum of numbers in the column is ' . + '"ag:sum" at line 1, column "2:Float". The sum of numbers in the column is ' . '"4691.3235", which is not equal than the expected "20".' . "\n", (string)$csv->validate(), ); @@ -143,10 +140,12 @@ public function testQuickStop(): void public function testErrorToArray(): void { $csv = new CsvFile(Tools::CSV_COMPLEX, Tools::getRule('yn', 'is_email', true)); + // dump($csv); + isSame([ 'ruleCode' => 'is_email', 'message' => 'Value "N" is not a valid email', - 'columnName' => '0:yn', + 'columnName' => '2:yn', 'line' => 2, ], $csv->validate(true)->get(0)->toArray()); } @@ -169,4 +168,72 @@ public function testFilenamePattern(): void $csv = new CsvFile(Tools::CSV_COMPLEX, ['filename_pattern' => '/.*\.csv$/']); isSame('', (string)$csv->validate()); } + + public function testHeaderMatchingIfHeaderEnabled(): void + { + $columns = [ + ['name' => 'Name'], + ['name' => 'City'], + ['name' => 'Float'], + // ['name' => 'Birthday'], // We skip it for tests + ['name' => 'Favorite color'], + ]; + + $csv = new CsvFile(Tools::DEMO_CSV, ['csv' => ['header' => true], 'columns' => $columns]); + + isSame(['Name', 'City', 'Float', 'Birthday', 'Favorite color'], $csv->getHeader()); + isSame(['Name', 'City', 'Float', 'Favorite color'], $csv->getSchema()->getSchemaHeader()); + + $mappedColumns = $csv->getColumnsMappedByHeader(); + isSame('not_set', $mappedColumns[3] ?? 'not_set'); + + isSame([0, 1, 2, 4], \array_keys($mappedColumns)); + + $names = []; + foreach ($mappedColumns as $columnIndex => $column) { + isSame($columnIndex, $column->getId()); + $names[] = [$column->getName(), $column->getHumanName()]; + } + + isSame([ + ['Name', '0:Name'], + ['City', '1:City'], + ['Float', '2:Float'], + ['Favorite color', '4:Favorite color'], // 4 is important here + ], $names); + } + + public function testHeaderMatchingIfHeaderDisabled(): void + { + $columns = [ + ['name' => 'Name'], + ['name' => 'City'], + ['name' => 'Float'], + // ['name' => 'Birthday'], // We skip it for tests + ['name' => 'Favorite color'], + ]; + + $csv = new CsvFile(Tools::DEMO_CSV, ['csv' => ['header' => false], 'columns' => $columns]); + + isSame([0, 1, 2, 3, 4], $csv->getHeader()); + isSame(['Name', 'City', 'Float', 'Favorite color'], $csv->getSchema()->getSchemaHeader()); + + $mappedColumns = $csv->getColumnsMappedByHeader(); + isSame('not_set', $mappedColumns[4] ?? 'not_set'); + + isSame([0, 1, 2, 3], \array_keys($mappedColumns)); + + $names = []; + foreach ($mappedColumns as $columnIndex => $column) { + isSame($columnIndex, $column->getId()); + $names[] = [$column->getName(), $column->getHumanName()]; + } + + isSame([ + ['Name', '0:Name'], + ['City', '1:City'], + ['Float', '2:Float'], + ['Favorite color', '3:Favorite color'], // 3 is important here + ], $names); + } }