Skip to content

Commit

Permalink
Merge pull request #16096 from uberbrady/detect_csv_encodings_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
snipe authored Jan 20, 2025
2 parents 327491c + bbb2af7 commit 446c7fb
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 4 deletions.
23 changes: 22 additions & 1 deletion app/Http/Controllers/Api/ImportController.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
use App\Models\Asset;
use App\Models\Company;
use App\Models\Import;
use Illuminate\Http\UploadedFile;
use Illuminate\Support\Facades\Artisan;
use Illuminate\Database\Eloquent\JsonEncodingException;
use Illuminate\Support\Facades\Request;
use Illuminate\Support\Facades\Session;
use Illuminate\Support\Facades\Storage;
use League\Csv\Reader;
use Onnov\DetectEncoding\EncodingDetector;
use Symfony\Component\HttpFoundation\File\Exception\FileException;
use Illuminate\Support\Facades\Log;
use Illuminate\Http\JsonResponse;
Expand Down Expand Up @@ -45,6 +47,8 @@ public function store() : JsonResponse
$path = config('app.private_uploads').'/imports';
$results = [];
$import = new Import;
$detector = new EncodingDetector();

foreach ($files as $file) {
if (! in_array($file->getMimeType(), [
'application/vnd.ms-excel',
Expand All @@ -55,15 +59,32 @@ public function store() : JsonResponse
'text/comma-separated-values',
'text/tsv', ])) {
$results['error'] = 'File type must be CSV. Uploaded file is '.$file->getMimeType();

return response()->json(Helper::formatStandardApiResponse('error', null, $results['error']), 422);
}

//TODO: is there a lighter way to do this?
if (! ini_get('auto_detect_line_endings')) {
ini_set('auto_detect_line_endings', '1');
}
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
if ($tmpresults !== false) {
$transliterated = null; //save on memory?
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
}
}
}
}
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
$file_contents = null; //try to save on memory, I guess?
try {
$import->header_row = $reader->fetchOne(0);
Expand Down
2 changes: 2 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"php": "^8.1",
"ext-curl": "*",
"ext-fileinfo": "*",
"ext-iconv": "*",
"ext-json": "*",
"ext-mbstring": "*",
"ext-pdo": "*",
Expand Down Expand Up @@ -55,6 +56,7 @@
"nunomaduro/collision": "^7.0",
"okvpn/clock-lts": "^1.0",
"onelogin/php-saml": "^3.4",
"onnov/detect-encoding": "^2.0",
"osa-eg/laravel-teams-notification": "^2.1",
"paragonie/constant_time_encoding": "^2.3",
"paragonie/sodium_compat": "^1.19",
Expand Down
67 changes: 66 additions & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions tests/Feature/Importing/Ui/ImportTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
namespace Tests\Feature\Importing\Ui;

use App\Models\User;
use Illuminate\Http\UploadedFile;
use PHPUnit\Framework\Attributes\Test;
use Tests\TestCase;

class ImportTest extends TestCase
Expand All @@ -13,4 +15,35 @@ public function testPageRenders()
->get(route('imports.index'))
->assertOk();
}

public function testStoreInternationalAsset(): void
{
$evil_string = 'це'; //cyrillic - windows-1251 (ONE)
$csv_contents = "a,b,c\n$evil_string,$evil_string,$evil_string\n";

// now, deliberately transform our UTF-8 into Windows-1251 so we can test out the character-set detection
$transliterated_contents = iconv('UTF-8', 'WINDOWS-1251', $csv_contents);
//\Log::error("RAW TRANSLITERATED CONTENTS: $transliterated_contents"); // should show 'unicode missing glyph' symbol in logs.

$this->actingAsForApi(User::factory()->superuser()->create());
$results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $transliterated_contents)]])
->assertOk()
->assertJsonStructure([
"files" => [
[
"created_at",
"field_map",
"file_path",
"filesize",
"first_row",
"header_row",
"id",
"import_type",
"name",
]
]
]);
\Log::error(print_r($results, true));
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
}
}
10 changes: 8 additions & 2 deletions tests/Support/Importing/FileBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -206,17 +206,23 @@ public function toCsv(): array
*
* @return string The filename.
*/
public function saveToImportsDirectory(?string $filename = null): string
public function saveToImportsDirectory(?string $filename = null, ?string $locale = null): string
{
$filename ??= Str::random(40) . '.csv';

try {
$stream = fopen(config('app.private_uploads') . "/imports/{$filename}", 'w');

foreach ($this->toCsv() as $row) {
if ($locale) {
$newrow = [];
foreach ($row as $index => $cell) {
$newrow[$index] = iconv('utf-8', $locale, (string) $cell);
}
$row = $newrow;
}
fputcsv($stream, $row);
}

return $filename;
} finally {
if (is_resource($stream)) {
Expand Down

0 comments on commit 446c7fb

Please sign in to comment.