Skip to content

Commit

Permalink
Share FileAndSourceMapIdCache between lazy compilation invocations
Browse files Browse the repository at this point in the history
Summary:
It is possible for the `sourceMappingURL` to be tens of MB,
resulting in slow lookups, which is why this cache exists.
It's not reused between lazy compilations when it's instantiated
inside `BytecodeModuleGenerator`, so allow it to be passed in
and store it in `BCProviderFromSrc` for lazy compilation purposes.

This wasn't a problem in Hermes because the `stripSourceMappingURL`
option was being provided to the lazy compilation invocations,
but that seemed more fragile than just doing everything the same
and reusing the cache for performance optimization.

This is made easy enough by moving the definitions into their own
header to avoid circular dependencies in the HBC directory.

Reviewed By: neildhar

Differential Revision: D68166926

fbshipit-source-id: 14f52273f4642df91dca2db0ca2011fa11b9a4f6
  • Loading branch information
avp authored and facebook-github-bot committed Jan 24, 2025
1 parent 9d2971f commit ffbf125
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 29 deletions.
12 changes: 12 additions & 0 deletions include/hermes/BCGen/HBC/BCProviderFromSrc.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "hermes/BCGen/HBC/BCProvider.h"
#include "hermes/BCGen/HBC/Bytecode.h"
#include "hermes/BCGen/HBC/FileAndSourceMapIdCache.h"

#include "llvh/ADT/Optional.h"

Expand Down Expand Up @@ -87,6 +88,12 @@ class BCProviderFromSrc final : public BCProviderBase {
/// which reuses SemContext and the IR but makes a new BCProvider.
std::shared_ptr<sema::SemContext> semCtx;

/// The file and source map ID cache used for compiling more code into this
/// BytecodeModule.
/// Keep alive between lazy compilation calls to avoid expensive lookups
/// for huge data URLs in sourceMappingURL.
FileAndSourceMapIdCache fileAndSourceMapIdCache;

explicit CompilationData(
const BytecodeGenerationOptions &genOptions,
const std::shared_ptr<Module> &M,
Expand Down Expand Up @@ -236,6 +243,11 @@ class BCProviderFromSrc final : public BCProviderBase {
return compilationData_.semCtx;
}

/// \return the FileAndSourceMapIdCache for debug IDs.
FileAndSourceMapIdCache &getFileAndSourceMapIdCache() {
return compilationData_.fileAndSourceMapIdCache;
}

SHA1 getSourceHash() const override {
return sourceHash_;
};
Expand Down
31 changes: 31 additions & 0 deletions include/hermes/BCGen/HBC/FileAndSourceMapIdCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include "llvh/ADT/DenseMap.h"

#include <cstdint>

namespace hermes::hbc {

/// The filename ID and source map URL ID of a buffer.
struct FileAndSourceMapId {
/// ID of the filename when added to BytecodeFunctionGenerator.
uint32_t filenameId;
/// ID of the source map URL when added as a file to
/// BytecodeFunctionGenerator.
uint32_t sourceMappingUrlId;
};

/// A map from buffer ID to filelname+source map.
/// Looking up filename/sourcemap id for each instruction is pretty slow,
/// so we cache it.
using FileAndSourceMapIdCache =
llvh::SmallDenseMap<unsigned, FileAndSourceMapId>;

} // namespace hermes::hbc
1 change: 1 addition & 0 deletions include/hermes/BCGen/HBC/HBC.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ bool generateBytecodeFunctionLazy(
Module *M,
Function *lazyFunc,
uint32_t lazyFuncID,
FileAndSourceMapIdCache &debugIdCache,
const BytecodeGenerationOptions &options);

/// Generates a BytecodeModule from a module \p M, and will return a unique_ptr
Expand Down
5 changes: 1 addition & 4 deletions lib/BCGen/HBC/BytecodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,6 @@ bool BytecodeModuleGenerator::generateAddedFunctions() {
BytecodeOptions &bytecodeOptions = bm_.getBytecodeOptionsMut();
bytecodeOptions.cjsModulesStaticallyResolved = M_->getCJSModulesResolved();

// Allow reusing the debug cache between functions
FileAndSourceMapIdCache debugCache{};

M_->assignIndexToVariables();

const uint32_t strippedFunctionNameId =
Expand Down Expand Up @@ -505,7 +502,7 @@ bool BytecodeModuleGenerator::generateAddedFunctions() {
*this,
RA,
options_,
debugCache,
debugIdCache_,
sourceMapGen_,
debugInfoGenerator_);
if (!func)
Expand Down
5 changes: 5 additions & 0 deletions lib/BCGen/HBC/BytecodeGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class BytecodeModuleGenerator {
/// This map is populated before instruction selection.
LiteralBufferBuilder::LiteralOffsetMapTy literalOffsetMap_{};

/// The debug ID cache for the module.
FileAndSourceMapIdCache &debugIdCache_;

/// Options controlling bytecode generation.
BytecodeGenerationOptions options_;

Expand Down Expand Up @@ -98,12 +101,14 @@ class BytecodeModuleGenerator {
BytecodeModuleGenerator(
BytecodeModule &bcModule,
Module *M,
FileAndSourceMapIdCache &debugIdCache,
BytecodeGenerationOptions options = BytecodeGenerationOptions::defaults(),
SourceMapGenerator *sourceMapGen = nullptr,
std::unique_ptr<BCProviderBase> baseBCProvider = nullptr)
: bm_(bcModule),
M_(M),
debugInfoGenerator_(bm_.getDebugInfo()),
debugIdCache_(debugIdCache),
options_(options),
sourceMapGen_(sourceMapGen),
baseBCProvider_(std::move(baseBCProvider)) {
Expand Down
18 changes: 14 additions & 4 deletions lib/BCGen/HBC/HBC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@ std::unique_ptr<BytecodeModule> generateBytecodeModule(
std::unique_ptr<BCProviderBase> baseBCProvider) {
PerfSection perf("Bytecode Generation");
auto bm = std::make_unique<BytecodeModule>();
FileAndSourceMapIdCache debugIdCache{};

bool success =
BytecodeModuleGenerator{
*bm, M, options, sourceMapGen, std::move(baseBCProvider)}
*bm,
M,
debugIdCache,
options,
sourceMapGen,
std::move(baseBCProvider)}
.generate(entryPoint, segment);

return success ? std::move(bm) : nullptr;
Expand All @@ -45,8 +51,9 @@ bool generateBytecodeFunctionLazy(
Module *M,
Function *lazyFunc,
uint32_t lazyFuncID,
FileAndSourceMapIdCache &debugIdCache,
const BytecodeGenerationOptions &options) {
return BytecodeModuleGenerator{bm, M, options, nullptr, nullptr}
return BytecodeModuleGenerator{bm, M, debugIdCache, options, nullptr, nullptr}
.generateLazyFunctions(lazyFunc, lazyFuncID);
}

Expand All @@ -56,9 +63,11 @@ std::unique_ptr<BytecodeModule> generateBytecodeModuleForEval(
const BytecodeGenerationOptions &options) {
PerfSection perf("Bytecode Generation");
auto bm = std::make_unique<BytecodeModule>();
FileAndSourceMapIdCache debugIdCache{};

bool success = BytecodeModuleGenerator{*bm, M, options, nullptr, nullptr}
.generateForEval(entryPoint);
bool success =
BytecodeModuleGenerator{*bm, M, debugIdCache, options, nullptr, nullptr}
.generateForEval(entryPoint);

return success ? std::move(bm) : nullptr;
}
Expand Down Expand Up @@ -165,6 +174,7 @@ static void compileLazyFunctionWorker(void *argPtr) {
M,
func,
funcID,
provider->getFileAndSourceMapIdCache(),
provider->getBytecodeGenerationOptions())) {
data->success = false;
data->error = outputManager.getErrorString();
Expand Down
3 changes: 0 additions & 3 deletions lib/BCGen/HBC/ISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,6 @@ inline FileAndSourceMapId HBCISel::obtainFileAndSourceMapId(
sourceMappingUrl = sm.getSourceMappingUrl(bufId);
}

// Lazily compiled functions ask to strip the source mapping URL because
// it was already encoded in the top level module, and it could be a 1MB+
// data url that we don't want to duplicate once per function.
if (sourceMappingUrl.empty()) {
currentSourceMappingUrlId = kInvalidSourceMappingUrlId;
} else {
Expand Down
19 changes: 1 addition & 18 deletions lib/BCGen/HBC/ISel.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
#ifndef HERMES_BCGEN_HBC_ISEL_H
#define HERMES_BCGEN_HBC_ISEL_H

#include "llvh/ADT/DenseMap.h"

#include <cstdint>
#include "hermes/BCGen/HBC/FileAndSourceMapIdCache.h"

namespace hermes {

Expand All @@ -23,21 +21,6 @@ namespace hbc {
class BytecodeFunctionGenerator;
class HVMRegisterAllocator;

/// The filename ID and source map URL ID of a buffer.
struct FileAndSourceMapId {
/// ID of the filename when added to BytecodeFunctionGenerator.
uint32_t filenameId;
/// ID of the source map URL when added as a file to
/// BytecodeFunctionGenerator.
uint32_t sourceMappingUrlId;
};

/// A map from buffer ID to filelname+source map.
/// Looking up filename/sourcemap id for each instruction is pretty slow,
/// so we cache it.
using FileAndSourceMapIdCache =
llvh::SmallDenseMap<unsigned, FileAndSourceMapId>;

/// Generate the bytecode stream for the function.
void runHBCISel(
Function *F,
Expand Down

0 comments on commit ffbf125

Please sign in to comment.