From ed7913c1ce3ab9b5e76a6da109b92136cc5ed036 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Mon, 26 Aug 2024 18:44:33 -0400 Subject: [PATCH] Complete run of legacy importer --- app/src/App/API.purs | 25 +++++++---- app/src/App/Effect/Source.purs | 78 +++++++++++++++++++++++---------- app/test/Test/Assert/Run.purs | 8 ++-- flake.nix | 9 +++- scripts/src/LegacyImporter.purs | 8 +++- 5 files changed, 90 insertions(+), 38 deletions(-) diff --git a/app/src/App/API.purs b/app/src/App/API.purs index 65009a9c..9a83187b 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -496,14 +496,23 @@ publish maybeLegacyIndex payload = do ] unless (Operation.Validation.locationMatches (Manifest receivedManifest) (Metadata metadata)) do - Except.throw $ Array.fold - [ "The manifest file specifies a location (" - , stringifyJson Location.codec receivedManifest.location - , ") that differs from the location in the registry metadata (" - , stringifyJson Location.codec metadata.location - , "). If you would like to change the location of your package you should " - , "submit a transfer operation." - ] + if isJust maybeLegacyIndex then + -- The legacy importer is sometimes run on older packages, some of which have been transferred. Since + -- package metadata only records the latest location, this can cause a problem: the manifest reports + -- the location at the time, but the metadata reports the current location. + Log.warn $ Array.fold + [ "In legacy mode and manifest location differs from existing metadata. This indicates a package that was " + , "transferred from a previous location. Ignoring location match validation..." + ] + else + Except.throw $ Array.fold + [ "The manifest file specifies a location (" + , stringifyJson Location.codec receivedManifest.location + , ") that differs from the location in the registry metadata (" + , stringifyJson Location.codec metadata.location + , "). If you would like to change the location of your package you should " + , "submit a transfer operation." + ] when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package." diff --git a/app/src/App/Effect/Source.purs b/app/src/App/Effect/Source.purs index d0c44b6c..5f5fd328 100644 --- a/app/src/App/Effect/Source.purs +++ b/app/src/App/Effect/Source.purs @@ -6,6 +6,7 @@ import Registry.App.Prelude import Data.Array as Array import Data.DateTime (DateTime) import Data.JSDate as JSDate +import Data.String as String import Effect.Aff (Milliseconds(..)) import Effect.Aff as Aff import Effect.Exception as Exception @@ -38,7 +39,7 @@ data ImportType = Old | Recent derive instance Eq ImportType -- | An effect for fetching package sources -data Source a = Fetch FilePath Location String (Either String FetchedSource -> a) +data Source a = Fetch FilePath Location String (Either FetchError FetchedSource -> a) derive instance Functor Source @@ -49,9 +50,24 @@ _source = Proxy type FetchedSource = { path :: FilePath, published :: DateTime } +data FetchError + = GitHubOnly + | NoSubdir + | InaccessibleRepo Octokit.Address + | NoToplevelDir + | Fatal String + +printFetchError :: FetchError -> String +printFetchError = case _ of + GitHubOnly -> "Packages are only allowed to come from GitHub for now. See issue #15." + NoSubdir -> "Monorepos and the `subdir` key are not supported yet. See issue #16." + InaccessibleRepo { owner, repo } -> "Repository located at https://github.com/" <> owner <> "/" <> repo <> ".git is inaccessible or does not exist." + NoToplevelDir -> "Downloaded tarball has no top-level directory." + Fatal err -> "Unrecoverable error. " <> err + -- | Fetch the provided location to the provided destination path. fetch :: forall r. FilePath -> Location -> String -> Run (SOURCE + EXCEPT String + r) FetchedSource -fetch destination location ref = Except.rethrow =<< Run.lift _source (Fetch destination location ref identity) +fetch destination location ref = (Except.rethrow <<< lmap printFetchError) =<< Run.lift _source (Fetch destination location ref identity) -- | Run the SOURCE effect given a handler. interpret :: forall r a. (Source ~> Run r) -> Run (SOURCE + r) a -> Run r a @@ -65,11 +81,11 @@ handle importType = case _ of case location of Git _ -> do -- TODO: Support non-GitHub packages. Remember subdir when doing so. (See #15) - Except.throw "Packages are only allowed to come from GitHub for now. See #15" + Except.throw GitHubOnly GitHub { owner, repo, subdir } -> do -- TODO: Support subdir. In the meantime, we verify subdir is not present. (See #16) - when (isJust subdir) $ Except.throw "`subdir` is not supported for now. See #16" + when (isJust subdir) $ Except.throw NoSubdir case pursPublishMethod of -- This needs to be removed so that we can support non-GitHub packages (#15) @@ -91,29 +107,45 @@ handle importType = case _ of , timeout = Milliseconds 15_000.0 } - clonePackageAtTag = do - let url = Array.fold [ "https://github.com/", owner, "/", repo ] - let args = [ "clone", url, "--branch", ref, "--single-branch", "-c", "advice.detachedHead=false", repoDir ] - withRetry retryOpts (Git.gitCLI args Nothing) >>= case _ of - Cancelled -> Aff.throwError $ Aff.error $ "Timed out attempting to clone git tag: " <> url <> " " <> ref - Failed err -> Aff.throwError $ Aff.error err - Succeeded _ -> pure unit + cloneUrl = + Array.fold [ "https://github.com/", owner, "/", repo ] + + cloneArgs = + [ "clone", cloneUrl, "--branch", ref, "--single-branch", "-c", "advice.detachedHead=false", repoDir ] + + clonePackageAtTag = + withRetry retryOpts (Git.gitCLI cloneArgs Nothing) >>= case _ of + Cancelled -> + Aff.throwError $ Aff.error $ "Timed out attempting to clone git tag: " <> cloneUrl <> " " <> ref + Failed err -> + Aff.throwError $ Aff.error err + Succeeded _ -> + pure unit Run.liftAff (Aff.attempt clonePackageAtTag) >>= case _ of Right _ -> Log.debug $ "Cloned package source to " <> repoDir Left error -> do + Log.warn $ "Git clone command failed:\n " <> String.joinWith " " (Array.cons "git" cloneArgs) Log.error $ "Failed to clone git tag: " <> Aff.message error - Except.throw $ "Failed to clone repository " <> owner <> "/" <> repo <> " at ref " <> ref + + -- We'll receive this message if we try to clone a repo which doesn't + -- exist, which is interpreted as an attempt to fetch a private repo. + let missingRepoErr = "fatal: could not read Username for 'https://github.com': terminal prompts disabled" + + if String.contains (String.Pattern missingRepoErr) (Aff.message error) then + Except.throw $ InaccessibleRepo { owner, repo } + else + Except.throw $ Fatal $ "Failed to clone repository " <> owner <> "/" <> repo <> " at ref " <> ref Log.debug $ "Getting published time..." let getRefTime = case importType of Old -> do - timestamp <- Except.rethrow =<< Run.liftAff (Git.gitCLI [ "log", "-1", "--date=iso8601-strict", "--format=%cd", ref ] (Just repoDir)) + timestamp <- (Except.rethrow <<< lmap Fatal) =<< Run.liftAff (Git.gitCLI [ "log", "-1", "--date=iso8601-strict", "--format=%cd", ref ] (Just repoDir)) jsDate <- Run.liftEffect $ JSDate.parse timestamp dateTime <- case JSDate.toDateTime jsDate of - Nothing -> Except.throw $ "Could not parse timestamp of git ref to a datetime given timestamp " <> timestamp <> " and parsed js date " <> JSDate.toUTCString jsDate + Nothing -> Except.throw $ Fatal $ "Could not parse timestamp of git ref to a datetime given timestamp " <> timestamp <> " and parsed js date " <> JSDate.toUTCString jsDate Just parsed -> pure parsed pure dateTime Recent -> @@ -122,8 +154,8 @@ handle importType = case _ of -- Cloning will result in the `repo` name as the directory name publishedTime <- Except.runExcept getRefTime >>= case _ of Left error -> do - Log.error $ "Failed to get published time: " <> error - Except.throw $ "Cloned repository " <> owner <> "/" <> repo <> " at ref " <> ref <> ", but could not read the published time from the ref." + Log.error $ "Failed to get published time. " <> printFetchError error + Except.throw $ Fatal $ "Cloned repository " <> owner <> "/" <> repo <> " at ref " <> ref <> ", but could not read the published time from the ref." Right value -> pure value pure { path: repoDir, published: publishedTime } @@ -138,12 +170,12 @@ handle importType = case _ of commit <- GitHub.getRefCommit { owner, repo } (RawVersion ref) >>= case _ of Left githubError -> do Log.error $ "Failed to fetch " <> upstream <> " at ref " <> ref <> ": " <> Octokit.printGitHubError githubError - Except.throw $ "Failed to fetch commit data associated with " <> upstream <> " at ref " <> ref + Except.throw $ Fatal $ "Failed to fetch commit data associated with " <> upstream <> " at ref " <> ref Right result -> pure result GitHub.getCommitDate { owner, repo } commit >>= case _ of Left githubError -> do Log.error $ "Failed to fetch " <> upstream <> " at commit " <> commit <> ": " <> Octokit.printGitHubError githubError - Except.throw $ "Unable to get published time for commit " <> commit <> " associated with the given ref " <> ref + Except.throw $ Fatal $ "Unable to get published time for commit " <> commit <> " associated with the given ref " <> ref Right a -> pure a let tarballName = ref <> ".tar.gz" @@ -155,16 +187,16 @@ handle importType = case _ of Run.liftAff $ Fetch.withRetryRequest archiveUrl {} case response of - Cancelled -> Except.throw $ "Could not download " <> archiveUrl + Cancelled -> Except.throw $ Fatal $ "Could not download " <> archiveUrl Failed (Fetch.FetchError error) -> do Log.error $ "Failed to download " <> archiveUrl <> " because of an HTTP error: " <> Exception.message error - Except.throw $ "Could not download " <> archiveUrl + Except.throw $ Fatal $ "Could not download " <> archiveUrl Failed (Fetch.StatusError { status, arrayBuffer: arrayBufferAff }) -> do arrayBuffer <- Run.liftAff arrayBufferAff buffer <- Run.liftEffect $ Buffer.fromArrayBuffer arrayBuffer bodyString <- Run.liftEffect $ Buffer.toString UTF8 (buffer :: Buffer) Log.error $ "Failed to download " <> archiveUrl <> " because of a non-200 status code (" <> show status <> ") with body " <> bodyString - Except.throw $ "Could not download " <> archiveUrl + Except.throw $ Fatal $ "Could not download " <> archiveUrl Succeeded { arrayBuffer: arrayBufferAff } -> do arrayBuffer <- Run.liftAff arrayBufferAff Log.debug $ "Successfully downloaded " <> archiveUrl <> " into a buffer." @@ -172,14 +204,14 @@ handle importType = case _ of Run.liftAff (Aff.attempt (FS.Aff.writeFile absoluteTarballPath buffer)) >>= case _ of Left error -> do Log.error $ "Downloaded " <> archiveUrl <> " but failed to write it to the file at path " <> absoluteTarballPath <> ":\n" <> Aff.message error - Except.throw $ "Could not download " <> archiveUrl <> " due to an internal error." + Except.throw $ Fatal $ "Could not download " <> archiveUrl <> " due to an internal error." Right _ -> Log.debug $ "Tarball downloaded to " <> absoluteTarballPath Log.debug "Verifying tarball..." Foreign.Tar.getToplevelDir absoluteTarballPath >>= case _ of Nothing -> - Except.throw "Downloaded tarball from GitHub has no top-level directory." + Except.throw NoToplevelDir Just path -> do Log.debug "Extracting the tarball..." Tar.extract { cwd: destination, archive: tarballName } diff --git a/app/test/Test/Assert/Run.purs b/app/test/Test/Assert/Run.purs index 9d3c27c9..42cc7d6a 100644 --- a/app/test/Test/Assert/Run.purs +++ b/app/test/Test/Assert/Run.purs @@ -44,7 +44,7 @@ import Registry.App.Effect.Pursuit (PURSUIT, Pursuit(..)) import Registry.App.Effect.Pursuit as Pursuit import Registry.App.Effect.Registry (REGISTRY, Registry(..)) import Registry.App.Effect.Registry as Registry -import Registry.App.Effect.Source (SOURCE, Source(..)) +import Registry.App.Effect.Source (FetchError(..), SOURCE, Source(..)) import Registry.App.Effect.Source as Source import Registry.App.Effect.Storage (STORAGE, Storage) import Registry.App.Effect.Storage as Storage @@ -309,8 +309,8 @@ handleSourceMock env = case _ of Fetch destination location ref reply -> do now <- Run.liftEffect Now.nowDateTime case location of - Git _ -> pure $ reply $ Left "Packages cannot be published from Git yet (only GitHub)." - GitHub { subdir } | isJust subdir -> pure $ reply $ Left "Packages cannot use the 'subdir' key yet." + Git _ -> pure $ reply $ Left GitHubOnly + GitHub { subdir } | isJust subdir -> pure $ reply $ Left NoSubdir GitHub { repo } -> do let name = stripPureScriptPrefix repo @@ -319,7 +319,7 @@ handleSourceMock env = case _ of localPath = Path.concat [ env.github, dirname ] destinationPath = Path.concat [ destination, dirname <> "-checkout" ] Run.liftAff (Aff.attempt (FS.Aff.stat localPath)) >>= case _ of - Left _ -> pure $ reply $ Left $ "Cannot copy " <> localPath <> " because it does not exist." + Left _ -> pure $ reply $ Left $ Fatal $ "Cannot copy " <> localPath <> " because it does not exist." Right _ -> do Run.liftAff $ FS.Extra.copy { from: localPath, to: destinationPath, preserveTimestamps: true } case pursPublishMethod of diff --git a/flake.nix b/flake.nix index c886c63d..44f2c753 100644 --- a/flake.nix +++ b/flake.nix @@ -70,6 +70,11 @@ # (typically >4GB), and source packgaes really ought not be shipping large # files — just source code. GIT_LFS_SKIP_SMUDGE = 1; + + # We disable git from entering interactive mode at any time, as there is no + # one there to answer prompts. + GIT_TERMINAL_PROMPT = 0; + registryOverlay = final: prev: rec { nodejs = prev.nodejs_20; @@ -284,7 +289,7 @@ # according to the env.example file, or to the values explicitly set below # (e.g. DHALL_PRELUDE and DHALL_TYPES). defaultEnv = parseEnv ./.env.example // { - inherit DHALL_PRELUDE DHALL_TYPES GIT_LFS_SKIP_SMUDGE; + inherit DHALL_PRELUDE DHALL_TYPES GIT_LFS_SKIP_SMUDGE GIT_TERMINAL_PROMPT; }; # Parse a .env file, skipping empty lines and comments, into Nix attrset @@ -826,7 +831,7 @@ devShells = { default = pkgs.mkShell { - inherit GIT_LFS_SKIP_SMUDGE; + inherit GIT_LFS_SKIP_SMUDGE GIT_TERMINAL_PROMPT; name = "registry-dev"; packages = with pkgs; [ diff --git a/scripts/src/LegacyImporter.purs b/scripts/src/LegacyImporter.purs index 95bbd61f..fade3c9b 100644 --- a/scripts/src/LegacyImporter.purs +++ b/scripts/src/LegacyImporter.purs @@ -1255,7 +1255,13 @@ fetchSpagoYaml address ref = do Right contents -> do Log.debug $ "Found spago.yaml file\n" <> contents case parseYaml SpagoYaml.spagoYamlCodec contents of - Left error -> Run.Except.throw $ "Failed to parse spago.yaml file:\n" <> contents <> "\nwith errors:\n" <> error + Left error -> do + Log.warn $ "Failed to parse spago.yaml file:\n" <> contents <> "\nwith errors:\n" <> error + pure Nothing + Right { package: Just { publish: Just { location: Just location } } } + | location /= GitHub { owner: address.owner, repo: address.repo, subdir: Nothing } -> do + Log.warn "spago.yaml file does not use the same location it was fetched from, this is disallowed..." + pure Nothing Right config -> case SpagoYaml.spagoYamlToManifest config of Left err -> do Log.warn $ "Failed to convert parsed spago.yaml file to purs.json " <> contents <> "\nwith errors:\n" <> err