Skip to content

Commit

Permalink
Complete run of legacy importer
Browse files Browse the repository at this point in the history
  • Loading branch information
thomashoneyman committed Aug 26, 2024
1 parent 8156aa2 commit ed7913c
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 38 deletions.
25 changes: 17 additions & 8 deletions app/src/App/API.purs
Original file line number Diff line number Diff line change
Expand Up @@ -496,14 +496,23 @@ publish maybeLegacyIndex payload = do
]

unless (Operation.Validation.locationMatches (Manifest receivedManifest) (Metadata metadata)) do
Except.throw $ Array.fold
[ "The manifest file specifies a location ("
, stringifyJson Location.codec receivedManifest.location
, ") that differs from the location in the registry metadata ("
, stringifyJson Location.codec metadata.location
, "). If you would like to change the location of your package you should "
, "submit a transfer operation."
]
if isJust maybeLegacyIndex then
-- The legacy importer is sometimes run on older packages, some of which have been transferred. Since
-- package metadata only records the latest location, this can cause a problem: the manifest reports
-- the location at the time, but the metadata reports the current location.
Log.warn $ Array.fold
[ "In legacy mode and manifest location differs from existing metadata. This indicates a package that was "
, "transferred from a previous location. Ignoring location match validation..."
]
else
Except.throw $ Array.fold
[ "The manifest file specifies a location ("
, stringifyJson Location.codec receivedManifest.location
, ") that differs from the location in the registry metadata ("
, stringifyJson Location.codec metadata.location
, "). If you would like to change the location of your package you should "
, "submit a transfer operation."
]

when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do
Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package."
Expand Down
78 changes: 55 additions & 23 deletions app/src/App/Effect/Source.purs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Registry.App.Prelude
import Data.Array as Array
import Data.DateTime (DateTime)
import Data.JSDate as JSDate
import Data.String as String
import Effect.Aff (Milliseconds(..))
import Effect.Aff as Aff
import Effect.Exception as Exception
Expand Down Expand Up @@ -38,7 +39,7 @@ data ImportType = Old | Recent
derive instance Eq ImportType

-- | An effect for fetching package sources
data Source a = Fetch FilePath Location String (Either String FetchedSource -> a)
data Source a = Fetch FilePath Location String (Either FetchError FetchedSource -> a)

derive instance Functor Source

Expand All @@ -49,9 +50,24 @@ _source = Proxy

type FetchedSource = { path :: FilePath, published :: DateTime }

data FetchError
= GitHubOnly
| NoSubdir
| InaccessibleRepo Octokit.Address
| NoToplevelDir
| Fatal String

printFetchError :: FetchError -> String
printFetchError = case _ of
GitHubOnly -> "Packages are only allowed to come from GitHub for now. See issue #15."
NoSubdir -> "Monorepos and the `subdir` key are not supported yet. See issue #16."
InaccessibleRepo { owner, repo } -> "Repository located at https://github.com/" <> owner <> "/" <> repo <> ".git is inaccessible or does not exist."
NoToplevelDir -> "Downloaded tarball has no top-level directory."
Fatal err -> "Unrecoverable error. " <> err

-- | Fetch the provided location to the provided destination path.
fetch :: forall r. FilePath -> Location -> String -> Run (SOURCE + EXCEPT String + r) FetchedSource
fetch destination location ref = Except.rethrow =<< Run.lift _source (Fetch destination location ref identity)
fetch destination location ref = (Except.rethrow <<< lmap printFetchError) =<< Run.lift _source (Fetch destination location ref identity)

-- | Run the SOURCE effect given a handler.
interpret :: forall r a. (Source ~> Run r) -> Run (SOURCE + r) a -> Run r a
Expand All @@ -65,11 +81,11 @@ handle importType = case _ of
case location of
Git _ -> do
-- TODO: Support non-GitHub packages. Remember subdir when doing so. (See #15)
Except.throw "Packages are only allowed to come from GitHub for now. See #15"
Except.throw GitHubOnly

GitHub { owner, repo, subdir } -> do
-- TODO: Support subdir. In the meantime, we verify subdir is not present. (See #16)
when (isJust subdir) $ Except.throw "`subdir` is not supported for now. See #16"
when (isJust subdir) $ Except.throw NoSubdir

case pursPublishMethod of
-- This needs to be removed so that we can support non-GitHub packages (#15)
Expand All @@ -91,29 +107,45 @@ handle importType = case _ of
, timeout = Milliseconds 15_000.0
}

clonePackageAtTag = do
let url = Array.fold [ "https://github.com/", owner, "/", repo ]
let args = [ "clone", url, "--branch", ref, "--single-branch", "-c", "advice.detachedHead=false", repoDir ]
withRetry retryOpts (Git.gitCLI args Nothing) >>= case _ of
Cancelled -> Aff.throwError $ Aff.error $ "Timed out attempting to clone git tag: " <> url <> " " <> ref
Failed err -> Aff.throwError $ Aff.error err
Succeeded _ -> pure unit
cloneUrl =
Array.fold [ "https://github.com/", owner, "/", repo ]

cloneArgs =
[ "clone", cloneUrl, "--branch", ref, "--single-branch", "-c", "advice.detachedHead=false", repoDir ]

clonePackageAtTag =
withRetry retryOpts (Git.gitCLI cloneArgs Nothing) >>= case _ of
Cancelled ->
Aff.throwError $ Aff.error $ "Timed out attempting to clone git tag: " <> cloneUrl <> " " <> ref
Failed err ->
Aff.throwError $ Aff.error err
Succeeded _ ->
pure unit

Run.liftAff (Aff.attempt clonePackageAtTag) >>= case _ of
Right _ -> Log.debug $ "Cloned package source to " <> repoDir
Left error -> do
Log.warn $ "Git clone command failed:\n " <> String.joinWith " " (Array.cons "git" cloneArgs)
Log.error $ "Failed to clone git tag: " <> Aff.message error
Except.throw $ "Failed to clone repository " <> owner <> "/" <> repo <> " at ref " <> ref

-- We'll receive this message if we try to clone a repo which doesn't
-- exist, which is interpreted as an attempt to fetch a private repo.
let missingRepoErr = "fatal: could not read Username for 'https://github.com': terminal prompts disabled"

if String.contains (String.Pattern missingRepoErr) (Aff.message error) then
Except.throw $ InaccessibleRepo { owner, repo }
else
Except.throw $ Fatal $ "Failed to clone repository " <> owner <> "/" <> repo <> " at ref " <> ref

Log.debug $ "Getting published time..."

let
getRefTime = case importType of
Old -> do
timestamp <- Except.rethrow =<< Run.liftAff (Git.gitCLI [ "log", "-1", "--date=iso8601-strict", "--format=%cd", ref ] (Just repoDir))
timestamp <- (Except.rethrow <<< lmap Fatal) =<< Run.liftAff (Git.gitCLI [ "log", "-1", "--date=iso8601-strict", "--format=%cd", ref ] (Just repoDir))
jsDate <- Run.liftEffect $ JSDate.parse timestamp
dateTime <- case JSDate.toDateTime jsDate of
Nothing -> Except.throw $ "Could not parse timestamp of git ref to a datetime given timestamp " <> timestamp <> " and parsed js date " <> JSDate.toUTCString jsDate
Nothing -> Except.throw $ Fatal $ "Could not parse timestamp of git ref to a datetime given timestamp " <> timestamp <> " and parsed js date " <> JSDate.toUTCString jsDate
Just parsed -> pure parsed
pure dateTime
Recent ->
Expand All @@ -122,8 +154,8 @@ handle importType = case _ of
-- Cloning will result in the `repo` name as the directory name
publishedTime <- Except.runExcept getRefTime >>= case _ of
Left error -> do
Log.error $ "Failed to get published time: " <> error
Except.throw $ "Cloned repository " <> owner <> "/" <> repo <> " at ref " <> ref <> ", but could not read the published time from the ref."
Log.error $ "Failed to get published time. " <> printFetchError error
Except.throw $ Fatal $ "Cloned repository " <> owner <> "/" <> repo <> " at ref " <> ref <> ", but could not read the published time from the ref."
Right value -> pure value

pure { path: repoDir, published: publishedTime }
Expand All @@ -138,12 +170,12 @@ handle importType = case _ of
commit <- GitHub.getRefCommit { owner, repo } (RawVersion ref) >>= case _ of
Left githubError -> do
Log.error $ "Failed to fetch " <> upstream <> " at ref " <> ref <> ": " <> Octokit.printGitHubError githubError
Except.throw $ "Failed to fetch commit data associated with " <> upstream <> " at ref " <> ref
Except.throw $ Fatal $ "Failed to fetch commit data associated with " <> upstream <> " at ref " <> ref
Right result -> pure result
GitHub.getCommitDate { owner, repo } commit >>= case _ of
Left githubError -> do
Log.error $ "Failed to fetch " <> upstream <> " at commit " <> commit <> ": " <> Octokit.printGitHubError githubError
Except.throw $ "Unable to get published time for commit " <> commit <> " associated with the given ref " <> ref
Except.throw $ Fatal $ "Unable to get published time for commit " <> commit <> " associated with the given ref " <> ref
Right a -> pure a

let tarballName = ref <> ".tar.gz"
Expand All @@ -155,31 +187,31 @@ handle importType = case _ of
Run.liftAff $ Fetch.withRetryRequest archiveUrl {}

case response of
Cancelled -> Except.throw $ "Could not download " <> archiveUrl
Cancelled -> Except.throw $ Fatal $ "Could not download " <> archiveUrl
Failed (Fetch.FetchError error) -> do
Log.error $ "Failed to download " <> archiveUrl <> " because of an HTTP error: " <> Exception.message error
Except.throw $ "Could not download " <> archiveUrl
Except.throw $ Fatal $ "Could not download " <> archiveUrl
Failed (Fetch.StatusError { status, arrayBuffer: arrayBufferAff }) -> do
arrayBuffer <- Run.liftAff arrayBufferAff
buffer <- Run.liftEffect $ Buffer.fromArrayBuffer arrayBuffer
bodyString <- Run.liftEffect $ Buffer.toString UTF8 (buffer :: Buffer)
Log.error $ "Failed to download " <> archiveUrl <> " because of a non-200 status code (" <> show status <> ") with body " <> bodyString
Except.throw $ "Could not download " <> archiveUrl
Except.throw $ Fatal $ "Could not download " <> archiveUrl
Succeeded { arrayBuffer: arrayBufferAff } -> do
arrayBuffer <- Run.liftAff arrayBufferAff
Log.debug $ "Successfully downloaded " <> archiveUrl <> " into a buffer."
buffer <- Run.liftEffect $ Buffer.fromArrayBuffer arrayBuffer
Run.liftAff (Aff.attempt (FS.Aff.writeFile absoluteTarballPath buffer)) >>= case _ of
Left error -> do
Log.error $ "Downloaded " <> archiveUrl <> " but failed to write it to the file at path " <> absoluteTarballPath <> ":\n" <> Aff.message error
Except.throw $ "Could not download " <> archiveUrl <> " due to an internal error."
Except.throw $ Fatal $ "Could not download " <> archiveUrl <> " due to an internal error."
Right _ ->
Log.debug $ "Tarball downloaded to " <> absoluteTarballPath

Log.debug "Verifying tarball..."
Foreign.Tar.getToplevelDir absoluteTarballPath >>= case _ of
Nothing ->
Except.throw "Downloaded tarball from GitHub has no top-level directory."
Except.throw NoToplevelDir
Just path -> do
Log.debug "Extracting the tarball..."
Tar.extract { cwd: destination, archive: tarballName }
Expand Down
8 changes: 4 additions & 4 deletions app/test/Test/Assert/Run.purs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import Registry.App.Effect.Pursuit (PURSUIT, Pursuit(..))
import Registry.App.Effect.Pursuit as Pursuit
import Registry.App.Effect.Registry (REGISTRY, Registry(..))
import Registry.App.Effect.Registry as Registry
import Registry.App.Effect.Source (SOURCE, Source(..))
import Registry.App.Effect.Source (FetchError(..), SOURCE, Source(..))
import Registry.App.Effect.Source as Source
import Registry.App.Effect.Storage (STORAGE, Storage)
import Registry.App.Effect.Storage as Storage
Expand Down Expand Up @@ -309,8 +309,8 @@ handleSourceMock env = case _ of
Fetch destination location ref reply -> do
now <- Run.liftEffect Now.nowDateTime
case location of
Git _ -> pure $ reply $ Left "Packages cannot be published from Git yet (only GitHub)."
GitHub { subdir } | isJust subdir -> pure $ reply $ Left "Packages cannot use the 'subdir' key yet."
Git _ -> pure $ reply $ Left GitHubOnly
GitHub { subdir } | isJust subdir -> pure $ reply $ Left NoSubdir
GitHub { repo } -> do
let
name = stripPureScriptPrefix repo
Expand All @@ -319,7 +319,7 @@ handleSourceMock env = case _ of
localPath = Path.concat [ env.github, dirname ]
destinationPath = Path.concat [ destination, dirname <> "-checkout" ]
Run.liftAff (Aff.attempt (FS.Aff.stat localPath)) >>= case _ of
Left _ -> pure $ reply $ Left $ "Cannot copy " <> localPath <> " because it does not exist."
Left _ -> pure $ reply $ Left $ Fatal $ "Cannot copy " <> localPath <> " because it does not exist."
Right _ -> do
Run.liftAff $ FS.Extra.copy { from: localPath, to: destinationPath, preserveTimestamps: true }
case pursPublishMethod of
Expand Down
9 changes: 7 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@
# (typically >4GB), and source packgaes really ought not be shipping large
# files — just source code.
GIT_LFS_SKIP_SMUDGE = 1;

# We disable git from entering interactive mode at any time, as there is no
# one there to answer prompts.
GIT_TERMINAL_PROMPT = 0;

registryOverlay = final: prev: rec {
nodejs = prev.nodejs_20;

Expand Down Expand Up @@ -284,7 +289,7 @@
# according to the env.example file, or to the values explicitly set below
# (e.g. DHALL_PRELUDE and DHALL_TYPES).
defaultEnv = parseEnv ./.env.example // {
inherit DHALL_PRELUDE DHALL_TYPES GIT_LFS_SKIP_SMUDGE;
inherit DHALL_PRELUDE DHALL_TYPES GIT_LFS_SKIP_SMUDGE GIT_TERMINAL_PROMPT;
};

# Parse a .env file, skipping empty lines and comments, into Nix attrset
Expand Down Expand Up @@ -826,7 +831,7 @@

devShells = {
default = pkgs.mkShell {
inherit GIT_LFS_SKIP_SMUDGE;
inherit GIT_LFS_SKIP_SMUDGE GIT_TERMINAL_PROMPT;

name = "registry-dev";
packages = with pkgs; [
Expand Down
8 changes: 7 additions & 1 deletion scripts/src/LegacyImporter.purs
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,13 @@ fetchSpagoYaml address ref = do
Right contents -> do
Log.debug $ "Found spago.yaml file\n" <> contents
case parseYaml SpagoYaml.spagoYamlCodec contents of
Left error -> Run.Except.throw $ "Failed to parse spago.yaml file:\n" <> contents <> "\nwith errors:\n" <> error
Left error -> do
Log.warn $ "Failed to parse spago.yaml file:\n" <> contents <> "\nwith errors:\n" <> error
pure Nothing
Right { package: Just { publish: Just { location: Just location } } }
| location /= GitHub { owner: address.owner, repo: address.repo, subdir: Nothing } -> do
Log.warn "spago.yaml file does not use the same location it was fetched from, this is disallowed..."
pure Nothing
Right config -> case SpagoYaml.spagoYamlToManifest config of
Left err -> do
Log.warn $ "Failed to convert parsed spago.yaml file to purs.json " <> contents <> "\nwith errors:\n" <> err
Expand Down

0 comments on commit ed7913c

Please sign in to comment.