From 00aa82291987d0f19c805a2f00d7e90667826eb6 Mon Sep 17 00:00:00 2001 From: Jiadong Bai <43344272+bobbai00@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:44:00 -0800 Subject: [PATCH] Refactor DatasetResource and DatasetAccessResource (#3210) This PR refactors the codes style of the `DatasetResource` and `DatasetAccessResource`. Specifically, the refactoring principles are: 1. Move all the function definition in `object`, that are only being used within the `class`, to the `class` 2. Uniform the way of interacting with jooq: use either jooq raw query, or Dao, not both. 3. Flatten single-kv-pair case classes. 4. Flatten the single-line function. --- .../dashboard/DatasetSearchQueryBuilder.scala | 2 +- .../user/dataset/DatasetAccessResource.scala | 57 +-- .../user/dataset/DatasetResource.scala | 472 ++++++++---------- .../utils/DatasetStatisticsUtils.scala | 2 +- .../web/service/ResultExportService.scala | 12 +- .../user-dataset-explorer.component.ts | 1 + .../service/user/dataset/dataset.service.ts | 32 +- .../file-selection.component.ts | 4 +- .../result-exportation.component.ts | 3 +- 9 files changed, 250 insertions(+), 335 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/DatasetSearchQueryBuilder.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/DatasetSearchQueryBuilder.scala index 7157e4f9c40..b83903b7e1f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/DatasetSearchQueryBuilder.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/DatasetSearchQueryBuilder.scala @@ -118,7 +118,7 @@ object DatasetSearchQueryBuilder extends SearchQueryBuilder { ), dataset.getOwnerUid == uid, List(), - DatasetResource.calculateLatestDatasetVersionSize(dataset.getDid) + DatasetResource.calculateDatasetVersionSize(dataset.getDid) ) DashboardClickableFileEntry( resourceType = SearchQueryBuilder.DATASET_RESOURCE_TYPE, diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetAccessResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetAccessResource.scala index 4f54decbf5e..7f68b233f9c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetAccessResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetAccessResource.scala @@ -14,7 +14,8 @@ import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetAccessResou context, getOwner } -import org.jooq.DSLContext +import org.jooq.{Condition, DSLContext} +import org.jooq.impl.DSL import org.jooq.types.UInteger import java.util @@ -28,19 +29,21 @@ object DatasetAccessResource { .createDSLContext() def userHasReadAccess(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + val datasetDao = new DatasetDao(ctx.configuration()) + val isDatasetPublic = Option(datasetDao.fetchOneByDid(did)) + .flatMap(dataset => Option(dataset.getIsPublic)) + .contains(1.toByte) + + isDatasetPublic || userHasWriteAccess(ctx, did, uid) || - datasetIsPublic(ctx, did) || getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.READ } def userOwnDataset(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { - ctx - .selectOne() - .from(DATASET) - .where(DATASET.DID.eq(did)) - .and(DATASET.OWNER_UID.eq(uid)) - .fetch() - .isNotEmpty + val datasetDao = new DatasetDao(ctx.configuration()) + + Option(datasetDao.fetchOneByDid(did)) + .exists(_.getOwnerUid == uid) } def userHasWriteAccess(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { @@ -48,16 +51,6 @@ object DatasetAccessResource { getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.WRITE } - def datasetIsPublic(ctx: DSLContext, did: UInteger): Boolean = { - Option( - ctx - .select(DATASET.IS_PUBLIC) - .from(DATASET) - .where(DATASET.DID.eq(did)) - .fetchOneInto(classOf[Boolean]) - ).getOrElse(false) - } - def getDatasetUserAccessPrivilege( ctx: DSLContext, did: UInteger, @@ -67,21 +60,23 @@ object DatasetAccessResource { ctx .select(DATASET_USER_ACCESS.PRIVILEGE) .from(DATASET_USER_ACCESS) - .where(DATASET_USER_ACCESS.DID.eq(did)) - .and(DATASET_USER_ACCESS.UID.eq(uid)) - .fetchOne() - ) - .map(_.getValue(DATASET_USER_ACCESS.PRIVILEGE)) - .getOrElse(DatasetUserAccessPrivilege.NONE) + .where( + DATASET_USER_ACCESS.DID + .eq(did) + .and(DATASET_USER_ACCESS.UID.eq(uid)) + ) + .fetchOneInto(classOf[DatasetUserAccessPrivilege]) + ).getOrElse(DatasetUserAccessPrivilege.NONE) } def getOwner(ctx: DSLContext, did: UInteger): User = { - val ownerUid = ctx - .select(DATASET.OWNER_UID) - .from(DATASET) - .where(DATASET.DID.eq(did)) - .fetchOneInto(classOf[UInteger]) - new UserDao(ctx.configuration()).fetchOneByUid(ownerUid) + val datasetDao = new DatasetDao(ctx.configuration()) + val userDao = new UserDao(ctx.configuration()) + + Option(datasetDao.fetchOneByDid(did)) + .flatMap(dataset => Option(dataset.getOwnerUid)) + .map(ownerUid => userDao.fetchOneByUid(ownerUid)) + .orNull } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala index 36d43e9905b..9a319c308ba 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala @@ -26,7 +26,7 @@ import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ DatasetVersion, User } -import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetAccessResource._ +import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetAccessResource.{context, _} import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.{context, _} import edu.uci.ics.texera.web.resource.dashboard.user.dataset.`type`.DatasetFileNode import io.dropwizard.auth.Auth @@ -40,7 +40,6 @@ import java.io.{IOException, InputStream, OutputStream} import java.net.{URI, URLDecoder} import java.nio.charset.StandardCharsets import java.nio.file.Files -import java.util import java.util.Optional import java.util.concurrent.locks.ReentrantLock import java.util.zip.{ZipEntry, ZipOutputStream} @@ -55,45 +54,77 @@ import scala.util.control.NonFatal import scala.util.{Failure, Success, Try, Using} object DatasetResource { - val DATASET_IS_PUBLIC: Byte = 1; - val DATASET_IS_PRIVATE: Byte = 0; - val FILE_OPERATION_UPLOAD_PREFIX = "file:upload:" - val FILE_OPERATION_REMOVE_PREFIX = "file:remove" + private val DATASET_IS_PUBLIC: Byte = 1 + private val DATASET_IS_PRIVATE: Byte = 0 + private val FILE_OPERATION_UPLOAD_PREFIX = "file:upload:" + private val FILE_OPERATION_REMOVE_PREFIX = "file:remove" - val datasetLocks: scala.collection.concurrent.Map[UInteger, ReentrantLock] = + private val datasetLocks: scala.collection.concurrent.Map[UInteger, ReentrantLock] = new scala.collection.concurrent.TrieMap[UInteger, ReentrantLock]() private val context = SqlServer .getInstance(StorageConfig.jdbcUrl, StorageConfig.jdbcUsername, StorageConfig.jdbcPassword) .createDSLContext() - // error messages - val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no read access to this dataset" - val ERR_DATASET_NOT_FOUND_MESSAGE = "Dataset not found" - val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found" - val ERR_DATASET_CREATION_FAILED_MESSAGE = - "Dataset creation is failed. Please make sure to upload files in order to create the initial version of dataset" - val ERR_DATASET_NAME_ALREADY_EXISTS = "A dataset with the same name already exists." + /** + * fetch the size of a certain dataset version. + * @param did the target dataset id + * @param versionHash the hash of the version. If None, fetch the latest version + * @return + */ + def calculateDatasetVersionSize(did: UInteger, versionHash: Option[String] = None): Long = { + + /** + * Internal util to calculate the size from the physical nodes + */ + def calculateSizeFromPhysicalNodes(nodes: java.util.Set[PhysicalFileNode]): Long = { + nodes.asScala.foldLeft(0L) { (totalSize, node) => + totalSize + (if (node.isDirectory) { + calculateSizeFromPhysicalNodes(node.getChildren) + } else { + node.getSize + }) + } + } + + Try { + val datasetPath = PathUtils.getDatasetPath(did) + val hash = versionHash.getOrElse { + getLatestDatasetVersion(context, did) + .map(_.getVersionHash) + .getOrElse(throw new NoSuchElementException("No versions found for this dataset")) + } - def sanitizePath(input: String): String = { - // Define the characters you want to remove - val sanitized = StringUtils.replaceEach(input, Array("/", "\\"), Array("", "")) - sanitized + val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( + datasetPath, + hash + ) + + calculateSizeFromPhysicalNodes(fileNodes) + } match { + case Success(size) => size + case Failure(exception) => + val errorMessage = versionHash.map(_ => "dataset version").getOrElse("dataset") + println(s"Error calculating $errorMessage size: ${exception.getMessage}") + 0L + } } - // this function get the dataset from DB identified by did, - // read access will be checked + /** + * Helper function to get the dataset from DB using did + */ private def getDatasetByID(ctx: DSLContext, did: UInteger): Dataset = { val datasetDao = new DatasetDao(ctx.configuration()) val dataset = datasetDao.fetchOneByDid(did) if (dataset == null) { - throw new NotFoundException(ERR_DATASET_NOT_FOUND_MESSAGE) + throw new NotFoundException(f"Dataset $did not found") } dataset } - // this function retrieve the version hash identified by dvid and did - // read access will be checked + /** + * Helper function to get the dataset version from DB using dvid + */ private def getDatasetVersionByID( ctx: DSLContext, dvid: UInteger @@ -106,52 +137,10 @@ object DatasetResource { version } - // this function retrieve the DashboardDataset(Dataset from DB+more information) identified by did - // read access will be checked - def getDashboardDataset(ctx: DSLContext, did: UInteger, uid: UInteger): DashboardDataset = { - if (!userHasReadAccess(ctx, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - - val targetDataset = getDatasetByID(ctx, did) - val userAccessPrivilege = getDatasetUserAccessPrivilege(ctx, did, uid) - - DashboardDataset( - targetDataset, - getOwner(ctx, did).getEmail, - userAccessPrivilege, - targetDataset.getOwnerUid == uid, - List(), - calculateLatestDatasetVersionSize(did) - ) - } - - // the format of dataset version name is: v{#n} - {user provided dataset version name}. e.g. v10 - new version - private def generateDatasetVersionName( - ctx: DSLContext, - did: UInteger, - userProvidedVersionName: String - ): String = { - val numberOfExistingVersions = ctx - .selectFrom(DATASET_VERSION) - .where(DATASET_VERSION.DID.eq(did)) - .fetch() - .size() - - val sanitizedUserProvidedVersionName = sanitizePath(userProvidedVersionName) - val res = if (sanitizedUserProvidedVersionName == "") { - "v" + (numberOfExistingVersions + 1).toString - } else { - "v" + (numberOfExistingVersions + 1).toString + " - " + sanitizedUserProvidedVersionName - } - - res - } - - // this function retrieve the latest DatasetVersion from DB - // the latest here means the one with latest creation time - // read access will be checked - private def fetchLatestDatasetVersionInternal( + /** + * Helper function to get the latest dataset version from the DB + */ + private def getLatestDatasetVersion( ctx: DSLContext, did: UInteger ): Option[DatasetVersion] = { @@ -164,37 +153,6 @@ object DatasetResource { .toScala } - def getLatestDatasetVersionWithAccessCheck( - ctx: DSLContext, - did: UInteger, - uid: UInteger - ): DatasetVersion = { - if (!userHasReadAccess(ctx, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - - fetchLatestDatasetVersionInternal(ctx, did) match { - case Some(latestVersion) => latestVersion - case None => throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) - } - } - - private def getFileNodesOfCertainVersion( - ownerNode: DatasetFileNode, - datasetName: String, - ownerName: String - ): List[DatasetFileNode] = { - ownerNode.children.get - .find(_.getName == datasetName) - .head - .children - .get - .find(_.getName == ownerName) - .head - .children - .get - } - // DatasetOperation defines the operations that will be applied when creating a new dataset version private case class DatasetOperation( filesToAdd: Map[java.nio.file.Path, InputStream], @@ -243,7 +201,13 @@ object DatasetResource { DatasetOperation(filesToAdd.toMap, filesToRemove.toList) } - // add file(s) to a dataset, a new version will be created + /** + * Create a new dataset version by adding new files + * @param did the target dataset id + * @param user the user submitting the request + * @param filesToAdd the map containing the files to add + * @return the created dataset version + */ def createNewDatasetVersionByAddingFiles( did: UInteger, user: User, @@ -259,50 +223,6 @@ object DatasetResource { ) } - // create a new dataset version using the form data from frontend - def createNewDatasetVersionFromFormData( - ctx: DSLContext, - did: UInteger, - uid: UInteger, - ownerEmail: String, - userProvidedVersionName: String, - multiPart: FormDataMultiPart - ): Option[DashboardDatasetVersion] = { - val datasetOperation = parseUserUploadedFormToDatasetOperations(did, multiPart) - applyDatasetOperationToCreateNewVersion( - ctx, - did, - uid, - ownerEmail, - userProvidedVersionName, - datasetOperation - ) - } - - // Private method to get user datasets - private def getUserDatasets(ctx: DSLContext, uid: UInteger): List[Dataset] = { - ctx - .selectFrom(DATASET) - .where(DATASET.OWNER_UID.eq(uid)) - .fetchInto(classOf[Dataset]) - .asScala - .toList - } - - private def getDatasetVersions( - ctx: DSLContext, - did: UInteger, - uid: UInteger - ): List[DatasetVersion] = { - val result: java.util.List[DatasetVersion] = ctx - .selectFrom(DATASET_VERSION) - .where(DATASET_VERSION.DID.eq(did)) - .orderBy(DATASET_VERSION.CREATION_TIME.desc()) // or .asc() for ascending - .fetchInto(classOf[DatasetVersion]) - - result.asScala.toList - } - // apply the dataset operation to create a new dataset version // it returns the created dataset version if creation succeed, else return None // concurrency control is performed here: the thread has to have the lock in order to create the new version @@ -314,6 +234,30 @@ object DatasetResource { userProvidedVersionName: String, datasetOperation: DatasetOperation ): Option[DashboardDatasetVersion] = { + // Helper function to generate the dataset version name + // the format of dataset version name is: v{#n} - {user provided dataset version name}. e.g. v10 - new version + def generateDatasetVersionName( + ctx: DSLContext, + did: UInteger, + userProvidedVersionName: String + ): String = { + val numberOfExistingVersions = ctx + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .fetch() + .size() + + val sanitizedUserProvidedVersionName = + StringUtils.replaceEach(userProvidedVersionName, Array("/", "\\"), Array("", "")) + val res = if (sanitizedUserProvidedVersionName == "") { + "v" + (numberOfExistingVersions + 1).toString + } else { + "v" + (numberOfExistingVersions + 1).toString + " - " + sanitizedUserProvidedVersionName + } + + res + } + // Acquire or Create the lock for dataset of {did} val lock = DatasetResource.datasetLocks.getOrElseUpdate(did, new ReentrantLock()) @@ -376,30 +320,6 @@ object DatasetResource { } } - private def retrievePublicDatasets(ctx: DSLContext): util.List[DashboardDataset] = { - ctx - .select() - .from( - DATASET - .leftJoin(USER) - .on(USER.UID.eq(DATASET.OWNER_UID)) - ) - .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) - .fetch() - .map(record => { - val dataset = record.into(DATASET).into(classOf[Dataset]) - val ownerEmail = record.into(USER).getEmail - DashboardDataset( - isOwner = false, - dataset = dataset, - accessPrivilege = DatasetUserAccessPrivilege.READ, - versions = List(), - ownerEmail = ownerEmail, - size = calculateLatestDatasetVersionSize(dataset.getDid) - ) - }) - } - case class DashboardDataset( dataset: Dataset, ownerEmail: String, @@ -408,15 +328,6 @@ object DatasetResource { versions: List[DashboardDatasetVersion], size: Long ) - - case class ListDatasetsResponse( - datasets: List[DashboardDataset] - ) - - case class DatasetVersionRootFileNodes(fileNodes: List[DatasetFileNode]) - - case class DatasetVersions(versions: List[DatasetVersion]) - case class DashboardDatasetVersion( datasetVersion: DatasetVersion, fileNodes: List[DatasetFileNode] @@ -428,55 +339,8 @@ object DatasetResource { case class DatasetDescriptionModification(did: UInteger, description: String) - /* - If versionHash is provided, calculate the size of the specific version of the dataset. - Otherwise, calculate the size of the latest version of the dataset. - */ - private def calculateSize(did: UInteger, versionHash: Option[String] = None): Long = { - Try { - val datasetPath = PathUtils.getDatasetPath(did) - val hash = versionHash.getOrElse { - fetchLatestDatasetVersionInternal(context, did) - .map(_.getVersionHash) - .getOrElse(throw new NoSuchElementException("No versions found for this dataset")) - } - - val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( - datasetPath, - hash - ) - - calculateSizeFromPhysicalNodes(fileNodes) - } match { - case Success(size) => size - case Failure(exception) => - val errorMessage = versionHash.map(_ => "dataset version").getOrElse("dataset") - println(s"Error calculating $errorMessage size: ${exception.getMessage}") - 0L - } - } - - def calculateDatasetVersionSize(did: UInteger, dvid: UInteger): Long = { - val versionHash = getDatasetVersionByID(context, dvid).getVersionHash - calculateSize(did, Some(versionHash)) - } - - def calculateLatestDatasetVersionSize(did: UInteger): Long = { - calculateSize(did) - } - - private def calculateSizeFromPhysicalNodes(nodes: java.util.Set[PhysicalFileNode]): Long = { - nodes.asScala.foldLeft(0L) { (totalSize, node) => - totalSize + (if (node.isDirectory) { - calculateSizeFromPhysicalNodes(node.getChildren) - } else { - node.getSize - }) - } - } - case class DatasetVersionRootFileNodesResponse( - rootFileNodes: DatasetVersionRootFileNodes, + fileNodes: List[DatasetFileNode], size: Long ) } @@ -485,6 +349,57 @@ object DatasetResource { @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/dataset") class DatasetResource { + private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no read access to this dataset" + private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found" + private val ERR_DATASET_CREATION_FAILED_MESSAGE = + "Dataset creation is failed. Please make sure to upload files in order to create the initial version of dataset" + + /** + * Helper function to get the dataset from DB with additional information including user access privilege and owner email + */ + private def getDashboardDataset( + ctx: DSLContext, + did: UInteger, + uid: UInteger + ): DashboardDataset = { + if (!userHasReadAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val targetDataset = getDatasetByID(ctx, did) + val userAccessPrivilege = getDatasetUserAccessPrivilege(ctx, did, uid) + + DashboardDataset( + targetDataset, + getOwner(ctx, did).getEmail, + userAccessPrivilege, + targetDataset.getOwnerUid == uid, + List(), + calculateDatasetVersionSize(did) + ) + } + + /** + * Helper function to create a new dataset version using the given multi-part form. + */ + private def createNewDatasetVersionFromFormData( + ctx: DSLContext, + did: UInteger, + uid: UInteger, + ownerEmail: String, + userProvidedVersionName: String, + multiPart: FormDataMultiPart + ): Option[DashboardDatasetVersion] = { + val datasetOperation = parseUserUploadedFormToDatasetOperations(did, multiPart) + applyDatasetOperationToCreateNewVersion( + ctx, + did, + uid, + ownerEmail, + userProvidedVersionName, + datasetOperation + ) + } @POST @Path("/create") @@ -500,12 +415,13 @@ class DatasetResource { withTransaction(context) { ctx => val uid = user.getUid + val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) // do the name duplication check - val existingDatasets = getUserDatasets(ctx, uid) - if (existingDatasets.exists(_.getName == datasetName)) { - throw new BadRequestException(ERR_DATASET_NAME_ALREADY_EXISTS) + val userExistingDatasetNames = datasetDao.fetchByOwnerUid(uid).asScala.map(_.getName) + if (userExistingDatasetNames.contains(datasetName)) { + throw new BadRequestException("Dataset with the same name already exists") } val dataset: Dataset = new Dataset() @@ -556,7 +472,7 @@ class DatasetResource { DatasetUserAccessPrivilege.WRITE, isOwner = true, versions = List(), - size = calculateLatestDatasetVersionSize(did) + size = calculateDatasetVersionSize(did) ) } } @@ -695,7 +611,7 @@ class DatasetResource { @Path("") def listDatasets( @Auth user: SessionUser - ): ListDatasetsResponse = { + ): List[DashboardDataset] = { val uid = user.getUid withTransaction(context)(ctx => { var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() @@ -722,14 +638,34 @@ class DatasetResource { accessPrivilege = datasetAccess.getPrivilege, versions = List(), ownerEmail = ownerEmail, - size = calculateLatestDatasetVersionSize(dataset.getDid) + size = calculateDatasetVersionSize(dataset.getDid) ) }) .asScala ) // then we fetch the public datasets and merge it as a part of the result if not exist - val publicDatasets = retrievePublicDatasets(context) + val publicDatasets = ctx + .select() + .from( + DATASET + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + ) + .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) + .fetch() + .map(record => { + val dataset = record.into(DATASET).into(classOf[Dataset]) + val ownerEmail = record.into(USER).getEmail + DashboardDataset( + isOwner = false, + dataset = dataset, + accessPrivilege = DatasetUserAccessPrivilege.READ, + versions = List(), + ownerEmail = ownerEmail, + size = calculateDatasetVersionSize(dataset.getDid) + ) + }) publicDatasets.forEach { publicDataset => if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { val dashboardDataset = DashboardDataset( @@ -738,15 +674,13 @@ class DatasetResource { ownerEmail = publicDataset.ownerEmail, accessPrivilege = DatasetUserAccessPrivilege.READ, versions = List(), - size = calculateLatestDatasetVersionSize(publicDataset.dataset.getDid) + size = calculateDatasetVersionSize(publicDataset.dataset.getDid) ) accessibleDatasets = accessibleDatasets :+ dashboardDataset } } - ListDatasetsResponse( - accessibleDatasets.toList - ) + accessibleDatasets.toList }) } @@ -755,7 +689,7 @@ class DatasetResource { def getDatasetVersionList( @PathParam("did") did: UInteger, @Auth user: SessionUser - ): DatasetVersions = { + ): List[DatasetVersion] = { val uid = user.getUid withTransaction(context)(ctx => { @@ -768,13 +702,13 @@ class DatasetResource { .orderBy(DATASET_VERSION.CREATION_TIME.desc()) // or .asc() for ascending .fetchInto(classOf[DatasetVersion]) - DatasetVersions(result.asScala.toList) + result.asScala.toList }) } @GET @Path("/{did}/version/latest") - def getLatestDatasetVersion( + def retrieveLatestDatasetVersion( @PathParam("did") did: UInteger, @Auth user: SessionUser ): DashboardDatasetVersion = { @@ -784,7 +718,9 @@ class DatasetResource { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } val dataset = getDatasetByID(ctx, did) - val latestVersion = getLatestDatasetVersionWithAccessCheck(ctx, did, uid) + val latestVersion = getLatestDatasetVersion(ctx, did).getOrElse( + throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) + ) val datasetPath = PathUtils.getDatasetPath(did) val ownerNode = DatasetFileNode @@ -804,7 +740,15 @@ class DatasetResource { DashboardDatasetVersion( latestVersion, - getFileNodesOfCertainVersion(ownerNode, dataset.getName, latestVersion.getName) + ownerNode.children.get + .find(_.getName == dataset.getName) + .head + .children + .get + .find(_.getName == latestVersion.getName) + .head + .children + .get ) }) } @@ -827,7 +771,8 @@ class DatasetResource { targetDatasetPath, datasetVersion.getVersionHash ) - val size = calculateDatasetVersionSize(did, dvid) + val versionHash = getDatasetVersionByID(ctx, dvid).getVersionHash + val size = calculateDatasetVersionSize(did, Some(versionHash)) val ownerFileNode = DatasetFileNode .fromPhysicalFileNodes( Map((dataset.ownerEmail, datasetName, datasetVersion.getName) -> fileNodes.asScala.toList) @@ -835,9 +780,15 @@ class DatasetResource { .head DatasetVersionRootFileNodesResponse( - DatasetVersionRootFileNodes( - getFileNodesOfCertainVersion(ownerFileNode, datasetName, datasetVersion.getName) - ), + ownerFileNode.children.get + .find(_.getName == datasetName) + .head + .children + .get + .find(_.getName == datasetVersion.getName) + .head + .children + .get, size ) }) @@ -852,7 +803,7 @@ class DatasetResource { val uid = user.getUid withTransaction(context)(ctx => { val dashboardDataset = getDashboardDataset(ctx, did, uid) - val size = DatasetResource.calculateLatestDatasetVersionSize(did) + val size = calculateDatasetVersionSize(did) dashboardDataset.copy(size = size) }) } @@ -863,7 +814,6 @@ class DatasetResource { @QueryParam("path") pathStr: String, @Auth user: SessionUser ): Response = { - val uid = user.getUid val decodedPathStr = URLDecoder.decode(pathStr, StandardCharsets.UTF_8.name()) withTransaction(context)(ctx => { @@ -923,14 +873,13 @@ class DatasetResource { if (!userHasReadAccess(context, did, user.getUid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val (dataset, version) = if (dvid.isEmpty) { - // dvid is not given, retrieve latest - getLatestVersionInfo(did, user) - } else { - // dvid is given, retrieve certain version - withTransaction(context)(ctx => - (getDatasetByID(ctx, did), getDatasetVersionByID(ctx, UInteger.valueOf(dvid.get))) + val dataset = getDatasetByID(context, did) + val version = if (dvid.isEmpty) { + getLatestDatasetVersion(context, did).getOrElse( + throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) ) + } else { + getDatasetVersionByID(context, UInteger.valueOf(dvid.get)) } val targetDatasetPath = PathUtils.getDatasetPath(dataset.getDid) val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( @@ -987,17 +936,4 @@ class DatasetResource { .`type`("application/zip") .build() } - - private def getLatestVersionInfo(did: UInteger, user: SessionUser): (Dataset, DatasetVersion) = { - validateUserAccess(did, user.getUid) - val dataset = getDatasetByID(context, did) - val latestVersion = getLatestDatasetVersionWithAccessCheck(context, did, user.getUid) - (dataset, latestVersion) - } - - private def validateUserAccess(did: UInteger, uid: UInteger): Unit = { - if (!userHasReadAccess(context, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/utils/DatasetStatisticsUtils.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/utils/DatasetStatisticsUtils.scala index 561a50f2f02..3f1c17a39d2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/utils/DatasetStatisticsUtils.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/utils/DatasetStatisticsUtils.scala @@ -52,7 +52,7 @@ object DatasetStatisticsUtils { def getUserCreatedDatasets(uid: UInteger): List[DatasetQuota] = { val datasetList = getUserCreatedDatasetList(uid) datasetList.map { dataset => - val size = DatasetResource.calculateLatestDatasetVersionSize(dataset.did) + val size = DatasetResource.calculateDatasetVersionSize(dataset.did) dataset.copy(size = size) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 7d112f30afc..f57eb29fc79 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -17,10 +17,7 @@ import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse import edu.uci.ics.texera.web.resource.GoogleResource -import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.{ - createNewDatasetVersionByAddingFiles, - sanitizePath -} +import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles import edu.uci.ics.texera.web.resource.dashboard.user.workflow.WorkflowVersionResource import org.jooq.types.UInteger import edu.uci.ics.amber.util.ArrowUtils @@ -39,6 +36,7 @@ import scala.jdk.CollectionConverters.SeqHasAsJava import org.apache.arrow.memory.RootAllocator import org.apache.arrow.vector._ import org.apache.arrow.vector.ipc.ArrowFileWriter +import org.apache.commons.lang3.StringUtils import java.io.OutputStream import java.nio.channels.Channels @@ -445,8 +443,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { .now() .truncatedTo(ChronoUnit.SECONDS) .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) - sanitizePath( - s"${request.workflowName}-v$latestVersion-${request.operatorName}-$timestamp.$extension" + StringUtils.replaceEach( + s"${request.workflowName}-v$latestVersion-${request.operatorName}-$timestamp.$extension", + Array("/", "\\"), + Array("", "") ) } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-explorer.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-explorer.component.ts index 56e35373f6a..3b54b2c5bf9 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-explorer.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-explorer.component.ts @@ -218,6 +218,7 @@ export class UserDatasetExplorerComponent implements OnInit { .retrieveDatasetVersionFileTree(this.did, this.selectedVersion.dvid) .pipe(untilDestroyed(this)) .subscribe(data => { + console.log(data); this.fileTreeNodeList = data.fileNodes; this.currentDatasetVersionSize = data.size; let currentNode = this.fileTreeNodeList[0]; diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 4ead1427dae..eecd65260a7 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -82,8 +82,8 @@ export class DatasetService { }); } - public retrieveAccessibleDatasets(): Observable<{ datasets: DashboardDataset[] }> { - return this.http.get<{ datasets: DashboardDataset[] }>(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}`); + public retrieveAccessibleDatasets(): Observable { + return this.http.get(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}`); } public createDatasetVersion( did: number, @@ -121,11 +121,9 @@ export class DatasetService { * @param did */ public retrieveDatasetVersionList(did: number): Observable { - return this.http - .get<{ - versions: DatasetVersion[]; - }>(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/${DATASET_VERSION_RETRIEVE_LIST_URL}`) - .pipe(map(response => response.versions)); + return this.http.get( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/${DATASET_VERSION_RETRIEVE_LIST_URL}` + ); } /** @@ -155,16 +153,9 @@ export class DatasetService { did: number, dvid: number ): Observable<{ fileNodes: DatasetFileNode[]; size: number }> { - return this.http - .get( - `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/${DATASET_VERSION_BASE_URL}/${dvid}/rootFileNodes` - ) - .pipe( - map(response => ({ - fileNodes: response.rootFileNodes.fileNodes, - size: response.size, - })) - ); + return this.http.get<{ fileNodes: DatasetFileNode[]; size: number }>( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/${DATASET_VERSION_BASE_URL}/${dvid}/rootFileNodes` + ); } public deleteDatasets(dids: number[]): Observable { @@ -194,10 +185,3 @@ export class DatasetService { ); } } - -interface DatasetVersionRootFileNodesResponse { - rootFileNodes: { - fileNodes: DatasetFileNode[]; - }; - size: number; -} diff --git a/core/gui/src/app/workspace/component/file-selection/file-selection.component.ts b/core/gui/src/app/workspace/component/file-selection/file-selection.component.ts index 3629cef000a..6643fcbb229 100644 --- a/core/gui/src/app/workspace/component/file-selection/file-selection.component.ts +++ b/core/gui/src/app/workspace/component/file-selection/file-selection.component.ts @@ -38,8 +38,8 @@ export class FileSelectionComponent implements OnInit { this.datasetService .retrieveAccessibleDatasets() .pipe(untilDestroyed(this)) - .subscribe(response => { - this._datasets = response.datasets; + .subscribe(datasets => { + this._datasets = datasets; this.isAccessibleDatasetsLoading = false; if (!this.selectedFilePath || this.selectedFilePath == "") { return; diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index affdd5b249c..9b8ab442013 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -45,8 +45,7 @@ export class ResultExportationComponent implements OnInit { this.datasetService .retrieveAccessibleDatasets() .pipe(untilDestroyed(this)) - .subscribe(response => { - const datasets = response.datasets; + .subscribe(datasets => { this.userAccessibleDatasets = datasets.filter(dataset => dataset.accessPrivilege === "WRITE"); this.filteredUserAccessibleDatasets = [...this.userAccessibleDatasets]; });