Skip to content

Commit

Permalink
Merge branch 'xinyuan-if-statement' into xinyuan-datatostate
Browse files Browse the repository at this point in the history
  • Loading branch information
aglinxinyuan committed Jan 14, 2025
2 parents 95e1e7b + a006eb2 commit f051749
Show file tree
Hide file tree
Showing 1,192 changed files with 11,912 additions and 173,974 deletions.
29 changes: 2 additions & 27 deletions .github/workflows/github-action-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@ jobs:
- name: Prod build
run: yarn --cwd core/gui run build:ci

amber:
core:
strategy:
matrix:
os:
- ubuntu-latest
os: [ ubuntu-22.04 ]
java-version: [ 11 ]
runs-on: ${{ matrix.os }}
env:
Expand All @@ -66,30 +65,6 @@ jobs:
steps:
- name: Prepare ENV
run: sudo apt-get install libncurses5
- name: Checkout Texera
uses: actions/checkout@v2
- name: Setup Java
uses: actions/setup-java@v2
with:
distribution: 'temurin'
java-version: ${{ matrix.java-version }}
- uses: coursier/cache-action@v6
with:
extraSbtFiles: '["core/amber/*.sbt", "core/amber/project/**.{scala,sbt}", "core/amber/project/build.properties" ]'
- name: Lint with scalafix & scalafmt
run: cd core/amber && sbt "scalafixAll --check" && sbt scalafmtCheckAll
- name: Compile with sbt
run: cd core/amber && sbt clean package
- name: Run backend tests
run: cd core/amber && sbt -v -J-Xmx2G test

core:
strategy:
matrix:
os: [ ubuntu-latest ]
java-version: [ 11 ]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout Texera
uses: actions/checkout@v2
- name: Setup Java
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,10 @@ StoredCredential*
**/apache2/
**/Apache24/
**/php/
Composer-Setup.exe
Composer-Setup.exe

# Ignore files and folders generated by Scala tooling in VS Code (Metals, Bloop, Ammonite)
.metals/
.bloop/
.ammonite/
metals.sbt
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ FROM sbtscala/scala-sbt:eclipse-temurin-jammy-11.0.17_8_1.9.3_2.13.11
WORKDIR /core
COPY core/ .

WORKDIR /core/amber
RUN sbt clean package
RUN apt-get update
RUN apt-get install -y netcat unzip python3-pip
RUN pip3 install python-lsp-server python-lsp-server[websockets]
Expand All @@ -34,10 +32,12 @@ WORKDIR /core
COPY .git ../.git
COPY --from=nodegui /gui/dist ./gui/dist

RUN scripts/build-docker.sh
RUN scripts/build-services.sh

CMD ["scripts/deploy-docker.sh"]

EXPOSE 8080

EXPOSE 9090
EXPOSE 9090

EXPOSE 8085
63 changes: 7 additions & 56 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
<h1 align="center">Texera - Collaborative Data Science and AI/ML Using Workflows</h1>

<p align="center">
<img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/>
<a href="https://texera.io"> <img src="core/gui/src/assets/logos/full_logo_small.png" alt="texera-logo" width="192px" height="109px"/> </a>
<br>
<i>Texera supports scalable data computation and enables advanced AI/ML techniques.</i>
<br>
<i>"Collaboration" is a key focus: Texera offers an experience similar to Google Docs, but for data science.</i>
<br>

<h4 align="center">
<a href="https://github.com/Texera/texera#videos">Demo Video</a>
<a href="https://texera.io">Official Site</a>
|
<a href="https://texera.github.io/blog/">Blogs</a>
<a href="https://texera.io/publications/">Publications</a>
|
<a href="https://texera.io/category/video/">Video</a>
|
<a href="https://texera.io/category/blog/">Blog</a>
|
<a href="https://github.com/Texera/texera/wiki/Getting-Started">Getting Started</a>
<br>
Expand All @@ -29,13 +33,6 @@
<img alt="Static Badge" src="https://img.shields.io/badge/Largest_Deployment-100_nodes,_400_cores-green">
</p>

# Motivation

* Data science is labor-intensive and particularly challenging for non-IT users applying AI/ML.
* Many workflow-based data science platforms lack parallelism, limiting their ability to handle big datasets.
* Cloud services and technologies have advanced significantly over the past decade, enabling powerful browser-based interfaces supported by high-speed networks.
* Existing data science platforms offer limited interaction during long-running jobs, making them difficult to manage after execution begins.

# Goals

* Provide data science as cloud services;
Expand Down Expand Up @@ -148,52 +145,6 @@ The workflow in the use case shown below includes data cleaning, ML model traini
_In JAMIA 2021_ | [PDF](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7989302/pdf/ocab047.pdf)
</details>


# Education
<table>
<tr style="height: 500px;">
<td align="center">
<a href="https://ds4all.ics.uci.edu/">
<img src="https://ds4all.ics.uci.edu/wp-content/uploads/2023/07/banner-1024x576.png">
</a>
<p><b>Data Science for All</b></p>
An NSF-funded summer program to teach high-school students data science and AI/ML
</td>
<td align="center">
<a href="https://canvas.eee.uci.edu/courses/63639/pages/syllabus">
<img src="https://github.com/user-attachments/assets/a7569fd3-6857-48b4-80dc-d9f006ae2c8f">
</a>
<p><b>ICS 80: Data Science and AI/ML Using Workflows</b></p>
A Spring 2024 course at UCI that taught data science and AI/ML to 42 undergraduates, most of whom were not computer science majors
</td>
</tr>
</table>


# Videos
<table>
<tr style="height: 500px;">
<td align="center">
<a href="https://www.youtube.com/watch?v=B81iMFS5fPc">
<img src="https://img.youtube.com/vi/B81iMFS5fPc/0.jpg" alt="Watch the video">
</a>
<p><b>dkNET Webinar 04/26/2024</b></p>
</td>
<td align="center">
<a href="https://www.youtube.com/watch?v=SP-XiDADbw0">
<img src="https://img.youtube.com/vi/SP-XiDADbw0/0.jpg" alt="Watch the video">
</a>
<p><b>Texera Demo @ VLDB'20</b></p>
</td>
<td align="center">
<a href="https://www.youtube.com/watch?v=T5ShFRfHmgI">
<img src="https://img.youtube.com/vi/T5ShFRfHmgI/0.jpg" alt="Watch the video">
</a>
<p><b>Amber Presentation @ VLDB'20</b></p>
</td>
</tr>
</table>

# Getting Started

* For users, visit [Guide to Use Texera](https://github.com/Texera/texera/wiki/Getting-Started).
Expand Down
33 changes: 6 additions & 27 deletions core/amber/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ conflictManager := ConflictManager.latestRevision
// ensuring no parallel execution of multiple tasks
concurrentRestrictions in Global += Tags.limit(Tags.Test, 1)

// temp fix for the netty dependency issue
// https://github.com/coursier/coursier/issues/2016
ThisBuild / useCoursier := false

// add python as an additional source
Compile / unmanagedSourceDirectories += baseDirectory.value / "src" / "main" / "python"

Expand Down Expand Up @@ -116,29 +112,19 @@ val excludeHadoopSlf4j = ExclusionRule(organization = "org.slf4j")
val excludeHadoopJetty = ExclusionRule(organization = "org.eclipse.jetty")
val excludeHadoopJsp = ExclusionRule(organization = "javax.servlet.jsp")
val hadoopDependencies = Seq(
"org.apache.hadoop" % "hadoop-common" % hadoopVersion excludeAll (excludeHadoopJersey, excludeHadoopSlf4j, excludeHadoopJsp, excludeHadoopJetty)
"org.apache.hadoop" % "hadoop-common" % hadoopVersion excludeAll(excludeHadoopJersey, excludeHadoopSlf4j, excludeHadoopJsp, excludeHadoopJetty)
)

/////////////////////////////////////////////////////////////////////////////
// Google Service related
val googleServiceDependencies = Seq(
"com.google.oauth-client" % "google-oauth-client-jetty" % "1.34.1" exclude ("com.google.guava", "guava"),
"com.google.api-client" % "google-api-client" % "2.2.0" exclude ("com.google.guava", "guava"),
"com.google.apis" % "google-api-services-sheets" % "v4-rev612-1.25.0" exclude ("com.google.guava", "guava"),
"com.google.apis" % "google-api-services-drive" % "v3-rev197-1.25.0" exclude ("com.google.guava", "guava"),
"com.google.oauth-client" % "google-oauth-client-jetty" % "1.34.1" exclude("com.google.guava", "guava"),
"com.google.api-client" % "google-api-client" % "2.2.0" exclude("com.google.guava", "guava"),
"com.google.apis" % "google-api-services-sheets" % "v4-rev612-1.25.0" exclude("com.google.guava", "guava"),
"com.google.apis" % "google-api-services-drive" % "v3-rev197-1.25.0" exclude("com.google.guava", "guava"),
"com.sun.mail" % "javax.mail" % "1.6.2"
)

/////////////////////////////////////////////////////////////////////////////
// Arrow related
val arrowVersion = "14.0.1"
val arrowDependencies = Seq(
// https://mvnrepository.com/artifact/org.apache.arrow/flight-grpc
"org.apache.arrow" % "flight-grpc" % arrowVersion,
// https://mvnrepository.com/artifact/org.apache.arrow/flight-core
"org.apache.arrow" % "flight-core" % arrowVersion
)

/////////////////////////////////////////////////////////////////////////////
// MongoDB related
val mongoDbDependencies = Seq(
Expand All @@ -152,7 +138,6 @@ libraryDependencies ++= akkaDependencies
libraryDependencies ++= luceneDependencies
libraryDependencies ++= dropwizardDependencies
libraryDependencies ++= mbknorJacksonJsonSchemaDependencies
libraryDependencies ++= arrowDependencies
libraryDependencies ++= googleServiceDependencies
libraryDependencies ++= mongoDbDependencies
libraryDependencies ++= hadoopDependencies
Expand All @@ -165,13 +150,10 @@ PB.protocVersion := "3.19.4"

enablePlugins(Fs2Grpc)

fs2GrpcOutputPath := (Compile / sourceDirectory).value / "scalapb"
Compile / unmanagedSourceDirectories += (Compile / sourceDirectory).value / "scalapb"

Compile / PB.targets := Seq(
scalapb.gen(
singleLineToProtoString = true
) -> (Compile / sourceDirectory).value / "scalapb",
) -> (Compile / sourceManaged).value,
// Let fs2 compile the gRPC-related protos; skip the other protos in the fs2 compilation pipeline.
scalapbCodeGenerators.value(1)
)
Expand Down Expand Up @@ -235,9 +217,6 @@ libraryDependencies += "io.github.redouane59.twitter" % "twittered" % "2.21"
// https://mvnrepository.com/artifact/org.jooq/jooq
libraryDependencies += "org.jooq" % "jooq" % "3.14.16"

// https://mvnrepository.com/artifact/mysql/mysql-connector-java
libraryDependencies += "mysql" % "mysql-connector-java" % "8.0.33"

// https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core
libraryDependencies += "org.jgrapht" % "jgrapht-core" % "1.4.0"

Expand Down
Loading

0 comments on commit f051749

Please sign in to comment.