diff --git a/experimental/ubuntu/bionic/Dockerfile b/experimental/ubuntu/bionic/Dockerfile
new file mode 100644
index 0000000..a252ed1
--- /dev/null
+++ b/experimental/ubuntu/bionic/Dockerfile
@@ -0,0 +1,77 @@
+# This will create an image based on Ubuntu 18.04.4 LTS (Bionic Beaver) which includes several improvements over 16.04
+# For information on various Ubuntu releases: https://en.wikipedia.org/wiki/Ubuntu_version_history
+# This will include python, scala, and R support. R will be based on Microsoft R Open
+# This will provide an image that has parity with the standard Databricks runtime but based on Ubuntu Bionic
+
+# First, use the databricks standard image and copy the databricks folder
+FROM databricksruntime/standard AS BaseImage
+
+# Pull the bionic image from the ubuntu hub and use that as the base image for the new container
+FROM ubuntu:bionic
+
+# Copy the databricks folder to the bionic image
+# This really is cheating, we could easily install conda from scratch. The idea here was to make sure that we started from a known good databricks perspective
+COPY --from=BaseImage /databricks /databricks
+
+# Make sure that the appropriate pieces for the Databricks runtimes are included
+# Databricks runtime requirements are found here: https://docs.databricks.com/clusters/custom-containers.html
+# First install the OpenJDK8 (Spark requires JDK 8, which is no longer available from Oracle) along with iproute2 and sudo, which are required
+
+RUN apt-get update \
+    && apt-get install -y openjdk-8-jdk iproute2 sudo \
+    && apt-get clean
+
+# Fix any certificate errors
+RUN apt-get update \
+    && apt-get install -y ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f
+
+# Ensure that the JAVA_HOME is properly set
+# ENV already exposes the variable to every later build step and to the running container, so no separate "RUN export" is needed
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
+
+# Add the proper Conda environment file
+COPY env.yml /databricks/.conda-env-def/env.yml
+
+# Now Configure Conda to use the environment file and ensure the script is sourced for all shells
+RUN /databricks/conda/bin/conda env create --file /databricks/.conda-env-def/env.yml \
+    && ln -s /databricks/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
+
+# Conda recommends using strict channel priority to speed up conda operations and reduce package incompatibility problems.
+# Set always_yes to avoid needing -y flags, and improve conda experience in Databricks notebooks.
+RUN /databricks/conda/bin/conda config --system --set channel_priority strict \
+    && /databricks/conda/bin/conda config --system --set always_yes True
+
+# This environment variable must be set to indicate which conda environment to activate.
+# Note that currently, we have to set both of these environment variables. The first one is necessary to indicate that this runtime supports conda.
+# The second one is necessary so that the python notebook/repl can be started (won't work without it)
+ENV DEFAULT_DATABRICKS_ROOT_CONDA_ENV=dcs-std
+ENV DATABRICKS_ROOT_CONDA_ENV=dcs-std
+
+# Setup the timezone correctly for non-interactive installs
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Phoenix
+RUN apt-get install -y tzdata
+
+# Now that the python and conda environments are properly configured, install Microsoft R Open
+# And add the necessary Spark libraries for R
+RUN apt-get update \
+    && apt-get install -y curl libcurl4-openssl-dev wget libssl-dev libxml2-dev \
+    && apt-get install -y software-properties-common apt-transport-https r-base-dev \
+    && apt-get clean
+
+# Configure apt to use Microsoft package repository and appropriate public key
+RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
+    && apt-add-repository https://packages.microsoft.com/ubuntu/18.04/prod \
+    && apt-get update
+
+# Download Microsoft R Open and install, then add Spark-specific libraries
+RUN wget https://mran.blob.core.windows.net/install/mro/3.5.3/ubuntu/microsoft-r-open-3.5.3.tar.gz \
+    && tar -xf microsoft-r-open-3.5.3.tar.gz \
+    && cd microsoft-r-open \
+    && ./install.sh -a -s \
+    && R -e "install.packages('htmltools', repos = 'https://cran.microsoft.com/snapshot/2019-06-19/')" \
+    && R -e "install.packages('Rserve')" \
+    && R -e "install.packages('hwriterPlus', repos = 'https://mran.revolutionanalytics.com/snapshot/2017-02-26')" \
+    && R -e "install.packages('sparklyr')"
diff --git a/experimental/ubuntu/bionic/env.yml b/experimental/ubuntu/bionic/env.yml
new file mode 100644
index 0000000..e2f5d48
--- /dev/null
+++ b/experimental/ubuntu/bionic/env.yml
@@ -0,0 +1,13 @@
+name: dcs-std
+channels:
+  - default
+dependencies:
+  - pip:
+    - pyarrow==0.13.0
+  - python=3.7.3
+  - six=1.12.0
+  - nomkl=3
+  - ipython=7.4.0
+  - numpy=1.16.2
+  - pandas=0.24.2
+  - matplotlib=3.1.2
diff --git a/experimental/ubuntu/mxnet/Dockerfile b/experimental/ubuntu/mxnet/Dockerfile
new file mode 100644
index 0000000..beb84b2
--- /dev/null
+++ b/experimental/ubuntu/mxnet/Dockerfile
@@ -0,0 +1,75 @@
+# Grab the latest Databricks runtime (ubuntu)
+FROM databricksruntime/standard:latest
+
+# Install the latest python3 dev tools along with pip3
+RUN apt-get update \
+    && apt-get install -y wget python3-dev gcc \
+    && wget https://bootstrap.pypa.io/get-pip.py \
+    && python3 get-pip.py
+
+# Install the latest xml2 package which will be required by Roxygen in MXNet
+RUN apt-get install -y libxml2-dev libssl-dev gfortran libcurl4-gnutls-dev
+
+# Install Microsoft R Open and then add the necessary Spark libraries
+# The Microsoft signing key is imported before the repository is added so that the following apt-get update can verify it
+RUN apt-get update \
+    && apt-get install -y curl libcurl4-openssl-dev \
+    && apt-get install -y software-properties-common apt-transport-https r-base-dev \
+    && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
+    && apt-add-repository https://packages.microsoft.com/ubuntu/16.04/prod \
+    && apt-get update \
+    && wget https://mran.blob.core.windows.net/install/mro/3.5.3/ubuntu/microsoft-r-open-3.5.3.tar.gz \
+    && tar -xf microsoft-r-open-3.5.3.tar.gz \
+    && cd microsoft-r-open \
+    && ./install.sh -a -s \
+    && R -e "install.packages('htmltools', repos = 'https://cran.microsoft.com/snapshot/2019-06-19/')" \
+    && R -e "install.packages('Rserve')" \
+    && R -e "install.packages('hwriterPlus', repos = 'https://mran.revolutionanalytics.com/snapshot/2017-02-26')"
+
+# Setup the timezone correctly for non-interactive installs
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Phoenix
+RUN apt-get install -y tzdata
+
+# Install the required build tools
+RUN apt-get install -y build-essential git ninja-build ccache libopenblas-dev libopencv-dev cmake
+
+# Even though we just installed cmake, we have to update it to the correct version
+RUN pip3 install --user --upgrade "cmake>=3.13.2"
+RUN cp ~/.local/bin/cmake /usr/bin/cmake
+
+# Now we have to update gcc/g++ to version 7.5 (I put these on individual commands as I was having difficulty combining them)
+# -y stops add-apt-repository from waiting for confirmation; apt-get is used because the apt front end is not intended for scripts
+RUN apt-get install -y software-properties-common
+RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
+RUN apt-get update
+RUN apt-get install -y g++-7
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 --slave /usr/bin/g++ g++ /usr/bin/g++-7
+
+# Since we updated the default compiler, we need to make some changes to the R Makeconf to ensure that it is properly used
+COPY Makeconf /opt/microsoft/ropen/3.5.3/lib64/R/etc/Makeconf
+
+# Grab the MxNet Source and Build it! These instructions can be found here: https://mxnet.apache.org/get_started/ubuntu_setup.html
+
+RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet \
+    && cd mxnet \
+    && cp config/linux.cmake config.cmake
+RUN rm -rf /mxnet/build
+RUN mkdir /mxnet/build \
+    && cd /mxnet/build \
+    && cmake -GNinja .. \
+    && cmake --build .
+
+# Now add R libraries that are specific to our environment
+RUN R -e "install.packages('Rcpp')" \
+    && R -e "install.packages('DiagrammeR')" \
+    && R -e "install.packages('data.table')" \
+    && R -e "install.packages('roxygen2')"
+
+# Create the python bindings for MXNet
+RUN cd /mxnet/python \
+    && /databricks/conda/envs/dcs-minimal/bin/pip install --user -e .
+
+# Now create the R MXNet bindings
+RUN cd /mxnet \
+    && make -f R-package/Makefile rpkg
diff --git a/experimental/ubuntu/mxnet/Makeconf b/experimental/ubuntu/mxnet/Makeconf
new file mode 100644
index 0000000..f1c5f52
--- /dev/null
+++ b/experimental/ubuntu/mxnet/Makeconf
@@ -0,0 +1,187 @@
+# etc/Makeconf. Generated from Makeconf.in by configure.
+#
+# ${R_HOME}/etc/Makeconf
+#
+# R was configured using the following call
+# (not including env. vars and site configuration)
+# configure '--verbose' '--with-x=yes' '--prefix=/builddir/target/R/Linux' '--enable-R-shlib' '--enable-BLAS-shlib' '--enable-memory-profiling' '--with-libpng' '--with-ICU' '--with-jpeglib' '--with-tcltk' '--with-tcl-config=/builddir/vendor/build/lib/tclConfig.sh' '--with-tk-config=/builddir/vendor/build/lib/tkConfig.sh' 'TCLTK_LIBS=-pthread -lz -lX11 -lXft -ltcl8.6 -ltk8.6 -lz' 'TCLTK_CPPFLAGS=-pthread' 'R_SHELL=/bin/bash' 'PKG_CONFIG_PATH=/builddir/vendor/build/lib/pkgconfig' 'CFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION -O2 -g' 'LDFLAGS=-L/builddir/vendor/build/lib' 'LIBS=-licui18n -licuuc -licudata -lstdc++' 'CPPFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION' 'CXXFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION -O2 -g'
+
+## This fails if it contains spaces, or if it is quoted
+include $(R_SHARE_DIR)/make/vars.mk
+
+AR = ar
+BLAS_LIBS = -L$(R_HOME)/lib$(R_ARCH) -lRblas
+C_VISIBILITY = -fvisibility=hidden
+CC = gcc -std=gnu99
+CFLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g $(LTO)
+CPICFLAGS = -fpic
+CPPFLAGS = -DU_STATIC_IMPLEMENTATION
+CXX = g++
+CXXCPP = $(CXX) -E
+CXXFLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g $(LTO)
+CXXPICFLAGS = -fpic
+CXX98 = g++
+CXX98FLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g
+CXX98PICFLAGS = -fpic
+CXX98STD =
+CXX11 = g++
+CXX11FLAGS = -DU_STATIC_IMPLEMENTATION -g -O2
+CXX11PICFLAGS = -fpic
+CXX11STD = -std=gnu++11
+CXX14 =
+CXX14FLAGS =
+CXX14PICFLAGS =
+CXX14STD =
+CXX17 =
+CXX17FLAGS =
+CXX17PICFLAGS =
+CXX17STD =
+DYLIB_EXT = .so
+DYLIB_LD = $(CC)
+DYLIB_LDFLAGS = -shared -fopenmp# $(CFLAGS) $(CPICFLAGS)
+DYLIB_LINK = $(DYLIB_LD) $(DYLIB_LDFLAGS) $(LDFLAGS)
+ECHO = echo
+ECHO_C =
+ECHO_N = -n
+ECHO_T =
+## NB, set FC before F77 as on Solaris make, setting FC sets F77
+FC = gfortran
+FCFLAGS = -g -O2 $(LTO)
+## additional libs needed when linking with $(FC), e.g. on some Oracle compilers
+FCLIBS =
+F77 = gfortran
+F77_VISIBILITY = -fvisibility=hidden
+FFLAGS = -g -O2 $(LTO)
+##FLIBS = -lgfortran -lm
+FLIBS = -lm
+FCPICFLAGS = -fpic
+FPICFLAGS = -fpic
+FOUNDATION_CPPFLAGS =
+FOUNDATION_LIBS =
+JAR =
+JAVA =
+JAVAC =
+JAVAH =
+## JAVA_HOME might be used in the next three.
+## They are for packages 'JavaGD' and 'rJava'
+JAVA_HOME =
+JAVA_CPPFLAGS =
+JAVA_LIBS =
+JAVA_LD_LIBRARY_PATH =
+LAPACK_LIBS = -L$(R_HOME)/lib$(R_ARCH) -lRlapack
+## we only need this is if it is external, as otherwise link to R
+LIBINTL=
+LIBM = -lm
+LIBR0 = -L$(R_HOME)/lib$(R_ARCH)
+LIBR1 = -lR
+LIBR = -L$(R_HOME)/lib$(R_ARCH) -lR
+LIBS =
+## needed by R CMD config
+LIBnn = lib64
+LIBTOOL = $(SHELL) "$(R_HOME)/bin/libtool"
+LDFLAGS =
+LTO =
+## needed to build applications linking to static libR
+MAIN_LD = $(CC)
+MAIN_LDFLAGS = -Wl,--export-dynamic -fopenmp
+MAIN_LINK = $(MAIN_LD) $(MAIN_LDFLAGS) $(LDFLAGS)
+MKINSTALLDIRS = $(R_HOME)/bin/mkinstalldirs
+OBJC =
+OBJCFLAGS = $(LTO)
+OBJC_LIBS =
+OBJCXX =
+R_ARCH =
+RANLIB = ranlib
+SAFE_FFLAGS = -g -O2 -ffloat-store
+SED = /bin/sed
+SHELL = /bin/bash
+SHLIB_CFLAGS =
+SHLIB_CXXFLAGS =
+SHLIB_CXXLD = $(CXX)
+SHLIB_CXXLDFLAGS = -shared
+SHLIB_CXX98LD = $(CXX98) $(CXX98STD)
+SHLIB_CXX98LDFLAGS = -shared
+SHLIB_CXX11LD = $(CXX11) $(CXX11STD)
+SHLIB_CXX11LDFLAGS = -shared
+SHLIB_CXX14LD = $(CXX14) $(CXX14STD)
+SHLIB_CXX14LDFLAGS = -shared
+SHLIB_CXX17LD = $(CXX17) $(CXX17STD)
+SHLIB_CXX17LDFLAGS = -shared
+SHLIB_EXT = .so
+SHLIB_FCLD = $(FC)
+SHLIB_FCLDFLAGS = -shared
+SHLIB_FFLAGS =
+SHLIB_LD = $(CC)
+SHLIB_LDFLAGS = -shared# $(CFLAGS) $(CPICFLAGS)
+SHLIB_LIBADD =
+## We want to ensure libR is picked up from $(R_HOME)/lib
+## before e.g. /usr/local/lib if a version is already installed.
+SHLIB_LINK = $(SHLIB_LD) $(SHLIB_LDFLAGS) $(LIBR0) $(LDFLAGS)
+SHLIB_OPENMP_CFLAGS = -fopenmp
+SHLIB_OPENMP_CXXFLAGS = -fopenmp
+SHLIB_OPENMP_FCFLAGS = -fopenmp
+SHLIB_OPENMP_FFLAGS = -fopenmp
+STRIP_LIBS = strip --strip-unneeded
+STRIP_STATIC_LIBS = strip --strip-debug
+TCLTK_CPPFLAGS = -pthread
+TCLTK_LIBS = -pthread -lz -lX11 -lXft -ltcl8.6 -ltk8.6 -lz
+YACC = bison -y
+
+## legacy
+CXX1X =
+CXX1XFLAGS =
+CXX1XPICFLAGS =
+CXX1XSTD =
+SHLIB_CXX1XLD = $(CXX11) $(CXX11STD)
+SHLIB_CXX1XLDFLAGS = -shared
+
+
+## for linking to libR.a
+STATIC_LIBR = # -Wl,--whole-archive "$(R_HOME)/lib$(R_ARCH)/libR.a" -Wl,--no-whole-archive $(BLAS_LIBS) $(FLIBS) $(LIBINTL) -lreadline -lncurses $(LIBS)
+
+R_XTRA_CFLAGS =
+R_XTRA_CPPFLAGS = -I$(R_INCLUDE_DIR) -DNDEBUG
+R_XTRA_CXXFLAGS =
+R_XTRA_FFLAGS =
+
+ALL_CFLAGS = $(R_XTRA_CFLAGS) $(PKG_CFLAGS) $(CPICFLAGS) $(SHLIB_CFLAGS) $(CFLAGS)
+ALL_CPPFLAGS = $(R_XTRA_CPPFLAGS) $(PKG_CPPFLAGS) $(CLINK_CPPFLAGS) $(CPPFLAGS)
+ALL_CXXFLAGS = $(R_XTRA_CXXFLAGS) $(PKG_CXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(CXXFLAGS)
+ALL_OBJCFLAGS = $(PKG_OBJCFLAGS) $(CPICFLAGS) $(SHLIB_CFLAGS) $(OBJCFLAGS)
+ALL_OBJCXXFLAGS = $(PKG_OBJCXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(OBJCXXFLAGS)
+ALL_FFLAGS = $(R_XTRA_FFLAGS) $(PKG_FFLAGS) $(FPICFLAGS) $(SHLIB_FFLAGS) $(FFLAGS)
+## LIBR here as a couple of packages use this without SHLIB_LINK
+ALL_LIBS = $(PKG_LIBS) $(SHLIB_LIBADD) $(LIBR)# $(LIBINTL)
+
+.SUFFIXES:
+.SUFFIXES: .c .cc .cpp .d .f .f90 .f95 .m .mm .M .o
+
+.c.o:
+	$(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) -c $< -o $@
+.c.d:
+	@echo "making $@ from $<"
+	@$(CC) -MM $(ALL_CPPFLAGS) $< > $@
+.m.d:
+	@echo > $@
+.cc.o:
+	$(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
+.cpp.o:
+	$(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
+.cc.d:
+	@echo "making $@ from $<"
+	@$(CXX) -M $(ALL_CPPFLAGS) $< > $@
+.cpp.d:
+	@echo "making $@ from $<"
+	@$(CXX) -M $(ALL_CPPFLAGS) $< > $@
+.m.o:
+	$(OBJC) $(ALL_CPPFLAGS) $(ALL_OBJCFLAGS) -c $< -o $@
+.mm.o:
+	$(OBJCXX) $(ALL_CPPFLAGS) $(ALL_OBJCXXFLAGS) -c $< -o $@
+.M.o:
+	$(OBJCXX) $(ALL_CPPFLAGS) $(ALL_OBJCXXFLAGS) -c $< -o $@
+.f.o:
+	$(F77) $(ALL_FFLAGS) -c $< -o $@
+.f95.o:
+	$(FC) $(PKG_FCFLAGS) $(FCPICFLAGS) $(FCFLAGS) -c $< -o $@
+.f90.o:
+	$(FC) $(PKG_FCFLAGS) $(FCPICFLAGS) $(FCFLAGS) -c $< -o $@