Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for apache mxnet with R and python #23

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions experimental/ubuntu/bionic/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Builds an image on Ubuntu 18.04.4 LTS (Bionic Beaver), which brings several improvements over 16.04.
# Overview of the Ubuntu releases: https://en.wikipedia.org/wiki/Ubuntu_version_history
# The image is meant to support Python, Scala and R (R is based on Microsoft R Open) and to be
# on par with the standard Databricks runtime image, just on top of Ubuntu Bionic.

# Stage 1: the standard Databricks image; it only serves as the source of the /databricks folder
FROM databricksruntime/standard AS databricks_base

# Stage 2: the real image, starting from the Ubuntu Bionic image on the Docker hub
FROM ubuntu:bionic

# Take the /databricks folder (including conda) over from stage 1.
# Conda could also be installed from scratch; copying it keeps the image on a known-good Databricks setup.
COPY --from=databricks_base /databricks /databricks

# Packages the Databricks runtime expects inside a custom container
# (requirements: https://docs.databricks.com/clusters/custom-containers.html).
# OpenJDK 8 is used because Spark requires JDK 8, which is no longer available from Oracle;
# iproute2 and sudo are required as well.
RUN apt-get update && \
    apt-get install -y iproute2 openjdk-8-jdk sudo && \
    apt-get clean

# Install the Java CA certificate package and force-regenerate the certificate store
# to get rid of certificate errors
RUN apt-get update && \
    apt-get install -y ca-certificates-java && \
    apt-get clean && \
    update-ca-certificates -f

# Point JAVA_HOME at the OpenJDK 8 installation.
# ENV already makes the variable visible to every later build step and to the running container.
# A separate `RUN export JAVA_HOME` only affects the throw-away shell of that one step,
# so it added an empty layer without any effect and has been dropped.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

# Ship the conda environment definition with the image
COPY env.yml /databricks/.conda-env-def/env.yml

# Create the conda environment from that definition and link conda's profile script
# into /etc/profile.d so that it is sourced by all shells
RUN /databricks/conda/bin/conda env create --file /databricks/.conda-env-def/env.yml && \
    ln -s /databricks/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh

# Conda recommends strict channel priority to speed up conda operations and to reduce
# package incompatibility problems.
# always_yes removes the need for -y flags and improves the conda experience in Databricks notebooks.
RUN /databricks/conda/bin/conda config --system --set channel_priority strict && \
    /databricks/conda/bin/conda config --system --set always_yes True

# Tell the runtime which conda environment to activate. Both variables are currently needed:
# DEFAULT_DATABRICKS_ROOT_CONDA_ENV indicates that this runtime supports conda,
# DATABRICKS_ROOT_CONDA_ENV is required for the python notebook/repl to start (won't work without it).
ENV DEFAULT_DATABRICKS_ROOT_CONDA_ENV=dcs-std \
    DATABRICKS_ROOT_CONDA_ENV=dcs-std

# Set up the timezone for non-interactive installs.
# DEBIAN_FRONTEND is a build-time ARG only, so it does not leak into the running container;
# TZ is a regular ENV and therefore also visible at runtime.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=America/Phoenix
# Refresh the package lists in the same step as the install: relying on lists left behind by an
# earlier (possibly cached) layer can make the install fail with stale package URLs.
RUN apt-get update \
 && apt-get install -y tzdata \
 && apt-get clean

# With python and conda configured, prepare the installation of Microsoft R Open:
# download tools, development headers needed by R packages, the apt repository helpers and r-base-dev
RUN apt-get update && \
    apt-get install -y \
        curl libcurl4-openssl-dev wget libssl-dev libxml2-dev \
        software-properties-common apt-transport-https r-base-dev && \
    apt-get clean

# Register Microsoft's public key with apt, add the Microsoft package repository for
# Ubuntu 18.04 and refresh the package lists
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    apt-add-repository https://packages.microsoft.com/ubuntu/18.04/prod && \
    apt-get update

# Download Microsoft R Open 3.5.3, install it, then add the R packages needed for Spark.
# The tarball and the unpacked installer are only needed while this step runs, so they are
# unpacked under /tmp and removed again instead of being left in the image's root directory.
# `repos` is spelled out in full; the previous `repo` only worked through R's partial argument matching.
# NOTE(review): the download and snapshot URLs point at MRAN-hosted content; confirm they still resolve.
RUN cd /tmp \
 && wget https://mran.blob.core.windows.net/install/mro/3.5.3/ubuntu/microsoft-r-open-3.5.3.tar.gz \
 && tar -xf microsoft-r-open-3.5.3.tar.gz \
 && (cd microsoft-r-open && ./install.sh -a -s) \
 && rm -rf /tmp/microsoft-r-open /tmp/microsoft-r-open-3.5.3.tar.gz \
 && R -e "install.packages('htmltools', repos = 'https://cran.microsoft.com/snapshot/2019-06-19/')" \
 && R -e "install.packages('Rserve')" \
 && R -e "install.packages('hwriterPlus', repos = 'https://mran.revolutionanalytics.com/snapshot/2017-02-26')" \
 && R -e "install.packages('sparklyr')"
13 changes: 13 additions & 0 deletions experimental/ubuntu/bionic/env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Conda environment definition for the Bionic image.
# The nesting matters: only pyarrow belongs to the `pip:` sub-list, everything else is a conda package.
# Without the indentation `pip:` is an empty entry and pyarrow is read as a conda dependency.
name: dcs-std
channels:
  # NOTE(review): conda's built-in channel list is normally spelled `defaults`;
  # confirm that `default` is intended before changing it.
  - default
dependencies:
  # Installed through pip rather than conda
  - pip:
    - pyarrow==0.13.0
  - python=3.7.3
  - six=1.12.0
  - nomkl=3
  - ipython=7.4.0
  - numpy=1.16.2
  - pandas=0.24.2
  - matplotlib=3.1.2
74 changes: 74 additions & 0 deletions experimental/ubuntu/mxnet/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Start from the latest Databricks standard runtime image (Ubuntu based)
FROM databricksruntime/standard:latest

# Python 3 development headers, a C compiler and wget, followed by a pip bootstrap
# for python3 through the get-pip.py script
RUN apt-get update \
 && apt-get install -y gcc python3-dev wget \
 && wget https://bootstrap.pypa.io/get-pip.py \
 && python3 get-pip.py

# Development libraries: libxml2 is required by Roxygen in MXNet; OpenSSL headers, gfortran and
# the libcurl headers are installed alongside it.
# The package lists are refreshed in the same step so the install never depends on lists
# left behind by an earlier (possibly cached) layer.
# NOTE(review): a later step installs libcurl4-openssl-dev, which normally conflicts with
# libcurl4-gnutls-dev - confirm which flavour is actually wanted.
RUN apt-get update \
 && apt-get install -y libxml2-dev libssl-dev gfortran libcurl4-gnutls-dev

# Install Microsoft R Open 3.5.3 and then add the necessary Spark libraries for R.
# Order matters for the Microsoft repository: the signing key is imported BEFORE the repository
# is added and the package lists are refreshed, otherwise apt cannot verify the repository.
# The tarball and the unpacked installer are only needed while this step runs, so they are
# unpacked under /tmp and removed again instead of being left in the image's root directory.
# `repos` is spelled out in full; the previous `repo` only worked through R's partial argument matching.
# NOTE(review): the download and snapshot URLs point at MRAN-hosted content; confirm they still resolve.
RUN apt-get update \
 && apt-get install -y curl libcurl4-openssl-dev \
 && apt-get install -y software-properties-common apt-transport-https r-base-dev \
 && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
 && apt-add-repository https://packages.microsoft.com/ubuntu/16.04/prod \
 && apt-get update \
 && cd /tmp \
 && wget https://mran.blob.core.windows.net/install/mro/3.5.3/ubuntu/microsoft-r-open-3.5.3.tar.gz \
 && tar -xf microsoft-r-open-3.5.3.tar.gz \
 && (cd microsoft-r-open && ./install.sh -a -s) \
 && rm -rf /tmp/microsoft-r-open /tmp/microsoft-r-open-3.5.3.tar.gz \
 && R -e "install.packages('htmltools', repos = 'https://cran.microsoft.com/snapshot/2019-06-19/')" \
 && R -e "install.packages('Rserve')" \
 && R -e "install.packages('hwriterPlus', repos = 'https://mran.revolutionanalytics.com/snapshot/2017-02-26')"

# Set up the timezone for non-interactive installs.
# DEBIAN_FRONTEND is a build-time ARG only, so it does not leak into the running container;
# TZ is a regular ENV and therefore also visible at runtime.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=America/Phoenix
# Refresh the package lists in the same step as the install: relying on lists left behind by an
# earlier (possibly cached) layer can make the install fail with stale package URLs.
RUN apt-get update \
 && apt-get install -y tzdata

# Install the required build tools. The package lists are refreshed in the same step so the
# install never depends on lists left behind by an earlier (possibly cached) layer.
RUN apt-get update \
 && apt-get install -y build-essential git ninja-build ccache libopenblas-dev libopencv-dev cmake

# The distribution's cmake is too old, so a newer one (>= 3.13.2) is installed through pip
# into the user site of the build user and its launcher is copied over /usr/bin/cmake.
# NOTE(review): only the launcher script is copied; it presumably still relies on the pip
# package under ~/.local, i.e. on running as the same user - confirm.
RUN pip3 install --user --upgrade "cmake>=3.13.2" \
 && cp ~/.local/bin/cmake /usr/bin/cmake

# Update gcc/g++ to version 7 from the ubuntu-toolchain-r PPA and make it the default compiler.
# All commands run in one step so the package lists fetched by `apt-get update` are guaranteed
# to be the ones used by the following install. Details:
#  - `add-apt-repository -y` never waits for a confirmation
#  - `apt-get` is used instead of `apt`, whose command line interface is not meant for scripts
#  - `update-alternatives --set` selects gcc-7 explicitly; the interactive `--config` dialog
#    cannot be answered during an image build
RUN apt-get update \
 && apt-get install -y software-properties-common \
 && add-apt-repository -y ppa:ubuntu-toolchain-r/test \
 && apt-get update \
 && apt-get install -y g++-7 \
 && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 --slave /usr/bin/g++ g++ /usr/bin/g++-7 \
 && update-alternatives --set gcc /usr/bin/gcc-7

# Since we updated the default compiler, R's build configuration has to match it:
# overwrite the Makeconf of the Microsoft R Open 3.5.3 installation with the Makeconf
# that is kept next to this Dockerfile, so that R package builds use the intended toolchain settings.
COPY Makeconf /opt/microsoft/ropen/3.5.3/lib64/R/etc/Makeconf

# Grab the MXNet source and build it. Instructions: https://mxnet.apache.org/get_started/ubuntu_setup.html
# The repository is cloned to the absolute path /mxnet because every later step refers to /mxnet;
# a relative target would only end up there if the working directory happened to be /.
# NOTE(review): the clone is not pinned to a tag or commit, so each build picks up whatever the
# default branch contains at that moment - consider pinning for reproducible images.
RUN git clone --recursive https://github.com/apache/incubator-mxnet.git /mxnet \
 && cp /mxnet/config/linux.cmake /mxnet/config.cmake

# Configure and build in a fresh build directory with CMake + Ninja
RUN rm -rf /mxnet/build \
 && mkdir /mxnet/build \
 && cd /mxnet/build \
 && cmake -GNinja .. \
 && cmake --build .

# R packages specific to our environment, installed in a single R session
RUN R -e "install.packages(c('Rcpp', 'DiagrammeR', 'data.table', 'roxygen2'))"

# Python bindings: editable (-e) user-level install of /mxnet/python with the pip of the
# dcs-minimal conda environment
RUN /databricks/conda/envs/dcs-minimal/bin/pip install --user -e /mxnet/python

# R bindings: run the rpkg target of MXNet's R-package Makefile from the source root
RUN cd /mxnet && make -f R-package/Makefile rpkg
187 changes: 187 additions & 0 deletions experimental/ubuntu/mxnet/Makeconf
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# etc/Makeconf. Generated from Makeconf.in by configure.
#
# ${R_HOME}/etc/Makeconf
#
# R was configured using the following call
# (not including env. vars and site configuration)
# configure '--verbose' '--with-x=yes' '--prefix=/builddir/target/R/Linux' '--enable-R-shlib' '--enable-BLAS-shlib' '--enable-memory-profiling' '--with-libpng' '--with-ICU' '--with-jpeglib' '--with-tcltk' '--with-tcl-config=/builddir/vendor/build/lib/tclConfig.sh' '--with-tk-config=/builddir/vendor/build/lib/tkConfig.sh' 'TCLTK_LIBS=-pthread -lz -lX11 -lXft -ltcl8.6 -ltk8.6 -lz' 'TCLTK_CPPFLAGS=-pthread' 'R_SHELL=/bin/bash' 'PKG_CONFIG_PATH=/builddir/vendor/build/lib/pkgconfig' 'CFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION -O2 -g' 'LDFLAGS=-L/builddir/vendor/build/lib' 'LIBS=-licui18n -licuuc -licudata -lstdc++' 'CPPFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION' 'CXXFLAGS=-I/builddir/vendor/build/include -DU_STATIC_IMPLEMENTATION -O2 -g'

## Compiler, linker and flag variables of this R installation's Makeconf.
## All assignments use the recursive '=' form, so references such as $(LTO) or $(R_HOME)
## are expanded when a variable is used, not where it is defined.

## This fails if it contains spaces, or if it is quoted
include $(R_SHARE_DIR)/make/vars.mk

AR = ar
BLAS_LIBS = -L$(R_HOME)/lib$(R_ARCH) -lRblas
C_VISIBILITY = -fvisibility=hidden
CC = gcc -std=gnu99
CFLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g $(LTO)
CPICFLAGS = -fpic
CPPFLAGS = -DU_STATIC_IMPLEMENTATION
CXX = g++
CXXCPP = $(CXX) -E
CXXFLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g $(LTO)
CXXPICFLAGS = -fpic
CXX98 = g++
CXX98FLAGS = -DU_STATIC_IMPLEMENTATION -O2 -g
CXX98PICFLAGS = -fpic
CXX98STD =
CXX11 = g++
CXX11FLAGS = -DU_STATIC_IMPLEMENTATION -g -O2
CXX11PICFLAGS = -fpic
CXX11STD = -std=gnu++11
## No C++14 / C++17 compiler is configured here: all CXX14* and CXX17* variables are empty.
## NOTE(review): packages that request one of these standards presumably cannot be built
## with this configuration - confirm whether that is intended.
CXX14 =
CXX14FLAGS =
CXX14PICFLAGS =
CXX14STD =
CXX17 =
CXX17FLAGS =
CXX17PICFLAGS =
CXX17STD =
DYLIB_EXT = .so
DYLIB_LD = $(CC)
## Everything from '#' on is a make comment, so the value below is just '-shared -fopenmp'
DYLIB_LDFLAGS = -shared -fopenmp# $(CFLAGS) $(CPICFLAGS)
DYLIB_LINK = $(DYLIB_LD) $(DYLIB_LDFLAGS) $(LDFLAGS)
ECHO = echo
ECHO_C =
ECHO_N = -n
ECHO_T =
## NB, set FC before F77 as on Solaris make, setting FC sets F77
FC = gfortran
FCFLAGS = -g -O2 $(LTO)
## additional libs needed when linking with $(FC), e.g. on some Oracle compilers
FCLIBS =
F77 = gfortran
F77_VISIBILITY = -fvisibility=hidden
FFLAGS = -g -O2 $(LTO)
## FLIBS links only libm; the commented-out line below is a variant that also linked libgfortran
##FLIBS = -lgfortran -lm
FLIBS = -lm
FCPICFLAGS = -fpic
FPICFLAGS = -fpic
FOUNDATION_CPPFLAGS =
FOUNDATION_LIBS =
## All Java tool variables are left empty in this configuration
JAR =
JAVA =
JAVAC =
JAVAH =
## JAVA_HOME might be used in the next three.
## They are for packages 'JavaGD' and 'rJava'
JAVA_HOME =
JAVA_CPPFLAGS =
JAVA_LIBS =
JAVA_LD_LIBRARY_PATH =
LAPACK_LIBS = -L$(R_HOME)/lib$(R_ARCH) -lRlapack
## we only need this is if it is external, as otherwise link to R
LIBINTL=
LIBM = -lm
LIBR0 = -L$(R_HOME)/lib$(R_ARCH)
LIBR1 = -lR
LIBR = -L$(R_HOME)/lib$(R_ARCH) -lR
LIBS =
## needed by R CMD config
LIBnn = lib64
LIBTOOL = $(SHELL) "$(R_HOME)/bin/libtool"
LDFLAGS =
## LTO is empty, so the $(LTO) references in the *FLAGS variables above add nothing
LTO =
## needed to build applications linking to static libR
MAIN_LD = $(CC)
MAIN_LDFLAGS = -Wl,--export-dynamic -fopenmp
MAIN_LINK = $(MAIN_LD) $(MAIN_LDFLAGS) $(LDFLAGS)
MKINSTALLDIRS = $(R_HOME)/bin/mkinstalldirs
OBJC =
OBJCFLAGS = $(LTO)
OBJC_LIBS =
OBJCXX =
R_ARCH =
RANLIB = ranlib
SAFE_FFLAGS = -g -O2 -ffloat-store
SED = /bin/sed
SHELL = /bin/bash
SHLIB_CFLAGS =
SHLIB_CXXFLAGS =
SHLIB_CXXLD = $(CXX)
SHLIB_CXXLDFLAGS = -shared
SHLIB_CXX98LD = $(CXX98) $(CXX98STD)
SHLIB_CXX98LDFLAGS = -shared
SHLIB_CXX11LD = $(CXX11) $(CXX11STD)
SHLIB_CXX11LDFLAGS = -shared
SHLIB_CXX14LD = $(CXX14) $(CXX14STD)
SHLIB_CXX14LDFLAGS = -shared
SHLIB_CXX17LD = $(CXX17) $(CXX17STD)
SHLIB_CXX17LDFLAGS = -shared
SHLIB_EXT = .so
SHLIB_FCLD = $(FC)
SHLIB_FCLDFLAGS = -shared
SHLIB_FFLAGS =
SHLIB_LD = $(CC)
## Everything from '#' on is a make comment, so the value below is just '-shared'
SHLIB_LDFLAGS = -shared# $(CFLAGS) $(CPICFLAGS)
SHLIB_LIBADD =
## We want to ensure libR is picked up from $(R_HOME)/lib
## before e.g. /usr/local/lib if a version is already installed.
SHLIB_LINK = $(SHLIB_LD) $(SHLIB_LDFLAGS) $(LIBR0) $(LDFLAGS)
SHLIB_OPENMP_CFLAGS = -fopenmp
SHLIB_OPENMP_CXXFLAGS = -fopenmp
SHLIB_OPENMP_FCFLAGS = -fopenmp
SHLIB_OPENMP_FFLAGS = -fopenmp
STRIP_LIBS = strip --strip-unneeded
STRIP_STATIC_LIBS = strip --strip-debug
TCLTK_CPPFLAGS = -pthread
TCLTK_LIBS = -pthread -lz -lX11 -lXft -ltcl8.6 -ltk8.6 -lz
YACC = bison -y

## legacy
## The CXX1X* compiler variables are empty, while the SHLIB_CXX1X* linker settings
## reuse the C++11 compiler and standard flag
CXX1X =
CXX1XFLAGS =
CXX1XPICFLAGS =
CXX1XSTD =
SHLIB_CXX1XLD = $(CXX11) $(CXX11STD)
SHLIB_CXX1XLDFLAGS = -shared


## for linking to libR.a
## (the whole value is commented out, so STATIC_LIBR is empty)
STATIC_LIBR = # -Wl,--whole-archive "$(R_HOME)/lib$(R_ARCH)/libR.a" -Wl,--no-whole-archive $(BLAS_LIBS) $(FLIBS) $(LIBINTL) -lreadline -lncurses $(LIBS)

R_XTRA_CFLAGS =
R_XTRA_CPPFLAGS = -I$(R_INCLUDE_DIR) -DNDEBUG
R_XTRA_CXXFLAGS =
R_XTRA_FFLAGS =

## Aggregated flag lists used by the compile rules. Order for the C, C++ and Fortran lists:
## R-wide extras, the PKG_* flags, the PIC / shared-library flags, and finally the base flags
## defined above. The Objective-C lists have no R-wide extras.
ALL_CFLAGS = $(R_XTRA_CFLAGS) $(PKG_CFLAGS) $(CPICFLAGS) $(SHLIB_CFLAGS) $(CFLAGS)
ALL_CPPFLAGS = $(R_XTRA_CPPFLAGS) $(PKG_CPPFLAGS) $(CLINK_CPPFLAGS) $(CPPFLAGS)
ALL_CXXFLAGS = $(R_XTRA_CXXFLAGS) $(PKG_CXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(CXXFLAGS)
ALL_OBJCFLAGS = $(PKG_OBJCFLAGS) $(CPICFLAGS) $(SHLIB_CFLAGS) $(OBJCFLAGS)
ALL_OBJCXXFLAGS = $(PKG_OBJCXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(OBJCXXFLAGS)
ALL_FFLAGS = $(R_XTRA_FFLAGS) $(PKG_FFLAGS) $(FPICFLAGS) $(SHLIB_FFLAGS) $(FFLAGS)
## LIBR here as a couple of packages use this without SHLIB_LINK
ALL_LIBS = $(PKG_LIBS) $(SHLIB_LIBADD) $(LIBR)# $(LIBINTL)

## Compile rules. Every recipe line MUST start with a hard TAB; without it make stops with
## "missing separator". The rules are kept as classic suffix rules, unchanged in content.

## Clear the built-in suffix list, then declare the suffixes handled below
.SUFFIXES:
.SUFFIXES: .c .cc .cpp .d .f .f90 .f95 .m .mm .M .o

## C: object file, and dependency (.d) file generated with -MM
.c.o:
	$(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) -c $< -o $@
.c.d:
	@echo "making $@ from $<"
	@$(CC) -MM $(ALL_CPPFLAGS) $< > $@

## Objective-C: the dependency file only receives the output of a bare echo
.m.d:
	@echo > $@

## C++ (.cc / .cpp): object files, and dependency files generated with -M
.cc.o:
	$(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
.cpp.o:
	$(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
.cc.d:
	@echo "making $@ from $<"
	@$(CXX) -M $(ALL_CPPFLAGS) $< > $@
.cpp.d:
	@echo "making $@ from $<"
	@$(CXX) -M $(ALL_CPPFLAGS) $< > $@

## Objective-C (.m) and Objective-C++ (.mm / .M)
.m.o:
	$(OBJC) $(ALL_CPPFLAGS) $(ALL_OBJCFLAGS) -c $< -o $@
.mm.o:
	$(OBJCXX) $(ALL_CPPFLAGS) $(ALL_OBJCXXFLAGS) -c $< -o $@
.M.o:
	$(OBJCXX) $(ALL_CPPFLAGS) $(ALL_OBJCXXFLAGS) -c $< -o $@

## Fortran: .f is compiled with $(F77), .f90 / .f95 with $(FC)
.f.o:
	$(F77) $(ALL_FFLAGS) -c $< -o $@
.f95.o:
	$(FC) $(PKG_FCFLAGS) $(FCPICFLAGS) $(FCFLAGS) -c $< -o $@
.f90.o:
	$(FC) $(PKG_FCFLAGS) $(FCPICFLAGS) $(FCFLAGS) -c $< -o $@