# Dockerfile.alpine - Alpine-based image that installs Apache Spark and Apache Livy.
# Base image.
FROM alpine:3.10

# Versions of the Apache Spark and Apache Livy distributions to install.
# Override with `--build-arg SPARK_VERSION=... --build-arg LIVY_VERSION=...`.
# The defaults keep a plain `docker build` from expanding the versions to empty
# strings, which would yield invalid download URLs in the later build steps.
ARG SPARK_VERSION=2.4.4
ARG LIVY_VERSION=0.6.0-incubating

# Persist the selected versions in the image environment so that later RUN
# steps (and the running container) can read them.
ENV SPARK_VERSION=${SPARK_VERSION} \
    LIVY_VERSION=${LIVY_VERSION}

# Alternative kept for reference: build on top of the spark-operator image
# instead of plain Alpine.
#ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.4
#FROM ${SPARK_IMAGE}
# Install the OS-level packages in a single layer.
# `--no-cache` fetches the package index on the fly and leaves nothing behind in
# /var/cache/apk, so no separate `--update` or manual cache cleanup is needed.
# curl and unzip are used by the download steps of this build; supervisor
# configuration is copied into the image further down.
RUN apk add --no-cache \
        bash \
        ca-certificates \
        curl \
        openjdk8 \
        supervisor \
        unzip
# Google Cloud Storage access.
# The GCS connector jar is downloaded into $SPARK_HOME/jars further down, after
# Spark has been unpacked and SPARK_HOME is defined. Fetching it at this point
# does not work: SPARK_HOME is still unset here, so `$SPARK_HOME/jars` expands
# to `/jars`, and combining `-O` with `-o` for a single URL makes curl drop a
# stray copy of the jar into the current directory instead.
#
# Disabled guava replacement, kept for reference:
# RUN rm $SPARK_HOME/jars/guava-14.0.1.jar
# RUN curl -OJL http://central.maven.org/maven2/com/google/guava/guava/23.0/guava-23.0.jar -o $SPARK_HOME/jars
# Everything is installed under /opt. WORKDIR creates the directory when it is
# missing, so no separate mkdir is needed.
WORKDIR /opt
ENV SPARK_HOME=/opt/spark

# Root of the Apache distribution tree to download from. archive.apache.org
# retains past releases; pass `--build-arg APACHE_DIST_URL=...` to use a
# different mirror with the same directory layout.
ARG APACHE_DIST_URL=https://archive.apache.org/dist

# Download Spark, unpack it to /opt/spark and delete the archive in the same
# layer so the tarball never ends up in the image. `-f` makes curl fail on an
# HTTP error instead of saving the error page as the archive.
RUN curl -fsSL -o spark.tgz \
        "${APACHE_DIST_URL}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz" \
    && tar -xzf spark.tgz \
    && mv "spark-${SPARK_VERSION}-bin-hadoop2.7" spark \
    && rm -f spark.tgz

# Same procedure for Livy: download, unpack to /opt/livy, delete the archive.
RUN curl -fsSL -o livy.zip \
        "${APACHE_DIST_URL}/incubator/livy/${LIVY_VERSION}/apache-livy-${LIVY_VERSION}-bin.zip" \
    && unzip -qq livy.zip \
    && mv "apache-livy-${LIVY_VERSION}-bin" livy \
    && rm -f livy.zip

# Extra jars placed on Spark's classpath: the Google Cloud Storage connector and
# the Hadoop AWS module with its AWS SDK dependency.
# Maven Central is reached over HTTPS at repo1.maven.org.
# NOTE(review): the GCS connector URL points at a moving "latest" artifact, so
# this layer is not reproducible; consider pinning a versioned jar.
RUN curl -fsSL -o "${SPARK_HOME}/jars/gcs-connector-latest-hadoop2.jar" \
        https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar \
    && curl -fsSL -o "${SPARK_HOME}/jars/hadoop-aws-2.7.3.jar" \
        https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar \
    && curl -fsSL -o "${SPARK_HOME}/jars/aws-java-sdk-1.7.4.jar" \
        https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar
# The remaining filesystem setup needs root.
USER root

# Create the unprivileged `spark` account (fixed UID/GID 1001) and the
# directories for logs, scripts and supervisor configuration, then hand all of
# /opt to that account.
# - This runs before the COPY steps so that a changed copied file (for example
#   .cachebust) does not force the recursive chown to run again.
# - `mkdir -p` tolerates directories that already exist.
# - No symlink to /opt/livy is created: the Livy distribution has already been
#   moved to /opt/livy, so linking the versioned directory name onto it would
#   only leave a dangling link inside that directory.
RUN addgroup --gid 1001 spark \
    && adduser --disabled-password --ingroup spark --uid 1001 spark \
    && mkdir -p \
        /opt/bin \
        /opt/etc/supervisor/conf.d \
        /opt/livy/conf \
        /opt/livy/logs \
        /opt/logs \
        /opt/scripts \
        /var/log/supervisor \
    && chown -R spark:spark /opt

# Copy build-context files directly with the final ownership.
COPY --chown=spark:spark .cachebust /opt/
COPY --chown=spark:spark supervisor-conf/* /opt/etc/supervisor/conf.d/
COPY --chown=spark:spark shell-scripts/* /opt/bin/
COPY --chown=spark:spark livy-conf/* /opt/livy/conf/

# Drop privileges for runtime and start the launcher script copied above.
USER spark
CMD ["/opt/bin/run.sh"]