Docker tip

Docker image build/usage tips

command

build, local tagging

# Build from the image's own directory; DATE (YYYYMMDD) becomes the version tag.
cd your-image-directory
DATE=$(date "+%Y%m%d")

# Build against the given base image, then point the local "latest" tag at the result.
docker build \
    --build-arg base_image=repo.com/base-image:latest \
    --tag "your-image:$DATE" \
    .
docker tag "your-image:$DATE" your-image:latest

deploy

# Publish the dated build under both its date tag and "latest".
# DATE must still be set from the build step.
for tag in "$DATE" latest; do
    docker tag "your-image:$DATE" "repo.com/your-image:$tag"
    docker push "repo.com/your-image:$tag"
done

simple Dockerfile example

Install basic tools such as Java, Scala, Maven, sbt, conda, and Spark.

# Development image: build tools (git, gcc, Scala, Maven, sbt, Spark) layered on a
# caller-supplied base image.
#
# The base image is passed at build time: --build-arg base_image=<image>.
# There is no default, so the build fails if the argument is omitted.
ARG base_image
FROM $base_image

# Package installation needs root.
USER root

# The WANdisco release package is installed first, in its own yum transaction;
# presumably it registers the repository the git install resolves against.
# All packages are installed in one layer and the yum cache is cleaned in that
# same layer so it does not persist in the image.
RUN yum install -y http://opensource.wandisco.com/centos/7/git/x86_64/wandisco-git-release-7-2.noarch.rpm \
 && yum install -y \
        epel-release \
        gcc \
        git \
        krb5-devel \
 && yum clean all

# Everything below runs as the unprivileged account, relative to WORKDIR.
USER your_account
WORKDIR /your_dir

# Read by setup.sh to pick the Spark download. Kept as two instructions because
# a variable set in an ENV instruction is only visible to later instructions.
ENV SPARK_VER=spark-2.4.5
ENV SPARK_DST=${SPARK_VER}-bin-without-hadoop

# Owner matches the USER above so setup.sh can write next to these files.
COPY --chown=your_account:your_account ./resource ./install_resource

# One logical step: install the tools, keep the prompt theme, extend .bashrc,
# then drop the installer files. (The COPY layer above still contains them;
# the rm only removes them from the final filesystem view.)
RUN ./install_resource/setup.sh \
 && mv ./install_resource/.git-prompt-colors.sh . \
 && cat ./install_resource/bashrc_add.sh >> .bashrc \
 && rm -rf ./install_resource

ENV LANG=en_US.UTF-8 \
    SCALA_HOME=/.../opt/scala-2.11.12 \
    SPARK_HOME=/.../opt/${SPARK_DST} \
    SPARK_SUBMIT_OPTS="-Dhdp.version=3.1.0.0-78"

# JAVA_HOME is not set anywhere in this file; presumably it comes from the
# base image - confirm. Lookup order: node, maven, sbt, scala, java, then the
# inherited PATH.
ENV PATH=/.../opt/node-v12.16.1-linux-x64/bin:/.../opt/apache-maven-3.6.3/bin:/.../opt/sbt/bin:$SCALA_HOME/bin:$JAVA_HOME/bin:$PATH \
    SPARK_CONF_DIR=$SPARK_HOME/conf

# COPY sources are resolved against the build context, where these files live
# under ./resource (the in-image ./install_resource copy was removed above).
COPY --chown=your_account:your_account ./resource/spark_jars_add/* ./opt/${SPARK_DST}/jars/

# The entrypoint prepares the environment and then exec's the requested
# command; with no command given, an interactive bash is started.
COPY ./entrypoint.sh ./opt/
ENTRYPOINT ["/.../opt/entrypoint.sh"]

CMD ["/bin/bash"]

in ./install_resource/setup.sh

#!/usr/bin/bash
#
# Installs Scala, Maven, sbt and Spark under ./opt, clones bash-git-prompt into
# the home directory, and creates empty ssh client files.
#
# Reads SPARK_VER and SPARK_DST from the environment (the Dockerfile sets them
# with ENV before running this script).

# Abort on the first failed command, unset variable, or failed pipeline stage,
# so a broken download fails the image build instead of silently producing an
# image with missing tools.
set -euo pipefail

: "${SPARK_VER:?SPARK_VER must be set}"
: "${SPARK_DST:?SPARK_DST must be set}"

# Download a gzip-compressed tarball, unpack it into the current directory,
# then delete the archive so it does not end up in the image layer.
fetch_and_unpack() {
    local url=$1
    local archive=${url##*/}

    wget -O "$archive" "$url"
    tar -zxvf "$archive"
    rm -f "$archive"
}

mkdir -p opt

cd opt || exit

# scala
fetch_and_unpack https://downloads.lightbend.com/scala/2.11.12/scala-2.11.12.tgz

# maven (fetched from the Apache archive over HTTPS)
fetch_and_unpack https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz

# sbt
fetch_and_unpack https://github.com/sbt/sbt/releases/download/v1.2.8/sbt-1.2.8.tgz

# spark
fetch_and_unpack "https://archive.apache.org/dist/spark/${SPARK_VER}/${SPARK_DST}.tgz"

cd ..

# git bash prompt
git clone https://github.com/magicmonty/bash-git-prompt.git ~/.bash-git-prompt --depth=1

# sometimes following files should exist for git's ssh action
mkdir -p .ssh
touch .ssh/authorized_keys
touch .ssh/config
touch .ssh/known_hosts

in ./install_resource/bashrc_add.sh

# Appended to .bashrc during the image build: prompt, tool paths, git prompt,
# Spark settings and the default conda environment.

# Prompt: user name, a fixed label, the current directory and a "$" sign,
# coloured with ANSI escape sequences.
export PS1='\[\e[36m\]\u\[\e[0m\]@\[\e[36m\]your-prompt \[\e[36m\]\W \[\e[33m\]$\[\e[0m\] '

export LANG=en_US.UTF-8

# Tool locations.
export SCALA_HOME=/.../opt/scala-2.11.12
export SPARK_HOME=/.../opt/spark-2.4.5-bin-without-hadoop
export SPARK_CONF_DIR=$SPARK_HOME/conf

# Lookup order: maven, sbt, scala, java, then whatever PATH already held.
export PATH=/.../opt/apache-maven-3.6.3/bin:/.../opt/sbt/bin:$SCALA_HOME/bin:$JAVA_HOME/bin:$PATH

# bash-git-prompt: these settings must be in place before gitprompt.sh is loaded.
GIT_PROMPT_ONLY_IN_REPO=1
GIT_PROMPT_THEME=Custom
GIT_PROMPT_THEME_FILE=/.../.git-prompt-colors.sh
. ~/.bash-git-prompt/gitprompt.sh

# Single quotes defer the expansion of SPARK_HOME until the alias is used.
alias spark-submit='$SPARK_HOME/bin/spark-submit'

# NOTE(review): entrypoint.sh loads conda from /.../opt/miniconda, but this
# path has no opt/ component - confirm which location is correct.
. /.../miniconda/etc/profile.d/conda.sh
conda activate py2

in ./entrypoint.sh

#!/usr/bin/bash
#
# Container entrypoint: exports the tool and cluster environment, activates the
# py2 conda environment, then replaces itself with the command it was given.

export LANG=en_US.UTF-8
export SCALA_HOME=/.../opt/scala-2.11.12
export SPARK_HOME=/.../opt/spark-2.4.5-bin-without-hadoop
export SPARK_CONF_DIR=$SPARK_HOME/conf

export SPARK_SUBMIT_OPTS="-Dhdp.version=3.1.0.0-78 -Dfile.encoding=utf-8"
export OOZIE_TIMEZONE=Asia/Seoul

export PATH=$JAVA_HOME/bin:$PATH
export PATH=$SCALA_HOME/bin:$PATH
export PATH=/.../opt/sbt/bin:$PATH
export PATH=/.../opt/apache-maven-3.6.3/bin:$PATH
export PATH=/.../opt/node-v12.16.1-linux-x64/bin:$PATH

# One -D option per line. The closing quote on the last option line is
# required: without it the string swallows the rest of the script and bash
# rejects the file with an unterminated-quote syntax error.
export EXTERN_CLUSTER_CONF="-Dipc.client.fallback-to-simple-auth-allowed=true
-Ddfs.nameservices=xxx,..,..
-Ddfs.ha.namenodes.xxx=nn1,nn2
-Ddfs.client.failover.proxy.provider.xxx=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
-Ddfs.namenode.http-address.xxx.nn1=...
-Ddfs.namenode.http-address.xxx.nn2=...
-Ddfs.namenode.https-address.xxx.nn1=...
-Ddfs.namenode.https-address.xxx.nn2=...
-Ddfs.namenode.lifeline.rpc-address.xxx.nn1=...
-Ddfs.namenode.lifeline.rpc-address.xxx.nn2=...
-Ddfs.namenode.rpc-address.xxx.nn1=...
-Ddfs.namenode.rpc-address.xxx.nn2=...
-Ddfs.namenode.service.rpc-address.xxx.nn1=...
-Ddfs.namenode.service.rpc-address.xxx.nn2=..."

# NOTE(review): bash does not expand aliases in non-interactive shells unless
# expand_aliases is set, and aliases do not survive the exec below, so this
# line has no effect on the launched command. Interactive shells get the same
# alias from .bashrc.
alias spark-submit='$SPARK_HOME/bin/spark-submit'

source /.../opt/miniconda/etc/profile.d/conda.sh
conda activate py2

# Hand over to the requested command so it replaces this shell process.
exec "$@"