$ sudo apt update
$ sudo apt install nvidia-driver-430
******** cuda-drivers
$ sudo apt install cuda-drivers
******** Docker Engine - Community
$ sudo apt remove docker docker-engine docker.io containerd runc
$ sudo apt update
$ sudo apt install apt-transport-https ca-certificates curl gnupg-agent software-properties-common
$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
$ sudo apt-key fingerprint 0EBFCD88
$ sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
$ sudo apt update
$ sudo apt install docker-ce docker-ce-cli containerd.io
$ sudo docker run hello-world
$ docker -v
Docker version 19.03.2, build 6a30dfc
$ sudo groupadd docker
$ sudo usermod -a -G docker soh
// check that the user now appears in the docker group
$ vi /etc/group
******** nvidia-container-toolkit
$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
$ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
$ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
$ sudo apt update
$ sudo apt install nvidia-container-toolkit
$ sudo systemctl restart docker
// sudo is still needed until the docker group membership takes effect; after a reboot, docker can be run without sudo:
$ docker run --gpus all nvidia/cuda:10.0-base nvidia-smi
Unable to find image 'nvidia/cuda:10.0-base' locally
10.0-base: Pulling from nvidia/cuda
...
******** tensorflow docker image
$ docker pull tensorflow/tensorflow:2.0.0-gpu-py3
// test
$ docker run -it tensorflow/tensorflow:2.0.0-gpu-py3 python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([10, 10])))"
$ docker run -it -v $PWD:/tmp -w /tmp tensorflow/tensorflow:2.0.0-gpu-py3 python3 ./tflist1.py
$ vi tflist1.py
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
...
sessionCf.gpu_options.allow_growth = True # to prevent cudnn error
// to use GPU
$ docker run --gpus all -it -v $PWD:/ws -w /ws tensorflow/tensorflow:2.0.0-gpu-py3 python3 ./cnn.py 2>&1 |tee o_wGpu
// GPU state can be checked with nvidia-smi
// The downloaded dataset is gone on the next docker run, because changes made inside a container are not saved back to the image.
// To keep the container's changes, save them as a new image with docker commit.
$ docker ps -a
// shows container IDs, e.g. 45caefd2442d. Choose a name for the new image, e.g. sytf20, and commit the container to it:
$ docker commit 45caefd2442d sytf20
// then the next run, started from the committed image, reuses the downloaded dataset:
$ docker run --gpus all -it -v $PWD:/ws -w /ws sytf20 python3 ./cnn.py 2>&1 |tee o_wGpu