#!/bin/bash
# Provisioning script (part 1): install Docker Engine and the NVIDIA
# Container Toolkit on Ubuntu. Must run as root (writes under /etc/apt,
# /usr/share/keyrings, and calls apt-get/usermod/systemctl).
# FIX: shebang was "#!bin/bash" (missing leading slash).

# Make apt fully non-interactive so unattended provisioning never blocks.
export DEBIAN_FRONTEND=noninteractive

# Update package lists and install base Python tooling.
apt-get update -y
apt-get install -y python3-pip python3-venv

echo "---------------------------------------------------"
echo "Started installing Docker."
echo "---------------------------------------------------"

# Prerequisites for adding apt repositories over HTTPS (plus git).
apt-get install -y ca-certificates curl gnupg git

# Add Docker's official GPG key.
# FIX: create the keyrings directory first — it does not exist on all
# stock Ubuntu images, and gpg -o fails if the parent directory is missing.
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg

# Add the Docker repository to apt sources.
echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
  $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list >/dev/null

# Install the Docker packages.
apt-get update -y
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Install the NVIDIA container toolkit so containers can use the GPUs.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
    | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update -y
apt-get install -y nvidia-container-toolkit

# Make Docker available to the non-root "ubuntu" user.
usermod -aG docker ubuntu

# Restart Docker so the NVIDIA runtime configuration is picked up.
systemctl restart docker

echo "---------------------------------------------------"
echo "Docker installed successfully."
echo "---------------------------------------------------"
echo "---------------------------------------------------"
echo "Starting vLLM docker container"
echo "---------------------------------------------------"

# Model served by vLLM. The NousResearch mirror needs no Hugging Face token.
MODEL_NAME="NousResearch/Meta-Llama-3-70B-Instruct"
HF_TOKEN_ARG=""
# Alternatively uncomment below, but you require a valid HF token.
# (FIX: the commented example previously lacked its closing quote.)
# MODEL_NAME="meta-llama/Meta-Llama-3-70B-Instruct"
# HF_TOKEN="[insert-your-hf-token]"
# HF_TOKEN_ARG="--env HF_TOKEN=$HF_TOKEN"

# Shard the model across every GPU visible on the host.
NUM_GPUS=$(nvidia-smi -L | wc -l)

# Build the docker run command as an array so each argument survives intact.
# FIX: the original concatenated everything into one string and re-parsed it
# with `bash -c`, which breaks if any value (e.g. MODEL_NAME) contains spaces
# or shell metacharacters.
DOCKER_CMD=(docker run -d --gpus all
  -v /ephemeral/.cache/huggingface:/root/.cache/huggingface
  -v /home/ubuntu/vllm:/vllm_repo
  -p 8000:8000
  --ipc=host
  --restart always)

# Append the HF token flag only when one was configured.
if [ -n "$HF_TOKEN_ARG" ]; then
  # Intentionally unquoted: HF_TOKEN_ARG holds two words ("--env" "HF_TOKEN=...").
  DOCKER_CMD+=($HF_TOKEN_ARG)
fi

DOCKER_CMD+=(vllm/vllm-openai:latest
  --tensor-parallel-size "$NUM_GPUS"
  --model "$MODEL_NAME")

# Run as the ubuntu user (added to the docker group during install;
# sudo initializes the target user's supplementary groups).
echo "Executing Docker command: ${DOCKER_CMD[*]}"
sudo -u ubuntu "${DOCKER_CMD[@]}"

# Test the API (wait +- 7 minutes for model download and start up)
#MODEL_NAME="NousResearch/Meta-Llama-3-70B-Instruct"
#curl -X POST http://localhost:8000/v1/chat/completions \
#  -H "Content-Type: application/json" \
#  -d '{
#    "model": "'$MODEL_NAME'",
#    "messages": [
#      {
#        "role": "user",
#        "content": "Hello, how are you?"
#      }
#    ]
#  }'