# ------------------------------------------------------------------------------------------------------------------------------
# 1. Install conda
# see: https://docs.anaconda.com/miniconda/miniconda-install/
# ------------------------------------------------------------------------------------------------------------------------------
wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
./Miniconda3-latest-Linux-x86_64.sh -b -p $HOME/miniconda3
export PATH="$HOME/miniconda3/bin:$PATH"
rm Miniconda3-latest-Linux-x86_64.sh
conda init bash
source ~/.bashrc

# ------------------------------------------------------------------------------------------------------------------------------
# 2. Install llama stack
# see: https://github.com/meta-llama/llama-stack?tab=readme-ov-file#installation
# ------------------------------------------------------------------------------------------------------------------------------
HF_TOKEN="[INSERT YOUR HUGGINGFACE TOKEN HERE]"
MODEL_NAME="Llama3.2-11B-Vision-Instruct"
# Uncomment the MODEL_NAME line below to use the 90B model instead.
# Note: the 90B model needs 8 GPUs; llama stack does not support fewer, see also:
# https://github.com/meta-llama/llama-stack/blob/eb2d8a31a5927589197c794855d7323f8f4700bc/llama_stack/providers/impls/meta_reference/inference/config.py#L40
# MODEL_NAME="Llama3.2-90B-Vision-Instruct"
BASE_NAME="llama3-2"
MAX_SEQ_LEN=4096
MAX_BATCH_SIZE=1

mkdir -p /home/ubuntu/local
cd /home/ubuntu/local
git clone https://github.com/meta-llama/llama-stack.git
conda create -n stack python=3.10 -y
conda activate stack
cd llama-stack
$CONDA_PREFIX/bin/pip install -e .

# Download the model to ephemeral storage so we can store more models
# Note: after restarting the instance from hibernation, we need to re-download the model
export LLAMA_STACK_CONFIG_DIR=/ephemeral/.llama
sudo chown -R ubuntu:ubuntu /ephemeral
llama download --source huggingface --model-id $MODEL_NAME --hf-token $HF_TOKEN

# Build stack
# llama stack build --template local --name $BASE_NAME
# Using pty to simulate a terminal (conda lives under /home/ubuntu/miniconda3, installed in step 1;
# once the env is activated, llama resolves from the env's bin directory)
sudo python3 -c "
import pty; pty.spawn(['/bin/bash', '-c', 'source /home/ubuntu/miniconda3/etc/profile.d/conda.sh && \
conda activate /home/ubuntu/miniconda3/envs/stack && \
export LLAMA_STACK_CONFIG_DIR=/ephemeral/.llama && \
llama stack build --template local --name $BASE_NAME'])"

# Configure (interactively)
# llama stack configure "$BASE_NAME"

# Configure (non-interactively)
CONFIG_YML_PATH=/ephemeral/.llama/builds/conda/$BASE_NAME-run.yaml
mkdir -p $(dirname $CONFIG_YML_PATH)
cat <<EOF > $CONFIG_YML_PATH
built_at: '2024-09-30T10:40:07.669022'
image_name: $BASE_NAME
docker_image: null
conda_env: $BASE_NAME
apis_to_serve:
- shields
- safety
- agents
- memory_banks
- models
- inference
- memory
api_providers:
  inference:
    providers:
    - meta-reference
  memory:
    providers:
    - meta-reference
  safety:
    providers:
    - meta-reference
  agents:
    provider_id: meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: /home/ubuntu/.llama/runtime/kvstore.db
  telemetry:
    provider_id: meta-reference
    config: {}
routing_table:
  inference:
  - provider_id: meta-reference
    config:
      model: $MODEL_NAME
      quantization: null
      torch_seed: 1
      max_seq_len: $MAX_SEQ_LEN
      max_batch_size: $MAX_BATCH_SIZE
    routing_key: $MODEL_NAME
  memory:
  - provider_id: meta-reference
    config: {}
    routing_key: vector
  safety:
  - provider_id: meta-reference
    config:
      llama_guard_shield: null
      prompt_guard_shield: null
    routing_key: llama_guard
  - provider_id: meta-reference
    config:
      llama_guard_shield: null
      prompt_guard_shield: null
    routing_key: code_scanner_guard
  - provider_id: meta-reference
    config:
      llama_guard_shield: null
      prompt_guard_shield: null
    routing_key: injection_shield
  - provider_id: meta-reference
    config:
      llama_guard_shield: null
      prompt_guard_shield: null
    routing_key: jailbreak_shield
EOF

# Make sure the ubuntu user has access to all directories
sudo chown -R ubuntu:ubuntu /home/ubuntu

# Run llama stack (this blocks in the foreground; run the API calls below from a second shell)
echo "Running llama stack run for model $MODEL_NAME"
# Using pty to simulate a terminal
sudo python3 -c "
import pty; pty.spawn(['/bin/bash', '-c', 'source /home/ubuntu/miniconda3/etc/profile.d/conda.sh && \
conda activate /home/ubuntu/miniconda3/envs/stack && \
export LLAMA_STACK_CONFIG_DIR=/ephemeral/.llama && \
llama stack run $BASE_NAME --port 8000'])"
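# Optional: before calling the API, wait until the server accepts connections.
# A minimal sketch assuming the port 8000 configured above; this is a plain TCP
# probe using bash's built-in /dev/tcp, not a llama-stack-specific health endpoint.
until timeout 1 bash -c 'cat < /dev/null > /dev/tcp/localhost/8000' 2>/dev/null; do
  echo "Waiting for the llama stack server on port 8000..."
  sleep 5
done
echo "Server is accepting connections"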
# Try out the API (image)
# Download a test image
IMAGE_URL="https://www.hyperstack.cloud/hs-fs/hubfs/deploy-vm-11-ecd8c53003182041d3a2881d0010f6c6-1.png?width=3352&height=1852&name=deploy-vm-11-ecd8c53003182041d3a2881d0010f6c6-1.png"
IMAGE_EXTENSION=$(echo "$IMAGE_URL" | awk -F. '{print $NF}' | cut -d'?' -f1)
FILE_NAME="/home/ubuntu/downloaded_image.$IMAGE_EXTENSION"
curl -o "$FILE_NAME" "$IMAGE_URL"

# Write the JSON payload to the payload.json file
cat <<EOF > payload.json
{
  "model": "$MODEL_NAME",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "image": {
            "uri": "file://$FILE_NAME"
          }
        },
        "Describe this image in two sentences"
      ]
    }
  ]
}
EOF

# Use the JSON payload file in the curl command
curl -X POST http://localhost:8000/inference/chat_completion \
  -H "Content-Type: application/json" \
  -d @payload.json

# Try out the API (text)
# curl -X POST http://localhost:8000/inference/chat_completion \
#   -H "Content-Type: application/json" \
#   -d '{
#     "model": "Llama3.2-11B-Vision-Instruct",
#     "messages": [
#       {
#         "role": "user",
#         "content": "Hello, how are you?"
#       }
#     ]
#   }'
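# Optional: pretty-print the response for easier reading. A minimal sketch that
# assumes the endpoint returns a single JSON body (i.e. a non-streaming response);
# python3 -m json.tool is part of the standard library, so no extra dependencies.
# curl -s -X POST http://localhost:8000/inference/chat_completion \
#   -H "Content-Type: application/json" \
#   -d @payload.json | python3 -m json.tool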