---
# Docker Compose service for PyTorch GPU development.
# NOTE(review): the original file was collapsed onto one line beginning with
# "#", which made the whole document a YAML comment (i.e. an empty compose
# file). Reconstructed into valid block YAML below.
# `version:` is obsolete under Compose v2 and intentionally omitted.

services:
  dl:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        BASE_IMAGE: "pytorch/pytorch:2.9.0-cuda13.0-cudnn9-devel"
        USER: "dev"
        UID: "1000"
        GID: "1000"
    container_name: dl

    # GPUs + large DataLoader shared memory (workers exchange tensors via /dev/shm)
    gpus: all
    shm_size: "12g"
    ipc: host

    environment:
      # Expose only GPU 0 inside the container; set to "all" (or e.g. "0,1")
      # to use every GPU — required for the NCCL multi-GPU settings below to matter.
      - NVIDIA_VISIBLE_DEVICES=0
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility,video
      # Prefer NCCL for multi-GPU
      - TORCH_DISTRIBUTED_DEBUG=INFO
      - NCCL_P2P_DISABLE=0
      # NCCL_ASYNC_ERROR_HANDLING was renamed in PyTorch >= 2.2; set both so the
      # 2.9 base image honors it and older tooling still sees the legacy name.
      - NCCL_ASYNC_ERROR_HANDLING=1
      - TORCH_NCCL_ASYNC_ERROR_HANDLING=1
      # Persisted virtualenv on PATH (lives in the named volume mounted at /opt/venv).
      # Must match the path the `command` below creates, or persistence breaks.
      - VIRTUAL_ENV=/opt/venv
      - PATH=/opt/venv/bin:/usr/local/bin:/usr/bin:/bin
      - PYTHONUNBUFFERED=1
      - TZ=America/Los_Angeles

    volumes:
      # your code/data
      - .:/workspace
      - /home/image1325/ssd1/dataset/coco:/data
      # persisted venv: pip installs live here and survive image/container removal.
      # Container mount targets must be ABSOLUTE; this matches VIRTUAL_ENV/PATH
      # above and the bootstrap in `command` below.
      - venv:/opt/venv
      # (optional) speed up installs
      - pip-cache:/home/dev/.cache/pip

    working_dir: /workspace

    ulimits:
      memlock: -1
      stack: 67108864

    # On first run, create the venv if it doesn't exist; then drop to a shell.
    command: >-
      bash -lc "
      if [ ! -d /opt/venv/bin ]; then
      python -m venv /opt/venv &&
      /opt/venv/bin/python -m pip install --upgrade pip;
      fi;
      exec bash
      "

volumes:
  venv:
  pip-cache: