---
# Compose definition for a single GPU-backed service named `llm`.
services:
  llm:
    # Build the image from the Dockerfile context in this directory.
    build: .
    container_name: server

    # Reserve every NVIDIA GPU on the host for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # Restart automatically unless the container was stopped explicitly.
    restart: unless-stopped

    environment:
      # NOTE(review): presumably read by the NVIDIA container runtime to
      # select which GPUs are visible inside the container - confirm.
      - NVIDIA_VISIBLE_DEVICES=all

    volumes:
      # Host ./models is mounted at /models; the command below reads
      # /models/model-presets.ini from this mount.
      - ./models:/models

    ports:
      # host:container - quoted so the mapping is always parsed as a string.
      - "3000:8080"

    # Arguments for the image's server process. Folded (>-) so the lines
    # below are joined with single spaces into one command string.
    # NOTE(review): flag semantics are defined by the binary in the image,
    # which is not visible here - verify against that server's documentation.
    command: >-
      --models-max 1
      --models-preset /models/model-presets.ini
      --fit on
      --fit-target 1024