# Source listing metadata: 25 lines, 651 B, YAML.
services:
  # RabbitMQ broker, using the image variant tagged "3-management".
  rabbitmq:
    image: rabbitmq:3-management
    # HOST:CONTAINER mappings are quoted so that no YAML parser can
    # reinterpret a digits-and-colon scalar as a number.
    ports:
      - "5672:5672"    # presumably AMQP (RabbitMQ's default port) - confirm
      - "15672:15672"  # presumably the management UI/API - confirm

  # vLLM server (image vllm/vllm-openai) serving a GPTQ-quantized Qwen2-VL model.
  vllm:
    # NOTE(review): ":latest" is a moving tag; consider pinning a release
    # so that rebuilds are reproducible.
    image: vllm/vllm-openai:latest
    command: "--model Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 --quantization gptq"
    # Alternative model, kept for reference:
    # command: "--model HuggingFaceTB/SmolVLM-Instruct --max_model_len 4098"
    volumes:
      # Share the host's Hugging Face cache with the container so model
      # files already present on the host are visible inside it.
      - ~/.cache/huggingface:/root/.cache/huggingface
    ports:
      # Host port 8002 -> container port 8000.
      - "8002:8000"
    environment:
      # SECURITY: never commit the token itself. A key without a value is
      # passed through from the environment of the shell running
      # `docker compose` (export it there before starting the stack).
      # The token that was previously hard-coded here must be treated as
      # compromised: revoke it and issue a new one.
      - HUGGING_FACE_HUB_TOKEN
    deploy:
      resources:
        reservations:
          devices:
            # Reserve the NVIDIA GPU with device id "0" for this service.
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]