services:
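  # RabbitMQ broker with the management plugin; 5672 is the AMQP port, 15672 serves the management UI.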
  rabbitmq:
    image: rabbitmq:3-management
    ports:
      - 5672:5672
      - 15672:15672
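  # vLLM OpenAI-compatible server; container port 8000 is published on host port 8002.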
  vllm:
    image: vllm/vllm-openai:latest
    command: "--model Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 --quantization gptq"
    # command: "--model HuggingFaceTB/SmolVLM-Instruct --max_model_len 4098"
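    # Mount the host Hugging Face cache so model weights are not re-downloaded on every container start.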
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface
    ports:
      - 8002:8000
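    # Token read by vLLM / huggingface_hub to download gated or private models from the Hugging Face Hub.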
    environment:
      - HUGGING_FACE_HUB_TOKEN=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
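    # Reserve NVIDIA GPU 0 for this container (requires the NVIDIA Container Toolkit on the host).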
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
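
# A minimal sketch of bringing the stack up and checking the vLLM endpoint
# (assumes Docker Compose v2 and that the published host ports above are free):
#   docker compose up -d
#   curl http://localhost:8002/v1/models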