implement local vllm model usage
Run Tests / Run Tests (push): successful in 46s
This commit is contained in:
parent 9222739df1
commit f7db98d91e
@@ -8,7 +8,7 @@ services:
 
   vllm:
     image: vllm/vllm-openai:latest
-    command: "--model Qwen/Qwen2-VL-2B-Instruct"
+    command: "--model Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 --quantization gptq"
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
    ports:
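The compose change swaps the full-precision Qwen2-VL-2B-Instruct model for a GPTQ Int4 build so vLLM can serve it on a smaller local GPU. A quick way to confirm the container actually loads it is to hit the OpenAI-compatible endpoint; the sketch below is an assumption, not part of this commit, and the localhost:8000 address depends on the port mapping in the compose file.

# Hypothetical smoke test for the local vLLM server (not part of this commit).
# The api_base is an assumption based on vLLM's default port 8000.
import litellm

response = litellm.completion(
    model="openai/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",  # "openai/" prefix routes litellm to an OpenAI-compatible server
    api_base="http://localhost:8000/v1",
    api_key="test",  # vLLM accepts any key unless it was started with --api-key
    messages=[{"role": "user", "content": "Reply with the word 'ready'."}],
)
print(response.choices[0].message["content"])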
@@ -149,5 +149,6 @@ CELERY_BROKER_URL = "amqp://guest:guest@localhost/"
 
 
 OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+OPENAI_API_KEY = "test"
 OPENAI_MODEL = os.getenv("OPENAI_MODEL", "openai/gpt-4o")
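Hard-coding OPENAI_API_KEY = "test" works here because the local vLLM server does not validate keys by default, but it also shadows any real key in the environment. A minimal alternative sketch, my suggestion rather than what this commit does, keeps the env lookup with a dummy fallback:

# Env-first fallback instead of a hard-coded value (not part of this commit).
# `os` is already imported at the top of the settings module.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "test")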
@@ -1,15 +1,12 @@
-import requests
 import base64
 import litellm
-import os
 
 from loguru import logger
-from celery import shared_task, Task
+from celery import shared_task
 from django.db import transaction
 from django.core.files.storage import default_storage
 from django.conf import settings
 from .models import ImageMemo, MemoStatus
-from datetime import datetime
 
 TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.
 
@@ -18,9 +15,12 @@ Do not use a fence, simply respond using markdown.
 If any words or letters are unclear, denote them with a '?<word>?'.
 
 For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
 
+Please include whitespace and formatting for headings too.
 """
 
+
+
 @shared_task
 def process_memo(memo_id: str):
     """Run OCR on a memo and store the output"""
@@ -67,6 +67,7 @@ def process_memo(memo_id: str):
     response = litellm.completion(
         model=settings.OPENAI_MODEL, #os.getenv("MODEL", "openai/gpt-4o"),
         messages=[message],
+        temperature=0.01
     )
 
     response.choices[0].message["content"]
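The completion call passes a prebuilt `message` and now pins temperature to 0.01 so the transcription output stays as deterministic as the backend allows. The message construction is outside this hunk; given the base64 import, it is presumably an OpenAI-style vision message with the image inlined as a data URL, which is the format vLLM's OpenAI-compatible server accepts for Qwen2-VL. A minimal sketch of that shape, with the helper name and MIME type as assumptions:

import base64

def build_message(image_bytes: bytes, prompt: str) -> dict:
    """Assumed shape of `message`: OpenAI-style vision content with an inline data URL."""
    encoded = base64.b64encode(image_bytes).decode("utf-8")
    return {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                # MIME type is a guess; the stored memo image may be PNG instead.
                "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
            },
        ],
    }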
@@ -36,11 +36,11 @@
         </tr>
         <tr>
             <td class="font-medium pr-4">Created:</td>
-            <td class="text-gray-600">{{ document.created_at|date:"d/m/Y H:i" }}</td>
+            <td class="text-gray-600">{{ document.created_at|date:"d/m/Y H:i:s" }}</td>
         </tr>
         <tr>
             <td class="font-medium pr-4">Updated:</td>
-            <td class="text-gray-600">{{ document.updated_at|date:"d/m/Y H:i" }}</td>
+            <td class="text-gray-600">{{ document.updated_at|date:"d/m/Y H:i:s" }}</td>
         </tr>
     </table>
     {% if document.content %}
uv.lock (5 lines changed)
@@ -1,5 +1,10 @@
 version = 1
 requires-python = ">=3.9"
+resolution-markers = [
+    "python_full_version < '3.11'",
+    "python_full_version == '3.11.*'",
+    "python_full_version >= '3.12'",
+]
 
 [[package]]
 name = "aiohappyeyeballs"