78 lines
2.3 KiB
Python
78 lines
2.3 KiB
Python
|
import requests
|
||
|
import base64
|
||
|
import litellm
|
||
|
import os
|
||
|
|
||
|
from loguru import logger
|
||
|
from celery import shared_task, Task
|
||
|
from django.db import transaction
|
||
|
from django.core.files.storage import default_storage
|
||
|
from django.conf import settings
|
||
|
from .models import ImageMemo, MemoStatus
|
||
|
from datetime import datetime
|
||
|
|
||
|
# Instruction text sent to the model together with the memo image. It asks
# for the transcription inside a ```markdown fence and for uncertain words to
# be wrapped in question marks (e.g. '?blow?').
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown inside a fence like so

```markdown
<Content>
```

If any words or letters are unclear, denote them with a '?<word>?'. For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
"""
|
||
|
|
||
|
|
||
|
@shared_task
def process_memo(memo_id: str):
    """Run OCR on a memo and store the output.

    Looks up the ``ImageMemo`` with the given id, marks it as
    ``MemoStatus.Processing``, sends its image (base64-encoded in a data URL)
    together with ``TRANSCRIBE_PROMPT`` to the model configured in Django
    settings via ``litellm.completion``, and stores the returned message
    content on the memo with status ``MemoStatus.Done``.

    If the image file is missing from ``default_storage`` the memo is marked
    ``MemoStatus.Error`` with an explanatory ``error_message`` and the task
    returns normally. If reading the image or calling the OCR API raises, the
    memo is also marked ``MemoStatus.Error`` and the exception is re-raised,
    so the task still fails as before instead of leaving the memo stuck in
    the processing state.

    Args:
        memo_id: Identifier of the ``ImageMemo`` to process.

    Raises:
        Exception: Any error raised by the memo lookup, the storage read or
            the ``litellm`` call is propagated unchanged.
    """

    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    image_name = memo.image.name

    # check that the image exists
    logger.info(f"Checking that image {image_name} exists")
    if not default_storage.exists(image_name):
        memo.status = MemoStatus.Error
        memo.error_message = f"Image file {image_name} does not exist"
        memo.save()
        return

    try:
        # read the image into memory; the context manager closes the
        # storage file handle even if the read fails
        logger.info(f"Reading image {image_name}")
        with default_storage.open(image_name) as image_file:
            image_bytes = image_file.read()

        # call the OCR API
        logger.info(f"Calling OCR API for memo {memo.id}")

        b64img = base64.b64encode(image_bytes).decode("utf-8")

        message = {
            "role": "user",
            "content": [
                {"type": "text", "text": TRANSCRIBE_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{memo.image_mimetype};base64,{b64img}"},
                },
            ],
        }

        # NOTE: these are module-level litellm settings, so they apply to
        # every litellm call made in this worker process.
        litellm.api_base = settings.OPENAI_API_BASE
        litellm.api_key = settings.OPENAI_API_KEY

        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
        )
        content = response.choices[0].message["content"]
    except Exception as exc:
        # Record the failure on the memo so it does not stay in the
        # processing state forever, then let the task fail as it did before.
        logger.exception(f"OCR failed for memo {memo.id}")
        memo.status = MemoStatus.Error
        memo.error_message = f"OCR failed: {exc}"
        memo.save()
        raise

    with transaction.atomic():
        memo.content = content
        memo.status = MemoStatus.Done
        memo.save()
|