import base64

import litellm
import openai
from celery import shared_task
from django.conf import settings
from django.core.files.storage import default_storage
from django.db import transaction
from loguru import logger

from .models import ImageMemo, MemoStatus

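# Settings read by this task. A minimal sketch of the assumed entries in
# settings.py (the names come from this module; the values are placeholders):
#
#     OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
#     OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
#     OPENAI_MODEL = os.getenv("MODEL", "openai/gpt-4o")
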
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.
|
|
|
|
Do not use a fence, simply respond using markdown.
|
|
|
|
If any words or letters are unclear, denote them with a '?<word>?'.
|
|
|
|
For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
|
|
|
|
If a text is underlined followed by a newline that indicates that it is a header. Use markdown H2 to denote it as such.
|
|
|
|
Make sure to add 2 newlines newlines between sections.
|
|
|
|
Anything that looks visually like a bullet point should be treated as such. This includes lines starting with hyphens. Replace bullet point indicators with * in the interpretted text.
|
|
|
|
Please include whitespace and formatting for headings too.
|
|
"""
|
|
|
|
|
|
@shared_task
def process_memo(memo_id: str):
    """Run OCR on a memo and store the output."""

    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    # Check that the image file exists before calling the OCR API.
    logger.info(f"Checking that image {memo.image.name} exists")
    if not default_storage.exists(memo.image.name):
        memo.status = MemoStatus.Error
        memo.error_message = f"Image file {memo.image.name} does not exist"
        memo.save()
        return

    # Read the image into memory.
    logger.info(f"Reading image {memo.image.name}")
    with default_storage.open(memo.image.name) as image_file:
        image_bytes = image_file.read()

    # Call the OCR API.
    logger.info(f"Calling OCR API for memo {memo.id}")

    b64img = base64.b64encode(image_bytes).decode("utf-8")

    # Build a multimodal chat message: the transcription prompt plus the image
    # inlined as a base64 data URL.
    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": TRANSCRIBE_PROMPT},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{memo.image_mimetype};base64,{b64img}"},
            },
        ],
    }

    # Point litellm at the configured OpenAI-compatible endpoint.
    litellm.api_base = settings.OPENAI_API_BASE  # os.environ.get("OPENAI_API_BASE")
    litellm.api_key = settings.OPENAI_API_KEY

    try:
        response = litellm.completion(
            model=settings.OPENAI_MODEL,  # os.getenv("MODEL", "openai/gpt-4o")
            messages=[message],
            temperature=0.01,
        )

        with transaction.atomic():
            memo.content = response.choices[0].message["content"]
            memo.status = MemoStatus.Done
            memo.model_name = settings.OPENAI_MODEL
            memo.save()
    except openai.OpenAIError as e:
        # Record the failure so the memo does not stay stuck in the processing state.
        with transaction.atomic():
            memo.status = MemoStatus.Error
            memo.error_message = repr(e)
            memo.save()
        logger.error(e)
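
# Example usage (a sketch, not taken from this project): enqueue the task after an
# ImageMemo has been created, e.g. from an upload view or a post_save signal:
#
#     memo = ImageMemo.objects.create(image=uploaded_file)
#     process_memo.delay(str(memo.id))
#
# process_memo.delay(...) is the standard Celery way to queue a shared_task; the
# ImageMemo field names above are assumptions based on how they are used in this module.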