PenParse/penparse/webui/tasks.py

import base64
import litellm
import openai

from loguru import logger
from celery import shared_task
from django.db import transaction
from django.core.files.storage import default_storage
from django.conf import settings
from .models import ImageMemo, MemoStatus

# Instruction text sent to the model as the "text" part of the user message
# built in process_memo; the image to transcribe is attached alongside it.
# The prompt asks for plain (unfenced) markdown, '?word?' markers around
# uncertain words, H2 headers for underlined text and '*' for bullet points.
# NOTE(review): the wording (including its typos) is part of what the model
# sees, so editing it may change transcription output.
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.

Do not use a fence, simply respond using markdown.

If any words or letters are unclear, denote them  with a '?<word>?'.

For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'

If a text is underlined followed by a newline that indicates that it is a header. Use markdown H2 to denote it as such.

Make sure to add 2 newlines newlines between sections.

Anything that looks visually like a bullet point should be treated as such. This includes lines starting with hyphens. Replace bullet point indicators with * in the interpretted text.

Please include whitespace and formatting for headings too.
"""


def _mark_memo_failed(memo, error_message: str) -> None:
    """Persist a failure on ``memo``: set the Error status and the message."""
    with transaction.atomic():
        memo.status = MemoStatus.Error
        memo.error_message = error_message
        memo.save()


@shared_task
def process_memo(memo_id: str) -> None:
    """Run OCR on a memo and store the output.

    Looks up the ``ImageMemo`` with the given id, marks it as processing,
    sends its image (base64-encoded in a data URL) together with
    ``TRANSCRIBE_PROMPT`` to the configured model via litellm, and stores the
    returned text on the memo.

    On success the memo gets ``content``, ``model_name`` and status ``Done``.
    If the image file is missing from storage, or the completion call raises
    ``openai.OpenAIError``, the memo gets status ``Error`` and an
    ``error_message`` instead.

    Args:
        memo_id: Primary key of the ``ImageMemo`` to process.

    Raises:
        ImageMemo.DoesNotExist: If no memo with ``memo_id`` exists (raised by
            ``ImageMemo.objects.get``).
    """
    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    image_name = memo.image.name

    # Bail out early if the image is not in storage.
    logger.info(f"Checking that image {image_name} exists")
    if not default_storage.exists(image_name):
        error_message = f"Image file {image_name} does not exist"
        logger.error(error_message)
        _mark_memo_failed(memo, error_message)
        return

    # Read the image into memory; the context manager closes the storage
    # file handle once the bytes have been read.
    logger.info(f"Reading image {image_name}")
    with default_storage.open(image_name) as image_file:
        image_bytes = image_file.read()

    logger.info(f"Calling OCR API for memo {memo.id}")

    b64img = base64.b64encode(image_bytes).decode("utf-8")

    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": TRANSCRIBE_PROMPT},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{memo.image_mimetype};base64,{b64img}"},
            },
        ],
    }

    # Endpoint and credentials come from Django settings and are set on the
    # litellm module before the call.
    litellm.api_base = settings.OPENAI_API_BASE
    litellm.api_key = settings.OPENAI_API_KEY

    # NOTE(review): only openai.OpenAIError is handled here; any other
    # exception propagates and leaves the memo in the Processing status --
    # confirm that this is intended.
    try:
        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
            temperature=0.01,
        )

        with transaction.atomic():
            memo.content = response.choices[0].message["content"]
            memo.status = MemoStatus.Done
            memo.model_name = settings.OPENAI_MODEL
            memo.save()
    except openai.OpenAIError as e:
        logger.error(e)
        _mark_memo_failed(memo, repr(e))