PenParse/penparse/webui/tasks.py

94 lines
3.0 KiB
Python

import base64
import litellm
import openai
from loguru import logger
from celery import shared_task
from django.db import transaction
from django.core.files.storage import default_storage
from django.conf import settings
from .models import ImageMemo, MemoStatus
# Instruction text sent to the model as the "text" part of the user message,
# alongside the memo image (see process_memo). It asks for a markdown
# transcription with '?word?' markers around uncertain words.
# NOTE(review): the wording contains typos ("newlines newlines",
# "interpretted"). They are left untouched here because this string is sent
# verbatim to the model and rewording it may change transcription output.
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.
Do not use a fence, simply respond using markdown.
If any words or letters are unclear, denote them with a '?<word>?'.
For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
If a text is underlined followed by a newline that indicates that it is a header. Use markdown H2 to denote it as such.
Make sure to add 2 newlines newlines between sections.
Anything that looks visually like a bullet point should be treated as such. This includes lines starting with hyphens. Replace bullet point indicators with * in the interpretted text.
Please include whitespace and formatting for headings too.
"""
@shared_task
def process_memo(memo_id: str):
    """Transcribe the image attached to a memo and store the result on it.

    The memo is marked ``Processing`` up front. It then ends up either
    ``Done`` (with ``content`` and ``model_name`` filled in) or ``Error``
    (with ``error_message`` filled in).

    Args:
        memo_id: ``id`` of the ``ImageMemo`` record to process.

    Returns:
        None. All results are written to the memo record.

    Note:
        Only ``openai.OpenAIError`` raised by the completion call is handled
        here; any other exception (including a failed memo lookup or a
        storage read error) propagates to the caller.
    """
    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    image_name = memo.image.name

    # Bail out early (recording the reason on the memo) if the image file
    # is not present in storage.
    logger.info(f"Checking that image {image_name} exists")
    if not default_storage.exists(image_name):
        memo.status = MemoStatus.Error
        memo.error_message = f"Image file {image_name} does not exist"
        memo.save()
        return

    # Read the image into memory; the context manager makes sure the storage
    # file handle is closed again once the bytes have been read.
    logger.info(f"Reading image {image_name}")
    with default_storage.open(image_name) as image_file:
        image_bytes = image_file.read()

    logger.info(f"Calling OCR API for memo {memo.id}")

    # The image is embedded in the request as a base64 data URL.
    b64img = base64.b64encode(image_bytes).decode("utf-8")
    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": TRANSCRIBE_PROMPT},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{memo.image_mimetype};base64,{b64img}"},
            },
        ],
    }

    # Endpoint and credentials come from Django settings. These are
    # attributes on the litellm module itself, so they are set for every
    # later litellm call in this process, not just this one.
    litellm.api_base = settings.OPENAI_API_BASE
    litellm.api_key = settings.OPENAI_API_KEY

    try:
        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
            # presumably near-zero to keep transcriptions as repeatable as
            # possible -- TODO confirm
            temperature=0.01,
        )
    except openai.OpenAIError as e:
        # Record the failure on the memo so it does not stay in Processing.
        with transaction.atomic():
            memo.status = MemoStatus.Error
            memo.error_message = repr(e)
            memo.save()
        logger.error(e)
    else:
        with transaction.atomic():
            memo.content = response.choices[0].message["content"]
            memo.status = MemoStatus.Done
            memo.model_name = settings.OPENAI_MODEL
            memo.save()