PenParse/penparse/webui/tasks.py

79 lines
2.3 KiB
Python
Raw Normal View History

2024-12-10 12:04:26 +00:00
import base64
import litellm
from loguru import logger
2024-12-11 16:12:52 +00:00
from celery import shared_task
2024-12-10 12:04:26 +00:00
from django.db import transaction
from django.core.files.storage import default_storage
from django.conf import settings
from .models import ImageMemo, MemoStatus
2024-12-11 16:12:52 +00:00
# Instruction text sent alongside the memo image to the vision model.
# Uncertain words are requested in the form '?word?'.
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.

Do not use a fence, simply respond using markdown.

If any words or letters are unclear, denote them with a '?<word>?'.
For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
Please include whitespace and formatting for headings too.
"""
2024-12-11 16:12:52 +00:00
2024-12-10 12:04:26 +00:00
@shared_task
def process_memo(memo_id: str):
    """Run OCR on a memo and store the output.

    Looks up the ``ImageMemo`` with the given id, marks it as processing,
    sends its image (base64-encoded as a data URL) together with
    ``TRANSCRIBE_PROMPT`` to the model configured in Django settings via
    litellm, and stores the returned text in ``memo.content``.

    The memo's status is set to ``MemoStatus.Error`` (with
    ``error_message`` populated) when the image file is missing from
    storage, or when reading the image or calling the model fails. In the
    latter case the exception is re-raised after the status is recorded so
    the task is still reported as failed.

    Args:
        memo_id: Primary key of the ``ImageMemo`` to transcribe.

    Raises:
        ImageMemo.DoesNotExist: If no memo with ``memo_id`` exists.
        Exception: Any error raised while reading the image or calling
            the completion API is propagated after being recorded.
    """
    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    # check that the image exists
    logger.info(f"Checking that image {memo.image.name} exists")
    if not default_storage.exists(memo.image.name):
        memo.status = MemoStatus.Error
        memo.error_message = f"Image file {memo.image.name} does not exist"
        memo.save()
        return

    try:
        # read the image into memory, closing the storage handle afterwards
        logger.info(f"Reading image {memo.image.name}")
        with default_storage.open(memo.image.name) as image_file:
            image_bytes = image_file.read()

        # call the OCR API
        logger.info(f"Calling OCR API for memo {memo.id}")
        b64img = base64.b64encode(image_bytes).decode("utf-8")
        message = {
            "role": "user",
            "content": [
                {"type": "text", "text": TRANSCRIBE_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{memo.image_mimetype};base64,{b64img}"
                    },
                },
            ],
        }

        litellm.api_base = settings.OPENAI_API_BASE
        litellm.api_key = settings.OPENAI_API_KEY

        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
            temperature=0.01,
        )
        content = response.choices[0].message["content"]
    except Exception as exc:
        # Without this the memo would stay in "processing" forever when the
        # storage read or the API call fails. Record the failure, then
        # re-raise so the task itself is still marked as failed.
        logger.exception(f"OCR failed for memo {memo.id}")
        memo.status = MemoStatus.Error
        memo.error_message = str(exc)
        memo.save()
        raise

    with transaction.atomic():
        memo.content = content
        memo.status = MemoStatus.Done
        memo.save()