import base64
import os
from datetime import datetime

import litellm
import requests
from celery import shared_task, Task
from django.conf import settings
from django.core.files.storage import default_storage
from django.db import transaction
from loguru import logger

from .models import ImageMemo, MemoStatus

# Instruction text sent to the model together with the memo image.
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown inside a fence like so

```markdown

```

If any words or letters are unclear, denote them with a '??'. For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'
"""


def _mark_failed(memo, reason: str) -> None:
    """Flag ``memo`` as errored and persist ``reason`` as its error message."""
    memo.status = MemoStatus.Error
    memo.error_message = reason
    memo.save()


@shared_task
def process_memo(memo_id: str) -> None:
    """Run OCR on a memo and store the output.

    Looks up the ``ImageMemo`` with the given id, marks it as processing,
    sends its image (base64-encoded in a data URL) together with
    ``TRANSCRIBE_PROMPT`` to the model configured in Django settings, and
    stores the returned text in ``memo.content`` with status ``Done``.

    If the image file is missing from storage the memo is marked ``Error``
    and the task returns normally. If reading the image or calling the model
    fails, the memo is marked ``Error`` and the exception is re-raised so the
    task itself is still reported as failed.

    Args:
        memo_id: Primary key of the ``ImageMemo`` to process.

    Raises:
        ImageMemo.DoesNotExist: If no memo with ``memo_id`` exists.
        Exception: Whatever the storage backend or ``litellm.completion``
            raised, after the memo has been marked as errored.
    """
    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    # check that the image exists
    logger.info(f"Checking that image {memo.image.name} exists")
    if not default_storage.exists(memo.image.name):
        _mark_failed(memo, f"Image file {memo.image.name} does not exist")
        return

    try:
        # read the image into memory; the context manager closes the handle
        logger.info(f"Reading image {memo.image.name}")
        with default_storage.open(memo.image.name) as image_file:
            image_bytes = image_file.read()

        # call the OCR API
        logger.info(f"Calling OCR API for memo {memo.id}")
        b64img = base64.b64encode(image_bytes).decode("utf-8")
        message = {
            "role": "user",
            "content": [
                {"type": "text", "text": TRANSCRIBE_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{memo.image_mimetype};base64,{b64img}"
                    },
                },
            ],
        }

        litellm.api_base = settings.OPENAI_API_BASE
        litellm.api_key = settings.OPENAI_API_KEY

        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
        )
        transcription = response.choices[0].message["content"]
    except Exception as exc:
        # Without this the memo would stay in Processing forever; record the
        # failure, then re-raise so the task is still reported as failed.
        logger.exception(f"OCR failed for memo {memo.id}")
        _mark_failed(memo, f"OCR failed: {exc}")
        raise

    with transaction.atomic():
        memo.content = transcription
        memo.status = MemoStatus.Done
        memo.save()