PenParse/penparse/webui/tasks.py

import base64
import litellm

from loguru import logger
from celery import shared_task
from django.db import transaction
from django.core.files.storage import default_storage
from django.conf import settings
from .models import ImageMemo, MemoStatus

# Instruction text sent to the vision model as the "text" part of the user
# message in process_memo(), alongside the base64-encoded memo image.
# It asks for a plain (unfenced) markdown transcription and for uncertain
# words to be wrapped in question marks, e.g. '?blow?'.
# NOTE: the literal is used verbatim at runtime; editing its wording or
# whitespace changes what the model is asked to do.
TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.

Do not use a fence, simply respond using markdown.

If any words or letters are unclear, denote them  with a '?<word>?'.

For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'

Please include whitespace and formatting for headings too.
"""


@shared_task
def process_memo(memo_id: str) -> None:
    """Run OCR on a memo and store the output.

    Looks up the ``ImageMemo`` with the given id, marks it as processing,
    sends its image together with ``TRANSCRIBE_PROMPT`` to the model
    configured in Django settings, and stores the returned text in
    ``memo.content``.

    Status transitions written to the memo:

    * ``MemoStatus.Processing`` as soon as the memo has been loaded.
    * ``MemoStatus.Error`` (with ``error_message`` set) if the image file is
      missing from storage, or if reading the image / calling the model
      fails.
    * ``MemoStatus.Done`` once the transcription has been stored.

    Args:
        memo_id: Identifier of the ``ImageMemo`` to transcribe.

    Raises:
        ImageMemo.DoesNotExist: If no memo with ``memo_id`` exists.
        Exception: Any error raised while reading the image or calling the
            model is re-raised after the memo has been marked as errored, so
            the task is still reported as failed.
    """

    logger.info(f"Looking up memo with id={memo_id}")
    memo = ImageMemo.objects.get(id=memo_id)

    with transaction.atomic():
        logger.info(f"Set status=processing for memo {memo.id}")
        memo.status = MemoStatus.Processing
        memo.save()

    # A missing image is an expected failure mode: record it and stop quietly.
    logger.info(f"Checking that image {memo.image.name} exists")
    if not default_storage.exists(memo.image.name):
        memo.status = MemoStatus.Error
        memo.error_message = f"Image file {memo.image.name} does not exist"
        memo.save()
        return

    try:
        # Read the image into memory; the context manager guarantees the
        # storage file handle is closed even if read() fails.
        logger.info(f"Reading image {memo.image.name}")
        with default_storage.open(memo.image.name) as image_file:
            image_bytes = image_file.read()

        logger.info(f"Calling OCR API for memo {memo.id}")

        # The image is embedded in the request as a base64 data URL.
        b64img = base64.b64encode(image_bytes).decode("utf-8")

        message = {
            "role": "user",
            "content": [
                {"type": "text", "text": TRANSCRIBE_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{memo.image_mimetype};base64,{b64img}"
                    },
                },
            ],
        }

        # Endpoint, credentials and model all come from Django settings.
        litellm.api_base = settings.OPENAI_API_BASE
        litellm.api_key = settings.OPENAI_API_KEY

        response = litellm.completion(
            model=settings.OPENAI_MODEL,
            messages=[message],
            temperature=0.01,
        )

        transcription = response.choices[0].message["content"]
    except Exception as exc:
        # Without this the memo would stay in "Processing" forever. Record
        # the failure on the memo, then re-raise so the task itself is still
        # marked as failed.
        logger.exception(f"OCR failed for memo {memo.id}")
        memo.status = MemoStatus.Error
        memo.error_message = f"OCR failed: {exc}"
        memo.save()
        raise

    with transaction.atomic():
        memo.content = transcription
        memo.status = MemoStatus.Done
        memo.save()
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00			`import base64`
			`import litellm`

			`from loguru import logger`
implement local vllm model usage 2024-12-11 16:12:52 +00:00			`from celery import shared_task`
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00			`from django.db import transaction`
			`from django.core.files.storage import default_storage`
			`from django.conf import settings`
			`from .models import ImageMemo, MemoStatus`

implement local vllm model usage 2024-12-11 16:12:52 +00:00			`TRANSCRIBE_PROMPT = """Transcribe the hand written notes in the attached image and present them as markdown.`
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00
implement document view 2024-12-10 16:05:24 +00:00			`Do not use a fence, simply respond using markdown.`
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00
implement local vllm model usage 2024-12-11 16:12:52 +00:00			`If any words or letters are unclear, denote them with a '?<word>?'.`
implement document view 2024-12-10 16:05:24 +00:00
			`For example if you were not sure whether a word is blow or blew you would transcribe it as '?blow?'`
implement local vllm model usage 2024-12-11 16:12:52 +00:00
			`Please include whitespace and formatting for headings too.`
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00			`"""`


implement local vllm model usage 2024-12-11 16:12:52 +00:00
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00			`@shared_task`
			`def process_memo(memo_id: str):`
			`"""Run OCR on a memo and store the output"""`

			`logger.info(f"Looking up memo with id={memo_id}")`
			`memo = ImageMemo.objects.get(id=memo_id)`

			`with transaction.atomic():`
			`logger.info(f"Set status=processing for memo {memo.id}")`
			`memo.status = MemoStatus.Processing`
			`memo.save()`

			`# check that the image exists`
			`logger.info(f"Checking that image {memo.image.name} exists")`
			`if not default_storage.exists(memo.image.name):`
			`memo.status = MemoStatus.Error`
			`memo.error_message = f"Image file {memo.image.name} does not exist"`
			`memo.save()`
			`return`

			`# read the image into memory`
			`logger.info(f"Reading image {memo.image.name}")`
			`bytearray = default_storage.open(memo.image.name).read()`

			`# call the OCR API`
			`logger.info(f"Calling OCR API for memo {memo.id}")`

			`b64img = base64.b64encode(bytearray).decode("utf-8")`

			`message = {`
			`"role": "user",`
			`"content": [`
			`{"type": "text", "text": TRANSCRIBE_PROMPT},`
			`{`
			`"type": "image_url",`
			`"image_url": {"url": f"data:{memo.image_mimetype};base64,{b64img}"},`
			`},`
			`],`
			`}`

			`litellm.api_base = settings.OPENAI_API_BASE # os.environ.get("OPENAI_API_BASE")`
			`litellm.api_key = settings.OPENAI_API_KEY`

			`response = litellm.completion(`
			`model=settings.OPENAI_MODEL, #os.getenv("MODEL", "openai/gpt-4o"),`
			`messages=[message],`
implement local vllm model usage 2024-12-11 16:12:52 +00:00			`temperature=0.01`
implemented LLM-based OCR 2024-12-10 12:04:26 +00:00			`)`

			`response.choices[0].message["content"]`

			`with transaction.atomic():`
			`memo.content = response.choices[0].message["content"]`
			`memo.status = MemoStatus.Done`
			`memo.save()`