From 6593098b178223de2fe17b9fd87c6c369872d8e5 Mon Sep 17 00:00:00 2001
From: tdanielles
Date: Sat, 23 Nov 2024 22:05:23 +0900
Subject: [PATCH] Add Flask endpoint in transcription folder

---
 .DS_Store                | Bin 0 -> 6148 bytes
 transcription/.gitignore |   0
 transcription/app.py     |  40 ++++++++++++++++++++++++++++++++++++++++
 transcription/testing.py |   2 +-
 4 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .DS_Store
 create mode 100644 transcription/.gitignore
 create mode 100644 transcription/app.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..6451d8cb4f2d571f567ca262c61a01ae6e2140d3
GIT binary patch
literal 6148
zcmeHKL2uJA6n^f?nleqw0i<0fMdDhGjuj-dOV_oBNx)%2Z~#=2r9iYSu9}n{s!BPJ
z|G;11$}i!+aDwmIu9URpI#tN8dj9O^_iVpUv0W39=uM+zBA199C}aITiVed3tZPy;
zmL^cBF>=Z-5_uISGF^UF&FLLJRK|alT!Q>scUQ3yT%X$#LiAS@c``}!rc@U?g
zi3*6L5nSHCjnhcZdUBdZnTqvwhtqVLL-$}lf7&@|dB>g3qUFt>KWVqT&dIaIqUk(*
z^!W60c%9s&@>Z>y1kNWV`v#Zr4V6kxU;Rm%$n-Pp9Mhl>>|D_`*hwiWoo>v|4``Vk
z7oHM+IZY@=T|&Cw{MZ
zH|Y|dXVj;czzyrEan|MW_f;kVGRlw{=QLhsRGmwGC*Moo2?nyVf-
z1D1iiXMoQK56ajzSZGv72MYBB0M=nvf;wM}9OD`68Z0!T2O@MRP=^Zh#1J|hcF*{A
z4Hg=8I0^Ik5N2j!UMND%j`2O^PNJ*PmX-m_K$U?V-E{f>AN>3MUk$P?%YbFzzhXet
z`+mQNl+4|_mK@);9`qxWh5ZVRDg=eOj#Y)P;sdA>^m$wXb`2I9VS(5`0*VG(SO)Ht
FfxjT2t3v<)

literal 0
HcmV?d00001

diff --git a/transcription/.gitignore b/transcription/.gitignore
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription/app.py b/transcription/app.py
new file mode 100644
index 00000000..bff6161e
--- /dev/null
+++ b/transcription/app.py
@@ -0,0 +1,40 @@
+from flask import Flask, request, jsonify
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+import torch
+
+app = Flask(__name__)
+
+# load model and processor once during init
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True).to(device)
+processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
+
+@app.route("/transcribe", methods=["POST"])
+def transcribe():
+    if "image" not in request.files:
+        return jsonify({"error": "No image file provided"}), 400
+
+    image_file = request.files["image"]
+    try:
+        # open and preprocess image
+        image = Image.open(image_file).convert("RGB")
+        prompt = "<OCR>"  # Florence-2 OCR task token (assumed; tag missing in source text)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3,
+            do_sample=False
+        )
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+
+        return jsonify({"transcription": generated_text})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+if __name__ == "__main__":
+    app.run(debug=True)
\ No newline at end of file
diff --git a/transcription/testing.py b/transcription/testing.py
index f569788a..499e0d66
--- a/transcription/testing.py
+++ b/transcription/testing.py
@@ -33,7 +33,7 @@ def florence():
 
     prompt = "<OCR>"
 
-    url = "../assets/kkl.jpg"
+    url = "../assets/Filled_Logbook_page-0001.jpg"
     image = Image.open(url).convert("RGB")
 
     inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
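
A quick way to exercise the new endpoint locally (not part of the patch): the sketch below assumes the Flask dev server from transcription/app.py is running on its default port 5000 and that the sample scan referenced in testing.py is reachable from the working directory; both the port and the image path are assumptions, not something this change pins down.

    import requests

    # hypothetical smoke test for the new /transcribe endpoint
    # (assumed dev-server port 5000 and assumed sample image path)
    with open("assets/Filled_Logbook_page-0001.jpg", "rb") as f:
        resp = requests.post("http://127.0.0.1:5000/transcribe", files={"image": f})

    print(resp.status_code)  # 200 on success, 400 if the "image" field is missing, 500 on model errors
    print(resp.json())       # {"transcription": "..."} on success, {"error": "..."} otherwise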