From 6593098b178223de2fe17b9fd87c6c369872d8e5 Mon Sep 17 00:00:00 2001
From: tdanielles
Date: Sat, 23 Nov 2024 22:05:23 +0900
Subject: [PATCH] Add Flask endpoint in transcription folder

---
 .DS_Store                | Bin 0 -> 6148 bytes
 transcription/.gitignore |   0
 transcription/app.py     |  40 ++++++++++++++++++++++++++++++++++++++++
 transcription/testing.py |   2 +-
 4 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .DS_Store
 create mode 100644 transcription/.gitignore
 create mode 100644 transcription/app.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..6451d8cb4f2d571f567ca262c61a01ae6e2140d3
GIT binary patch
literal 6148
zcmeHKL2uJA6n^f?nleqw0i<0fMdDhGjuj-dOV_oBNx)%2Z~#=2r9iYSu9}n{s!BPJ
z|G;11$}i!+aDwmIu9URpI#tN8dj9O^_iVpUv0W39=uM+zBA199C}aITiVed3tZPy;
zmL^cBF>=Z-5_uISGF^UF&FLLJRK|alT!Q>scUQ3yT%X$#LiAS@c``}!rc@U?g
zi3*6L5nSHCjnhcZdUBdZnTqvwhtqVLL-$}lf7&@|dB>g3qUFt>KWVqT&dIaIqUk(*
z^!W60c%9s&@>Z>y1kNWV`v#Zr4V6kxU;Rm%$n-Pp9Mhl>>|D_`*hwiWoo>v|4``Vk
z7oHM+IZY@=T|&Cw{MZ
zH|Y|dXVj;czzyrEan|MW_f;kVGRlw{=QLhsRGmwGC*Moo2?nyVf-
z1D1iiXMoQK56ajzSZGv72MYBB0M=nvf;wM}9OD`68Z0!T2O@MRP=^Zh#1J|hcF*{A
z4Hg=8I0^Ik5N2j!UMND%j`2O^PNJ*PmX-m_K$U?V-E{f>AN>3MUk$P?%YbFzzhXet
z`+mQNl+4|_mK@);9`qxWh5ZVRDg=eOj#Y)P;sdA>^m$wXb`2I9VS(5`0*VG(SO)Ht
FfxjT2t3v<)

literal 0
HcmV?d00001

diff --git a/transcription/.gitignore b/transcription/.gitignore
new file mode 100644
index 00000000..e69de29b
diff --git a/transcription/app.py b/transcription/app.py
new file mode 100644
index 00000000..bff6161e
--- /dev/null
+++ b/transcription/app.py
@@ -0,0 +1,40 @@
+from flask import Flask, request, jsonify
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+import torch
+
+app = Flask(__name__)
+
+# load model and processor once during init
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True).to(device)
+processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
+
+@app.route("/transcribe", methods=["POST"])
+def transcribe():
+    if "image" not in request.files:
+        return jsonify({"error": "No image file provided"}), 400
+
+    image_file = request.files["image"]
+    try:
+        # open and preprocess image
+        image = Image.open(image_file).convert("RGB")
+        prompt = "<OCR>"  # Florence-2 OCR task token (assumed; tag missing in source text)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3,
+            do_sample=False
+        )
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+
+        return jsonify({"transcription": generated_text})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+if __name__ == "__main__":
+    app.run(debug=True)
\ No newline at end of file
diff --git a/transcription/testing.py b/transcription/testing.py
index f569788a..499e0d66
--- a/transcription/testing.py
+++ b/transcription/testing.py
@@ -33,7 +33,7 @@ def florence():
 
     prompt = "<OCR>"
 
-    url = "../assets/kkl.jpg"
+    url = "../assets/Filled_Logbook_page-0001.jpg"
     image = Image.open(url).convert("RGB")
 
     inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
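
A quick way to exercise the new endpoint locally (not part of the patch): the sketch below assumes the Flask dev server from transcription/app.py is running on its default port 5000 and that the sample scan referenced in testing.py is reachable from the working directory; both the port and the image path are assumptions, not something this change pins down.

    import requests

    # hypothetical smoke test for the new /transcribe endpoint
    # (assumed dev-server port 5000 and assumed sample image path)
    with open("assets/Filled_Logbook_page-0001.jpg", "rb") as f:
        resp = requests.post("http://127.0.0.1:5000/transcribe", files={"image": f})

    print(resp.status_code)  # 200 on success, 400 if the "image" field is missing, 500 on model errors
    print(resp.json())       # {"transcription": "..."} on success, {"error": "..."} otherwise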