Skip to content

Commit

Permalink
Added: customize timeout and max_concurrency in services via env vars; cleanup legacy_trainer (#551)
Browse files Browse the repository at this point in the history
  • Loading branch information
mexicat authored Nov 10, 2024
1 parent 72766e9 commit 4d6f4e2
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
4 changes: 2 additions & 2 deletions konfuzio_sdk/bento/categorization/categorizationai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@

@bentoml.service(
traffic={
'timeout': 3600, # Hard limit for categorization calls is 1 hour
'timeout': int(os.environ.get('BENTO_SERVICE_TIMEOUT', '3600')),
# Don't process more than 2 documents at a time. Will respond with 429 if more come.
# Clients should implement a retry strategy for 429.
# Servers should implement a scaling strategy and start multiple services when high load is present.
'max_concurrency': 2,
'max_concurrency': int(os.environ.get('BENTO_SERVICE_MAX_CONCURRENCY', '2')),
}
)
@bentoml.mount_asgi_app(app, path='/v1')
Expand Down
4 changes: 2 additions & 2 deletions konfuzio_sdk/bento/extraction/rfextractionai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@

@bentoml.service(
traffic={
'timeout': 3600, # Hard limit for extraction calls is 1 hour
'timeout': int(os.environ.get('BENTO_SERVICE_TIMEOUT', '3600')),
# Don't process more than 2 documents at a time. Will respond with 429 if more come.
# Clients should implement a retry strategy for 429.
# Servers should implement a scaling strategy and start multiple services when high load is present.
'max_concurrency': 2,
'max_concurrency': int(os.environ.get('BENTO_SERVICE_MAX_CONCURRENCY', '2')),
}
)
@bentoml.mount_asgi_app(app, path='/v1')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import json
import os
import typing as t
import bentoml
from concurrent.futures import ThreadPoolExecutor

import bentoml
from fastapi import Depends, FastAPI, HTTPException

from .schemas import ExtractRequest20240117, ExtractResponseForLegacyTrainer20240912
Expand All @@ -16,7 +17,7 @@
except (ImportError, ValueError):
from base.utils import handle_exceptions

from konfuzio_sdk.data import Project, Category
from konfuzio_sdk.data import Category, Project

# load ai model name from AI_MODEL_NAME file in parent directory
ai_model_name_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'AI_MODEL_NAME')
Expand All @@ -25,7 +26,15 @@
app = FastAPI()


@bentoml.service
@bentoml.service(
traffic={
'timeout': int(os.environ.get('BENTO_SERVICE_TIMEOUT', '3600')),
# Don't process more than 2 documents at a time. Will respond with 429 if more come.
# Clients should implement a retry strategy for 429.
# Servers should implement a scaling strategy and start multiple services when high load is present.
'max_concurrency': int(os.environ.get('BENTO_SERVICE_MAX_CONCURRENCY', '2')),
}
)
@bentoml.mount_asgi_app(app, path='/v1')
class ExtractionService:
model_ref = bentoml.models.get(ai_model_name)
Expand Down Expand Up @@ -86,10 +95,11 @@ async def extract(self, ctx: bentoml.Context, **request: t.Any) -> ExtractRespon
for _page in request.pages:
pages.append(dict(_page))

result = await asyncio.get_event_loop().run_in_executor(self.executor, extraction_model.extract, request.text, bboxes, pages)
result = await asyncio.get_event_loop().run_in_executor(
self.executor, extraction_model.extract, request.text, bboxes, pages
)
json_result = process_response(result, schema=ExtractResponseForLegacyTrainer20240912)

project._documents = [d for d in project._documents if d.id_ != document.id_ and d.copy_of_id != document.id_]
return json_result


Expand Down

0 comments on commit 4d6f4e2

Please sign in to comment.