From dcdbf65282a91ee149e87cd6a9be4520a57bd964 Mon Sep 17 00:00:00 2001 From: daveminer Date: Sun, 28 Jul 2024 12:39:19 -0400 Subject: [PATCH] add scores to sentiment data model --- .vscode/settings.json | 5 +++ README.md | 34 +++++++++++++++- docker-compose.yml | 6 ++- requirements.txt | 26 ++++++------- sentiment/celery.py | 2 +- sentiment/migrations/0001_initial.py | 10 +++-- .../0002_alter_sentiment_sentiment.py | 18 --------- sentiment/models.py | 8 ++-- sentiment/tasks.py | 39 +++++++++++-------- sentiment/templates/detail.html | 2 +- sentiment/templates/list.html | 2 +- sentiment/views/createview.py | 14 +++++-- sentiment/views/listview.py | 4 +- 13 files changed, 106 insertions(+), 64 deletions(-) create mode 100644 .vscode/settings.json delete mode 100644 sentiment/migrations/0002_alter_sentiment_sentiment.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..43ae7b2 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "yaml.schemas": { + "https://www.artillery.io/schema.json": [] + } +} \ No newline at end of file diff --git a/README.md b/README.md index 614ba1f..4f28e5b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ - Results are saved and can be queried - Analysis requests are handled asynchronously -## How to Run +## Run the Local Demo Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) if needed. @@ -76,3 +76,35 @@ curl --request GET \ --url http://localhost:8000/sentiment/1/ \ --header 'Accept: application/json' ``` + +## Development Environment Setup + +The `make services` command will start all of the services besides the app. This allows for the app to be started and stopped (with `make app`) in the terminal for convenience during development. + +### Setting up the environment + +Local development requires that the local environment is set up alongside the +containerized services. 
+ +##### Create the virtualenv + +``` +python3 -m virtualenv .venv +``` + +##### Load + +``` +python3 -m venv .venv +``` + +##### Activate + +``` +source .venv/bin/activate +``` + +Notes: + +- `make services` requires [Docker Desktop](https://www.docker.com/products/docker-desktop/) +- `make deps` will install dependencies via pip3 and must be run before `make app`. This can take a few minutes as the PyTorch dependencies are sizable. diff --git a/docker-compose.yml b/docker-compose.yml index 1de759b..076f479 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,3 +1,4 @@ +# Development environment services: web: build: . @@ -16,12 +17,15 @@ services: environment: POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres + ports: + - "5432:5432" volumes: - db:/var/lib/postgresql/data - ./postgres-init.sql:/docker-entrypoint-initdb.d/postgres-init.sql rabbitmq: image: rabbitmq:3.11.5-management ports: + - "5672:5672" - "15672:15672" redis: image: "redis:7-alpine" @@ -29,7 +33,7 @@ services: build: . 
command: celery -A sentiment worker --pool=solo --loglevel=INFO volumes: - - .:/code + - .:/code environment: CELERY_HOST: rabbitmq DB_HOST: db diff --git a/requirements.txt b/requirements.txt index ff79125..bc1d4b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ -celery==5.2.7 -coverage==7.0.0 -django-environ==0.9.0 -Django==4.1.4 -flake8==6.0.0 -psycopg2==2.9.5 -python-dotenv==0.21.0 -pytest==7.2.0 -pytest-django==4.5.2 -pytest-mock==3.10.0 -torch==1.13.1 -transformers==4.18.0 -SQLAlchemy==1.4.45 +celery==5.4.0 +coverage==7.6.0 +django-environ==0.11.2 +Django==5.0.7 +flake8==7.1.0 +psycopg2==2.9.9 +python-dotenv==1.0.1 +pytest==8.3.1 +pytest-django==4.8.0 +pytest-mock==3.14.0 +torch==2.4.0 +transformers==4.43.2 +SQLAlchemy==2.0.31 diff --git a/sentiment/celery.py b/sentiment/celery.py index 8479d97..b674910 100644 --- a/sentiment/celery.py +++ b/sentiment/celery.py @@ -4,7 +4,7 @@ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'bert_serv.settings') os.environ.setdefault('CELERY_CONFIG_MODULE', 'celeryconfig') -app = Celery('sentiment') +app = Celery('bert_serv') app.config_from_envvar('CELERY_CONFIG_MODULE') app.autodiscover_tasks() diff --git a/sentiment/migrations/0001_initial.py b/sentiment/migrations/0001_initial.py index 2428cc4..a55a77c 100644 --- a/sentiment/migrations/0001_initial.py +++ b/sentiment/migrations/0001_initial.py @@ -1,7 +1,7 @@ # Generated by Django 4.1.4 on 2022-12-18 06:30 from django.db import migrations, models - +from django.contrib.postgres.fields import ArrayField class Migration(migrations.Migration): @@ -15,9 +15,11 @@ class Migration(migrations.Migration): name='Sentiment', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('text', models.TextField()), - ('sentiment', models.TextField()), - ('created', models.DateTimeField(auto_now=True)), + ('label', models.TextField(null=False)), + ('score', models.FloatField(null=False)), + ('tags', 
ArrayField(base_field=models.TextField(null=False), size=None)), + ('text', models.TextField(null=False)), + ('created_at', models.DateTimeField(auto_now=True)), ], ), ] diff --git a/sentiment/migrations/0002_alter_sentiment_sentiment.py b/sentiment/migrations/0002_alter_sentiment_sentiment.py deleted file mode 100644 index 52890e4..0000000 --- a/sentiment/migrations/0002_alter_sentiment_sentiment.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 4.1.4 on 2022-12-18 06:58 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('sentiment', '0001_initial'), - ] - - operations = [ - migrations.AlterField( - model_name='sentiment', - name='sentiment', - field=models.CharField(max_length=100), - ), - ] diff --git a/sentiment/models.py b/sentiment/models.py index f1c0c3e..40a5b6e 100644 --- a/sentiment/models.py +++ b/sentiment/models.py @@ -1,7 +1,9 @@ from django.db import models - +from django.contrib.postgres.fields import ArrayField class Sentiment(models.Model): + label = models.TextField(null=False) + score = models.FloatField(null=False) + tags = ArrayField(models.TextField(), null=False) text = models.TextField(null=False) - sentiment = models.CharField(max_length=100, null=False) - created = models.DateTimeField(auto_now=True) + created_at = models.DateTimeField(auto_now=True) diff --git a/sentiment/tasks.py b/sentiment/tasks.py index 86b103a..bbb1352 100644 --- a/sentiment/tasks.py +++ b/sentiment/tasks.py @@ -1,6 +1,6 @@ from .celery import Celery from .models import Sentiment -from transformers import BertTokenizer, BertForSequenceClassification +from transformers import BertTokenizer, BertForSequenceClassification, pipeline import json import numpy as np import requests @@ -14,23 +14,30 @@ tokenizer = BertTokenizer.from_pretrained(model) -labels = {0: 'neutral', 1: 'positive', 2: 'negative'} - +nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer) @celery.task -def 
run_sentiment(sentences): - inputs = tokenizer(sentences, return_tensors="pt", padding=True) - outputs = finbert(**inputs)[0] - - sentiments = [] - - for idx, sent in enumerate(sentences): - results = outputs.detach().numpy() - label = labels[np.argmax(results[idx])] - sentiment = Sentiment.objects.create(text=sent, sentiment=label) - sentiments.append(sentiment) - - return json.dumps({'ids': list(map(lambda s: s.id, sentiments))}) +def run_sentiment(sentences, tags): + results = nlp(sentences) + + sentiment_objects = [] + + for idx, result in enumerate(results): + label = result['label'] + score = result['score'] + sentiment = Sentiment( + label=label, + text=sentences[idx], + score=score, + tags=tags + ) + sentiment_objects.append(sentiment) + + # Bulk create all sentiment objects in one query + Sentiment.objects.bulk_create(sentiment_objects) + + # Return the IDs of the created sentiments + return json.dumps({'ids': [sentiment.id for sentiment in sentiment_objects]}) @celery.task diff --git a/sentiment/templates/detail.html b/sentiment/templates/detail.html index dcada41..082a29f 100644 --- a/sentiment/templates/detail.html +++ b/sentiment/templates/detail.html @@ -1,4 +1,4 @@ {% block content %}

Sentiment {{sentiment.id}}

-

{{ sentiment.created }} / {{ sentiment.sentiment }} / {{sentiment.text}}

+

{{ sentiment.created_at }} / {{ sentiment.score }} / {{sentiment.text}}

{% endblock %} diff --git a/sentiment/templates/list.html b/sentiment/templates/list.html index 2bda24a..463b231 100644 --- a/sentiment/templates/list.html +++ b/sentiment/templates/list.html @@ -1,7 +1,7 @@ {% block content %}

Sentiments

{% if sentiment_list %} {% for record in sentiment_list %} -

{{ record.created }} / {{ record.sentiment }} / {{record.text}}

+

{{ record.created_at }} / {{ record.score }} / {{record.text}}

{% endfor %} {% else %}

There are no sentiments to show.

{% endif %} {% endblock %} diff --git a/sentiment/views/createview.py b/sentiment/views/createview.py index e6ce3f8..56c5e59 100644 --- a/sentiment/views/createview.py +++ b/sentiment/views/createview.py @@ -4,6 +4,7 @@ from django.views.generic import View from celery import signature import json +import logging class SentimentCreate(View): @@ -11,12 +12,19 @@ class SentimentCreate(View): def post(self, request, *args, **kwargs): body = parse_request_body(request) + print(body, "BODY") + print(request, "REQ") try: - signature("sentiment.tasks.run_sentiment", args=( - body,), link=callback_task(request)).delay() + text = body.get('text', []) + tags = body.get('tags', []) + print(text, "TEXT") + signature("sentiment.tasks.run_sentiment", args=( + text,tags,), link=callback_task(request)).delay() + print("SENTIMENT TASK") return HttpResponse(status=201) - except: + except Exception as e: + logging.error(f"Error occurred: {e}") return HttpResponse(status=500) diff --git a/sentiment/views/listview.py b/sentiment/views/listview.py index fb57858..d17306c 100644 --- a/sentiment/views/listview.py +++ b/sentiment/views/listview.py @@ -7,14 +7,14 @@ class SentimentList(ListView): model = Sentiment - queryset: QuerySet[Sentiment] = Sentiment.objects.all().order_by('-created')[:100] + queryset: QuerySet[Sentiment] = Sentiment.objects.all().order_by('-created_at')[:100] template_name: str = '../templates/list.html' def get(self, request, *args, **kwargs) -> HttpResponse | JsonResponse: if 'application/json' in request.META.get('HTTP_ACCEPT'): return JsonResponse(list( - self.get_queryset().values('created', 'id', 'sentiment', 'text') + self.get_queryset().values('created_at', 'label', 'score', 'tags', 'text') ), safe=False) return super().get(request, *args, **kwargs)