Skip to content

Commit

Permalink
add scores to sentiment data model
Browse files Browse the repository at this point in the history
  • Loading branch information
daveminer committed Jul 28, 2024
1 parent e9d49e1 commit dcdbf65
Show file tree
Hide file tree
Showing 13 changed files with 106 additions and 64 deletions.
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"yaml.schemas": {
"https://www.artillery.io/schema.json": []
}
}
34 changes: 33 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
- Results are saved and can be queried
- Analysis requests are handled asynchronously

## How to Run
## Run the Local Demo

Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) if needed.

Expand Down Expand Up @@ -76,3 +76,35 @@ curl --request GET \
--url http://localhost:8000/sentiment/1/ \
--header 'Accept: application/json'
```

## Development Environment Setup

The `make services` command starts every service except the app itself. The app can then be started and stopped separately in the terminal with `make app`, which is convenient during development.

### Setting up the environment

Local development requires a local Python environment to be set up in addition
to the containerized services.

##### Create the virtualenv

```
python3 -m virtualenv env
```

##### Create the `.venv` environment (the one activated below)

```
python3 -m venv .venv
```

##### Activate

```
source .venv/bin/activate
```

Notes:

- `make services` requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)
- `make deps` will install dependencies via pip3 and must be run before `make app`. This can take a few minutes as the PyTorch dependencies are sizable.
6 changes: 5 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Development environment
services:
web:
build: .
Expand All @@ -16,20 +17,23 @@ services:
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- "5432:5432"
volumes:
- db:/var/lib/postgresql/data
- ./postgres-init.sql:/docker-entrypoint-initdb.d/postgres-init.sql
rabbitmq:
image: rabbitmq:3.11.5-management
ports:
- "5672:5672"
- "15672:15672"
redis:
image: "redis:7-alpine"
celery_worker:
build: .
command: celery -A sentiment worker --pool=solo --loglevel=INFO
volumes:
- .:/code
- .:/code
environment:
CELERY_HOST: rabbitmq
DB_HOST: db
Expand Down
26 changes: 13 additions & 13 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
celery==5.2.7
coverage==7.0.0
django-environ==0.9.0
Django==4.1.4
flake8==6.0.0
psycopg2==2.9.5
python-dotenv==0.21.0
pytest==7.2.0
pytest-django==4.5.2
pytest-mock==3.10.0
torch==1.13.1
transformers==4.18.0
SQLAlchemy==1.4.45
celery==5.4.0
coverage==7.6.0
django-environ==0.11.2
Django==5.0.7
flake8==7.1.0
psycopg2==2.9.9
python-dotenv==1.0.1
pytest==8.3.1
pytest-django==4.8.0
pytest-mock==3.14.0
torch==2.4.0
transformers==4.43.2
SQLAlchemy==2.0.31
2 changes: 1 addition & 1 deletion sentiment/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'bert_serv.settings')
os.environ.setdefault('CELERY_CONFIG_MODULE', 'celeryconfig')

app = Celery('sentiment')
app = Celery('bert_serv')

app.config_from_envvar('CELERY_CONFIG_MODULE')
app.autodiscover_tasks()
Expand Down
10 changes: 6 additions & 4 deletions sentiment/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by Django 4.1.4 on 2022-12-18 06:30

from django.db import migrations, models

from django.contrib.postgres.fields import ArrayField

class Migration(migrations.Migration):

Expand All @@ -15,9 +15,11 @@ class Migration(migrations.Migration):
name='Sentiment',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('text', models.TextField()),
('sentiment', models.TextField()),
('created', models.DateTimeField(auto_now=True)),
('label', models.TextField(null=False)),
('score', models.FloatField(null=False)),
('tags', ArrayField(base_field=models.TextField(null=False), size=None)),
('text', models.TextField(null=False)),
('created_at', models.DateTimeField(auto_now=True)),
],
),
]
18 changes: 0 additions & 18 deletions sentiment/migrations/0002_alter_sentiment_sentiment.py

This file was deleted.

8 changes: 5 additions & 3 deletions sentiment/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from django.db import models

from django.contrib.postgres.fields import ArrayField

class Sentiment(models.Model):
label = models.TextField(null=False)
score = models.FloatField(null=False)
tags = ArrayField(models.TextField(), null=False)
text = models.TextField(null=False)
sentiment = models.CharField(max_length=100, null=False)
created = models.DateTimeField(auto_now=True)
created_at = models.DateTimeField(auto_now=True)
39 changes: 23 additions & 16 deletions sentiment/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .celery import Celery
from .models import Sentiment
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import json
import numpy as np
import requests
Expand All @@ -14,23 +14,30 @@

tokenizer = BertTokenizer.from_pretrained(model)

labels = {0: 'neutral', 1: 'positive', 2: 'negative'}

nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

@celery.task
def run_sentiment(sentences):
inputs = tokenizer(sentences, return_tensors="pt", padding=True)
outputs = finbert(**inputs)[0]

sentiments = []

for idx, sent in enumerate(sentences):
results = outputs.detach().numpy()
label = labels[np.argmax(results[idx])]
sentiment = Sentiment.objects.create(text=sent, sentiment=label)
sentiments.append(sentiment)

return json.dumps({'ids': list(map(lambda s: s.id, sentiments))})
def run_sentiment(sentences, tags=None):
    """Classify each sentence and persist one ``Sentiment`` row per result.

    Args:
        sentences: A list of strings to classify. A single bare string is
            treated as a one-element list.
        tags: Tags stored on every created row. Defaults to an empty list,
            so callers that pass only ``sentences`` keep working.

    Returns:
        A JSON string of the form ``{"ids": [...]}`` holding the ``id`` of
        each created row, in input order.
    """
    # A bare string would otherwise be indexed character by character when
    # pairing each result with its source text.
    if isinstance(sentences, str):
        sentences = [sentences]
    tags = [] if tags is None else tags

    # Nothing to classify: skip the model call and the database round-trip.
    if not sentences:
        return json.dumps({'ids': []})

    results = nlp(sentences)

    # Pair each input sentence with its classification result.
    sentiment_objects = [
        Sentiment(
            label=result['label'],
            text=sentence,
            score=result['score'],
            tags=tags,
        )
        for sentence, result in zip(sentences, results)
    ]

    # Bulk create all sentiment objects in one query.
    # NOTE(review): reading ``.id`` afterwards relies on the database backend
    # populating primary keys during bulk_create (PostgreSQL does) - confirm.
    Sentiment.objects.bulk_create(sentiment_objects)

    # Return the IDs of the created sentiments.
    return json.dumps({'ids': [sentiment.id for sentiment in sentiment_objects]})


@celery.task
Expand Down
2 changes: 1 addition & 1 deletion sentiment/templates/detail.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% block content %}
<h1>Sentiment {{sentiment.id}}</h1>
<p>{{ sentiment.created }} / {{ sentiment.sentiment }} / {{sentiment.text}}</p>
<p>{{ sentiment.created_at }} / {{ sentiment.score }} / {{sentiment.text}}</p>
{% endblock %}
2 changes: 1 addition & 1 deletion sentiment/templates/list.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{% block content %}
<h1>Sentiments</h1>
{% if sentiment_list %} {% for record in sentiment_list %}
<p>{{ record.created }} / {{ record.sentiment }} / {{record.text}}</p>
<p>{{ record.created_at }} / {{ record.score }} / {{record.text}}</p>
{% endfor %} {% else %}
<p>There are no sentiments to show.</p>
{% endif %} {% endblock %}
14 changes: 11 additions & 3 deletions sentiment/views/createview.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,27 @@
from django.views.generic import View
from celery import signature
import json
import logging


class SentimentCreate(View):

def post(self, request, *args, **kwargs):
body = parse_request_body(request)

print(body, "BODY")
print(request, "REQ")
try:
signature("sentiment.tasks.run_sentiment", args=(
body,), link=callback_task(request)).delay()
text = body.get('text', [])
tags = body.get('tags', [])

print(text, "TEXT")
signature("sentiment.tasks.run_sentiment", args=(
text,tags,), link=callback_task(request)).delay()
print("SENTIMENT TASK")
return HttpResponse(status=201)
except:
except Exception as e:
logging.error(f"Error occurred: {e}")
return HttpResponse(status=500)


Expand Down
4 changes: 2 additions & 2 deletions sentiment/views/listview.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
class SentimentList(ListView):
model = Sentiment

queryset: QuerySet[Sentiment] = Sentiment.objects.all().order_by('-created')[:100]
queryset: QuerySet[Sentiment] = Sentiment.objects.all().order_by('-created_at')[:100]

template_name: str = '../templates/list.html'

def get(self, request, *args, **kwargs) -> HttpResponse | JsonResponse:
if 'application/json' in request.META.get('HTTP_ACCEPT'):
return JsonResponse(list(
self.get_queryset().values('created', 'id', 'sentiment', 'text')
self.get_queryset().values('created_at', 'label', 'score', 'tags', 'text')
), safe=False)

return super().get(request, *args, **kwargs)

0 comments on commit dcdbf65

Please sign in to comment.