Commit d07ebbe

Merge branch 'main' into feat/COG-544-eval-on-swe-bench

Rita Aleksziev committed Nov 18, 2024
2 parents d986e7c + ced5385
Showing 74 changed files with 1,635 additions and 320 deletions.
Binary file removed .DS_Store
Binary file not shown.
63 changes: 63 additions & 0 deletions .github/workflows/test_cognee_llama_index_notebook.yml
@@ -0,0 +1,63 @@
name: test | llama index notebook

on:
  workflow_dispatch:
  pull_request:
    branches:
      - main
    types: [labeled, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  RUNTIME__LOG_LEVEL: ERROR

jobs:
  get_docs_changes:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml

  run_notebook_test:
    name: test
    needs: get_docs_changes
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && github.event.label.name == 'run-checks'
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
    steps:
      - name: Check out
        uses: actions/checkout@master

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11.x'

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Install dependencies
        run: |
          poetry install --no-interaction --all-extras --no-root
          poetry add jupyter --no-interaction

      - name: Execute Jupyter Notebook
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
          GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
        run: |
          poetry run jupyter nbconvert \
            --to notebook \
            --execute notebooks/cognee_llama_index.ipynb \
            --output executed_notebook.ipynb \
            --ExecutePreprocessor.timeout=1200
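For local debugging, the notebook step can be approximated outside CI. A rough sketch, assuming the repository root as working directory, a Poetry environment already installed, and LLM_API_KEY exported (the workflow supplies it via secrets):

```python
# Local approximation of the workflow's "Execute Jupyter Notebook" step.
import subprocess

subprocess.run(
    [
        "poetry", "run", "jupyter", "nbconvert",
        "--to", "notebook",
        "--execute", "notebooks/cognee_llama_index.ipynb",
        "--output", "executed_notebook.ipynb",
        "--ExecutePreprocessor.timeout=1200",
    ],
    check=True,  # fail loudly if any notebook cell errors
)
```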
8 changes: 8 additions & 0 deletions cognee-frontend/src/modules/chat/getHistory.ts
@@ -0,0 +1,8 @@
import { fetch } from '@/utils';

export default function getHistory() {
  return fetch(
    '/v1/search',
  )
    .then((response) => response.json());
}
15 changes: 13 additions & 2 deletions cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
@@ -1,9 +1,12 @@
'use client';

import { v4 } from 'uuid';
import classNames from 'classnames';
import { useCallback, useState } from 'react';
import { useCallback, useEffect, useState } from 'react';
import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui';
import { fetch } from '@/utils';
import styles from './SearchView.module.css';
import getHistory from '@/modules/chat/getHistory';

interface Message {
  id: string;
@@ -52,6 +55,14 @@ export default function SearchView() {
    }, 300);
  }, []);

  useEffect(() => {
    getHistory()
      .then((history) => {
        setMessages(history);
        scrollToBottom();
      });
  }, [scrollToBottom]);

  const handleSearchSubmit = useCallback((event: React.FormEvent<HTMLFormElement>) => {
    event.preventDefault();
@@ -78,7 +89,7 @@
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query: inputValue,
        query: inputValue.trim(),
        searchType: searchTypeValue,
      }),
    })
Binary file removed cognee/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion cognee/__init__.py
@@ -2,7 +2,7 @@
from .api.v1.add import add
from .api.v1.cognify import cognify
from .api.v1.datasets.datasets import datasets
from .api.v1.search import search, SearchType
from .api.v1.search import search, SearchType, get_search_history
from .api.v1.prune import prune

# Pipelines
2 changes: 1 addition & 1 deletion cognee/api/v1/add/add_v2.py
@@ -21,4 +21,4 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam
    pipeline = run_tasks(tasks, data, "add_pipeline")

    async for result in pipeline:
        print(result)
        print(result)
1 change: 1 addition & 0 deletions cognee/api/v1/search/__init__.py
@@ -1 +1,2 @@
from .search_v2 import search, SearchType
from .get_search_history import get_search_history
9 changes: 9 additions & 0 deletions cognee/api/v1/search/get_search_history.py
@@ -0,0 +1,9 @@
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User

async def get_search_history(user: User = None) -> list:
    if not user:
        user = await get_default_user()

    return await get_history(user.id)
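A minimal usage sketch of the new helper, assuming a configured cognee installation; per the `__init__.py` change above, `get_search_history` is exported at package level, and omitting the user argument falls back to the default user:

```python
import asyncio

import cognee

async def main():
    # With no user argument, get_search_history resolves the default user.
    history = await cognee.get_search_history()
    for entry in history:
        print(entry)

asyncio.run(main())
```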
29 changes: 25 additions & 4 deletions cognee/api/v1/search/routers/get_search_router.py
@@ -1,8 +1,11 @@
from cognee.api.v1.search import SearchType
from uuid import UUID
from datetime import datetime
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
from cognee.api.v1.search import SearchType
from cognee.api.DTO import InDTO, OutDTO
from cognee.modules.users.models import User
from fastapi import Depends, APIRouter
from cognee.api.DTO import InDTO
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user


@@ -13,6 +16,24 @@ class SearchPayloadDTO(InDTO):
def get_search_router() -> APIRouter:
    router = APIRouter()

    class SearchHistoryItem(OutDTO):
        id: UUID
        text: str
        user: str
        created_at: datetime

    @router.get("/", response_model = list[SearchHistoryItem])
    async def get_search_history(user: User = Depends(get_authenticated_user)):
        try:
            history = await get_history(user.id)

            return history
        except Exception as error:
            return JSONResponse(
                status_code = 500,
                content = {"error": str(error)}
            )

    @router.post("/", response_model = list)
    async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
        """ This endpoint is responsible for searching for nodes in the graph."""
@@ -28,4 +49,4 @@ async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticat
                content = {"error": str(error)}
            )

    return router
    return router
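A hypothetical client call against the new GET route; the `/v1/search` mount point, host, and authentication mechanism are assumptions, not shown in this diff:

```python
import httpx

# Placeholder auth: get_authenticated_user's cookie/session scheme is not shown here.
response = httpx.get("http://localhost:8000/v1/search/", cookies={"session": "..."})
response.raise_for_status()
for item in response.json():
    # Fields per SearchHistoryItem (id, text, user, created_at);
    # OutDTO may alter the wire casing.
    print(item)
```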
17 changes: 12 additions & 5 deletions cognee/api/v1/search/search_v2.py
@@ -1,6 +1,9 @@
import json
from uuid import UUID
from enum import Enum
from typing import Callable, Dict
from cognee.modules.search.operations import log_query, log_result
from cognee.modules.storage.utils import JSONEncoder
from cognee.shared.utils import send_telemetry
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
@@ -14,15 +17,17 @@ class SearchType(Enum):
    INSIGHTS = "INSIGHTS"
    CHUNKS = "CHUNKS"

async def search(search_type: SearchType, query: str, user: User = None) -> list:
async def search(query_type: SearchType, query_text: str, user: User = None) -> list:
    if user is None:
        user = await get_default_user()

    if user is None:
        raise PermissionError("No user found in the system. Please create a user.")

    query = await log_query(query_text, str(query_type), user.id)

    own_document_ids = await get_document_ids_for_user(user.id)
    search_results = await specific_search(search_type, query, user)
    search_results = await specific_search(query_type, query_text, user)

    filtered_search_results = []
@@ -33,19 +38,21 @@ async def search(search_type: SearchType, query: str, user: User = None) -> list
        if document_id is None or document_id in own_document_ids:
            filtered_search_results.append(search_result)

    await log_result(query.id, json.dumps(filtered_search_results, cls = JSONEncoder), user.id)

    return filtered_search_results

async def specific_search(search_type: SearchType, query: str, user) -> list:
async def specific_search(query_type: SearchType, query: str, user) -> list:
    search_tasks: Dict[SearchType, Callable] = {
        SearchType.SUMMARIES: query_summaries,
        SearchType.INSIGHTS: query_graph_connections,
        SearchType.CHUNKS: query_chunks,
    }

    search_task = search_tasks.get(search_type)
    search_task = search_tasks.get(query_type)

    if search_task is None:
        raise ValueError(f"Unsupported search type: {search_type}")
        raise ValueError(f"Unsupported search type: {query_type}")

    send_telemetry("cognee.search EXECUTION STARTED", user.id)
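Under the renamed signature, a call now reads as below; a sketch assuming data has already been added and cognified, with `search` and `SearchType` exported at package level per the `__init__.py` diff:

```python
import asyncio

from cognee import SearchType, search

async def main():
    results = await search(query_type=SearchType.CHUNKS, query_text="What is cognee?")
    print(results)

asyncio.run(main())
```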
3 changes: 0 additions & 3 deletions cognee/infrastructure/databases/relational/__init__.py
@@ -2,6 +2,3 @@
from .config import get_relational_config
from .create_db_and_tables import create_db_and_tables
from .get_relational_engine import get_relational_engine

# Global data types
from .data_types.UUID import UUID
45 changes: 0 additions & 45 deletions cognee/infrastructure/databases/relational/data_types/UUID.py

This file was deleted.

7 changes: 5 additions & 2 deletions cognee/infrastructure/engine/models/DataPoint.py
@@ -20,5 +20,8 @@ class DataPoint(BaseModel):
    def get_embeddable_data(self):
        if self._metadata and len(self._metadata["index_fields"]) > 0 \
            and hasattr(self, self._metadata["index_fields"][0]):

            return getattr(self, self._metadata["index_fields"][0])
            attribute = getattr(self, self._metadata["index_fields"][0])
            if isinstance(attribute, str):
                return attribute.strip()
            else:
                return attribute
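A standalone sketch of the new stripping behaviour; a plain class stands in for the pydantic DataPoint, and the "text" field name is illustrative:

```python
class FakeDataPoint:
    _metadata = {"index_fields": ["text"]}

    def __init__(self, text: str):
        self.text = text

    def get_embeddable_data(self):
        if self._metadata and len(self._metadata["index_fields"]) > 0 \
            and hasattr(self, self._metadata["index_fields"][0]):
            attribute = getattr(self, self._metadata["index_fields"][0])
            # Strings are stripped before embedding; other types pass through.
            if isinstance(attribute, str):
                return attribute.strip()
            return attribute

print(repr(FakeDataPoint("  hello world \n").get_embeddable_data()))  # 'hello world'
```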
4 changes: 2 additions & 2 deletions cognee/infrastructure/pipeline/models/Operation.py
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timezone
from sqlalchemy.orm import Mapped, MappedColumn
from sqlalchemy import Column, DateTime, ForeignKey, Enum, JSON
from cognee.infrastructure.databases.relational import Base, UUID
@@ -24,4 +24,4 @@ class Operation(Base):
    data_id = Column(UUID, ForeignKey("data.id"))
    meta_data: Mapped[dict] = MappedColumn(type_ = JSON)

    created_at = Column(DateTime, default = datetime.utcnow)
    created_at = Column(DateTime, default = datetime.now(timezone.utc))
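One caveat with this change: `datetime.now(timezone.utc)` is called once, when the class body executes, so every row would share that import-time timestamp. SQLAlchemy's `default` accepts a callable that is evaluated per INSERT, which preserves the per-row behaviour the old `datetime.utcnow` default had:

```python
from datetime import datetime, timezone

from sqlalchemy import Column, DateTime

# Evaluated at INSERT time, not at import time.
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
```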
21 changes: 10 additions & 11 deletions cognee/modules/chunking/TextChunker.py
@@ -9,26 +9,25 @@ class TextChunker():

    chunk_index = 0
    chunk_size = 0
    paragraph_chunks = []

    def __init__(self, document, get_text: callable, chunk_size: int = 1024):
        self.document = document
        self.max_chunk_size = chunk_size
        self.get_text = get_text

    def read(self):
        self.paragraph_chunks = []
        paragraph_chunks = []
        for content_text in self.get_text():
            for chunk_data in chunk_by_paragraph(
                content_text,
                self.max_chunk_size,
                batch_paragraphs = True,
            ):
                if self.chunk_size + chunk_data["word_count"] <= self.max_chunk_size:
                    self.paragraph_chunks.append(chunk_data)
                    paragraph_chunks.append(chunk_data)
                    self.chunk_size += chunk_data["word_count"]
                else:
                    if len(self.paragraph_chunks) == 0:
                    if len(paragraph_chunks) == 0:
                        yield DocumentChunk(
                            id = chunk_data["chunk_id"],
                            text = chunk_data["text"],
@@ -37,35 +36,35 @@ def read(self):
                            chunk_index = self.chunk_index,
                            cut_type = chunk_data["cut_type"],
                        )
                        self.paragraph_chunks = []
                        paragraph_chunks = []
                        self.chunk_size = 0
                    else:
                        chunk_text = " ".join(chunk["text"] for chunk in self.paragraph_chunks)
                        chunk_text = " ".join(chunk["text"] for chunk in paragraph_chunks)
                        try:
                            yield DocumentChunk(
                                id = uuid5(NAMESPACE_OID, f"{str(self.document.id)}-{self.chunk_index}"),
                                text = chunk_text,
                                word_count = self.chunk_size,
                                is_part_of = self.document,
                                chunk_index = self.chunk_index,
                                cut_type = self.paragraph_chunks[len(self.paragraph_chunks) - 1]["cut_type"],
                                cut_type = paragraph_chunks[len(paragraph_chunks) - 1]["cut_type"],
                            )
                        except Exception as e:
                            print(e)
                        self.paragraph_chunks = [chunk_data]
                        paragraph_chunks = [chunk_data]
                        self.chunk_size = chunk_data["word_count"]

                    self.chunk_index += 1

        if len(self.paragraph_chunks) > 0:
        if len(paragraph_chunks) > 0:
            try:
                yield DocumentChunk(
                    id = uuid5(NAMESPACE_OID, f"{str(self.document.id)}-{self.chunk_index}"),
                    text = " ".join(chunk["text"] for chunk in self.paragraph_chunks),
                    text = " ".join(chunk["text"] for chunk in paragraph_chunks),
                    word_count = self.chunk_size,
                    is_part_of = self.document,
                    chunk_index = self.chunk_index,
                    cut_type = self.paragraph_chunks[len(self.paragraph_chunks) - 1]["cut_type"],
                    cut_type = paragraph_chunks[len(paragraph_chunks) - 1]["cut_type"],
                )
            except Exception as e:
                print(e)
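A plausible motivation for moving paragraph_chunks from a class attribute to a local variable: a mutable class attribute is shared by every instance, so chunk state from one TextChunker could leak into another. A minimal illustration of the pitfall:

```python
class SharedState:
    chunks = []  # one list shared by every instance

class LocalState:
    def __init__(self):
        self.chunks = []  # fresh list per instance

a, b = SharedState(), SharedState()
a.chunks.append("from document A")
print(b.chunks)  # ['from document A'] -- leaked state

c, d = LocalState(), LocalState()
c.chunks.append("from document C")
print(d.chunks)  # []
```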
(Diff truncated: the remaining changed files are not shown.)