Enable LLM inference with llama.cpp and llama-cpp-python (#33)
* Add models tab ui for model conversion
* Add changes to models tab UI
* Add changes to models tab UI
* Feature/llamacpp (#22)
  * Initial download snapshot & convert to GGUF using LLama 🐑
  * Chat 💬 completion with llama cpp
  * added llama.cpp requirement
  * model conversion
  * HF snapshot download fix
  * Implement CMake build support and enhance text generation using gguf model.
  * Implement dynamic model path and make quantized_model directory
  * Add py_cmd to configs using Makefile
  * Add py_cmd to configs with cmake, dynamic python command for conversion

  Co-authored-by: parveen kumar <parveen297m210@gmail.com>
  Co-authored-by: Subhanshu0027 <subhanshu0027@gmail.com>

* Feat: download and convert, select model from models tab (#24)
  * Feat: download and convert, select model from models tab
  * Refactor: remove unused line
* Remove Converted gguf Models & Enhance UI in Models Tab (#26)
  * Add feature to remove converted gguf models & UI changes in models tab
  * Add remove model functionality to core.py
  * Optimize code formatting
  * Update README.md with new features
* Select model from chat tab (#25)
* feat: Add support for selecting execution provider, CPU or GPU with CUDA (#29)
* refactor: Remove all remnants of transformers inference logic and associated code, fix removeModel (#30)
* docs: Add installation and development tips, update (#32)
* fix: dropdown for initially empty saved_gguf_models_list (#34)
* fix: Model list not updating after download snapshot, remove model (#35)
  * fix: Model list not updating after download snapshot
  * fix: remove model

Co-authored-by: Subhanshu0027 <subhanshu0027@gmail.com>
Co-authored-by: Subhanshu0027 <91900622+Subhanshu0027@users.noreply.github.com>
Co-authored-by: Juggernaut <108272701+ashish-aesthisia@users.noreply.github.com>
1 parent b1b7053 · commit 01417f3
Showing 11 changed files with 399 additions and 161 deletions.
New file (@@ -0,0 +1,42 @@): CMakeLists.txt

# Minimum required CMake version
cmake_minimum_required(VERSION 3.15)

# Project name
project(llama_cpp)

# Git repository location
set(REPO_URL "https://github.com/ggerganov/llama.cpp")

# Requirements file
set(REQUIREMENTS_FILE "requirements.txt")

# Llama directory
set(LLAMA_DIR "${PROJECT_SOURCE_DIR}/src/llama_cpp")

# Check for Python and Git
find_package(PythonLibs REQUIRED)
find_package(Git REQUIRED)

# Clone the llama.cpp repository at configure time
execute_process(
    COMMAND git clone ${REPO_URL} ${LLAMA_DIR}
    RESULT_VARIABLE git_result
)

# Error handling for the Git clone
if(NOT ${git_result} EQUAL 0)
    message(FATAL_ERROR "Failed to clone llama.cpp repository")
endif()

# Install llama.cpp's Python requirements
execute_process(
    COMMAND pip install -r "${LLAMA_DIR}/${REQUIREMENTS_FILE}"
)

# Create the output directory for quantized models
file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/src/quantized_model")

# Locate a Python interpreter (find_program accepts literal names only)
find_program(PYTHON NAMES python python3)

# Record the interpreter path so the app can read it back from config.ini
if(PYTHON)
    file(APPEND "${PROJECT_SOURCE_DIR}/configs/config.ini" "py_cmd = ${PYTHON}\n")
endif()
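The final step records the detected interpreter in configs/config.ini, which the application later reads back. A minimal verification sketch, assuming the setup above has been run from the repository root:

from configparser import ConfigParser

# Read back the interpreter path appended by CMake (or by the Makefile below)
config = ConfigParser()
config.read("./configs/config.ini")
print(config.get("Settings", "py_cmd"))  # e.g. /usr/bin/python3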
New file (@@ -0,0 +1,43 @@): Makefile

# Makefile to clone llama.cpp repository and install requirements

# Variables
REPO_URL := https://github.com/ggerganov/llama.cpp
REQUIREMENTS_FILE := requirements.txt
LLAMA_DIR := src/llama_cpp

# Determine pip command
PIP := $(shell command -v pip3 2>/dev/null || command -v pip)

# Check that python and git are installed
PYTHON := $(shell command -v python 2>/dev/null || command -v python3 2>/dev/null)
GIT := $(shell command -v git)

ifeq ($(PYTHON),)
$(error Python is not installed. Please install Python before running this Makefile.)
endif

ifeq ($(GIT),)
$(error Git is not installed. Please install Git before running this Makefile.)
endif

# Targets
.PHONY: all clone install clean quantized_model_dir append_to_configs

all: clone install quantized_model_dir append_to_configs

clone:
	mkdir -p $(LLAMA_DIR)
	git clone $(REPO_URL) $(LLAMA_DIR)

install:
	cd $(LLAMA_DIR) && \
	$(PIP) install -r $(REQUIREMENTS_FILE)

quantized_model_dir:
	mkdir -p src/quantized_model

append_to_configs:
	echo "py_cmd = $(PYTHON)" >> configs/config.ini

clean:
	rm -rf $(LLAMA_DIR)
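Running `make` from the repository root executes `clone`, `install`, `quantized_model_dir`, and `append_to_configs` in that order; `make clean` removes the cloned src/llama_cpp tree. The CMake variant above performs the same steps during configuration, so a plain configure run (for example `cmake -B build`) should be enough there.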
New file (@@ -0,0 +1,2 @@): contains only two blank lines.
Modified (@@ -1,4 +1,3 @@): configs/config.ini

[Settings]
execution_provider =
repo_id =
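Because [Settings] is the file's only section, the py_cmd line appended by the build step is parsed as part of it. After setup the file would look roughly like this (the interpreter path is illustrative):

[Settings]
execution_provider =
repo_id =
py_cmd = /usr/bin/python3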
One file was deleted in this commit (its contents are not shown).
Modified (@@ -1,4 +1,5 @@): requirements.txt

-gradio==4.24.0
+gradio==4.27.0
 huggingface_hub==0.21.1
 langchain==0.1.14
-torch==2.2.1
-transformers==4.39.1
+torch==2.1.2
+llama-cpp-python==0.1.9
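The dependency swap matches the commit's intent: transformers is removed and llama-cpp-python takes over inference against the converted GGUF files. A rough illustration of loading such a file (the model path is a placeholder, and the exact API surface varies between llama-cpp-python releases):

from llama_cpp import Llama

# Load a GGUF file produced by the conversion step (placeholder path)
llm = Llama(model_path="./src/quantized_model/some-org__some-model.gguf")

# Plain text completion; newer releases also expose create_chat_completion()
output = llm("Q: What is llama.cpp? A:", max_tokens=64)
print(output["choices"][0]["text"])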
New file (@@ -0,0 +1,41 @@): Python module for model download and GGUF conversion

import os
import subprocess
from configparser import ConfigParser

from huggingface_hub import snapshot_download

config_path = "./configs/config.ini"


def get_py_cmd():
    # Return the Python command recorded in config.ini by the build step
    config = ConfigParser()
    config.read(config_path)
    py_cmd = config.get('Settings', 'py_cmd')
    if "python3" in py_cmd:
        return 'python3'
    else:
        return 'python'


def quantize_model(repo_id):
    original_models_path = "./src/original_model/"
    quantized_path = "./src/quantized_model/"

    # Derive local folder and output file names from the Hugging Face repo id
    repo_id_parts = repo_id.split("/")
    model_folder = f"model--{'--'.join(repo_id_parts)}"
    model_path = original_models_path + model_folder

    outfile = quantized_path + repo_id.replace("/", "__") + ".gguf"

    # Reuse an existing GGUF file instead of re-downloading and re-converting
    if os.path.isfile(outfile):
        return outfile

    # Download the model snapshot from the Hugging Face Hub
    snapshot_download(repo_id=repo_id, local_dir=model_path, local_dir_use_symlinks=True)

    # Convert the downloaded model to f16 GGUF using llama.cpp's converter
    command = [
        get_py_cmd(),
        './src/llama_cpp/convert-hf-to-gguf.py',
        model_path,
        '--outtype', 'f16',
        '--outfile', outfile
    ]
    subprocess.run(command, check=True)

    return outfile
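A hypothetical call, with a placeholder repo id:

# First call downloads and converts; later calls return the cached path
path = quantize_model("some-org/some-model")
print(path)  # ./src/quantized_model/some-org__some-model.gguf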