LLMs/RAG + AGENT.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "6a2d4624-4d76-4c52-a0f8-b353b6848549",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Test Dorna (Persian-tuned Llama) through the Ollama chat API\n",
"from ollama import chat, ChatResponse\n",
"\n",
"response: ChatResponse = chat(model='llama3.2', messages=[\n",
"    {\n",
"        'role': 'user',\n",
"        'content': 'چرا اینترنت قطع میشه؟',  # \"Why does the internet keep disconnecting?\"\n",
"    },\n",
"])\n",
"\n",
"# The response supports both dict-style and attribute-style access:\n",
"print(response['message']['content'])\n",
"print(response.message.content)"
]
},
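{
"cell_type": "code",
"execution_count": null,
"id": "ollama-stream-check",
"metadata": {},
"outputs": [],
"source": [
"# Optional follow-up sketch: the same chat endpoint can also stream tokens as\n",
"# they are generated, which is handy for long answers. This assumes the\n",
"# documented ollama-python stream=True flag and the same 'llama3.2' model tag.\n",
"from ollama import chat\n",
"\n",
"stream = chat(\n",
"    model='llama3.2',\n",
"    messages=[{'role': 'user', 'content': 'چرا اینترنت قطع میشه؟'}],\n",
"    stream=True,\n",
")\n",
"for chunk in stream:\n",
"    print(chunk['message']['content'], end='', flush=True)"
]
},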
{
"cell_type": "code",
"execution_count": null,
"id": "a312f740-400a-49a5-a79e-41195aa49746",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!pip install faiss-cpu sentence-transformers ollama numpy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9100712e-4b06-4168-b8ac-36f0c2865e42",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Manual RAG pipeline: FAISS index + sentence-transformers embeddings + Ollama LLM\n",
"from ollama import chat\n",
"import numpy as np\n",
"import faiss\n",
"from sentence_transformers import SentenceTransformer\n",
"import torch\n",
"import os\n",
"import re\n",
"\n",
"DOCUMENT_PATHS = [\n",
"    r'/home/masih/rag_data/Hamrah.txt',  # replace paths as needed\n",
"    r'/home/masih/rag_data/vape.txt',\n",
"    r'/home/masih/rag_data/Shah.txt',\n",
"    r'/home/masih/rag_data/Khalife.txt',\n",
"    r'/home/masih/rag_data/carbon.txt',\n",
"    r'/home/masih/rag_data/takapoo.txt'\n",
"]\n",
"\n",
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
"LLM_MODEL = 'llama3.2'\n",
"CHUNK_SIZE = 1000\n",
"OVERLAP = 200\n",
"INDEX_PATH = r'/home/masih/rag_data/doc_index.faiss'  # replace path\n",
"CHUNK_MAP_PATH = r'/home/masih/rag_data/chunk_map.npy'  # replace path\n",
"\n",
"class AdvancedRAG:\n",
"    def __init__(self):\n",
"        self.encoder = SentenceTransformer(EMBEDDING_MODEL, device='cuda' if torch.cuda.is_available() else 'cpu')\n",
"        self.index = None\n",
"        self.chunk_map = []\n",
"\n",
"    def create_index(self):\n",
"        \"\"\"Create a FAISS index with cosine similarity and a chunk-to-document mapping\"\"\"\n",
"        all_chunks = []\n",
"        doc_mapping = []\n",
"\n",
"        # Sliding-window chunking with overlap\n",
"        for doc_idx, path in enumerate(DOCUMENT_PATHS):\n",
"            with open(path, 'r', encoding='utf-8') as f:\n",
"                text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
"                chunks = [text[i:i+CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE - OVERLAP)]\n",
"                all_chunks.extend(chunks)\n",
"                doc_mapping.extend([doc_idx] * len(chunks))\n",
"\n",
"        # L2-normalize embeddings so inner product equals cosine similarity\n",
"        embeddings = self.encoder.encode(all_chunks, convert_to_tensor=True, device='cuda' if torch.cuda.is_available() else 'cpu')\n",
"        embeddings = embeddings.cpu().numpy()  # FAISS works on CPU arrays\n",
"\n",
"        faiss.normalize_L2(embeddings)\n",
"\n",
"        # Inner-product index plus chunk-to-document mapping\n",
"        self.index = faiss.IndexFlatIP(embeddings.shape[1])\n",
"        self.index.add(embeddings.astype(np.float32))\n",
"        self.chunk_map = np.array(doc_mapping)\n",
"\n",
"        # Persist index and mapping for later runs\n",
"        faiss.write_index(self.index, INDEX_PATH)\n",
"        np.save(CHUNK_MAP_PATH, self.chunk_map)\n",
"\n",
"    def load_index(self):\n",
"        \"\"\"Load a previously persisted index and mapping\"\"\"\n",
"        self.index = faiss.read_index(INDEX_PATH)\n",
"        self.chunk_map = np.load(CHUNK_MAP_PATH, allow_pickle=True)\n",
"\n",
"    def query(self, question, doc_index, top_k=5):\n",
"        \"\"\"Document-specific retrieval with cosine similarity\"\"\"\n",
"        query_embed = self.encoder.encode([question], convert_to_tensor=True, device='cuda' if torch.cuda.is_available() else 'cpu')\n",
"        query_embed = query_embed.cpu().numpy()\n",
"\n",
"        faiss.normalize_L2(query_embed)\n",
"\n",
"        # Over-fetch, then keep only hits from the requested document\n",
"        distances, indices = self.index.search(query_embed.astype(np.float32), top_k * 3)\n",
"\n",
"        relevant_chunks = []\n",
"        for idx in indices[0]:\n",
"            if self.chunk_map[idx] == doc_index:\n",
"                relevant_chunks.append(idx)\n",
"                if len(relevant_chunks) >= top_k:\n",
"                    break\n",
"\n",
"        return relevant_chunks\n",
"\n",
"class AnswerGenerator:\n",
"    def __init__(self, rag_system):\n",
"        self.rag = rag_system\n",
"        self.chunks = []\n",
"\n",
"    def get_answer(self, question, doc_index):\n",
"        \"\"\"Generate a context-aware answer\"\"\"\n",
"        if not self.chunks:\n",
"            self._load_chunks()\n",
"\n",
"        chunk_indices = self.rag.query(question, doc_index)\n",
"        context = \"\\n\".join([self.chunks[idx] for idx in chunk_indices])\n",
"\n",
"        # Prompt (Persian): answer from the context below; if the answer is\n",
"        # absent, reply 'پاسخی یافت نشد' (\"no answer found\")\n",
"        prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
"{context}\n",
"\n",
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
"\n",
"سوال: {question}\n",
"پاسخ:\"\"\"\n",
"\n",
"        response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
"        return response['message']['content']\n",
"\n",
"    def _load_chunks(self):\n",
"        \"\"\"Lazily load all chunks; must mirror the exact chunking used in create_index\"\"\"\n",
"        self.chunks = []\n",
"        for path in DOCUMENT_PATHS:\n",
"            with open(path, 'r', encoding='utf-8') as f:\n",
"                text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
"                self.chunks.extend([text[i:i+CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE - OVERLAP)])\n",
"\n",
"# Main RAG driver\n",
"if __name__ == \"__main__\":\n",
"    rag = AdvancedRAG()\n",
"\n",
"    if not os.path.exists(INDEX_PATH):\n",
"        print(\"Building optimized index...\")\n",
"        rag.create_index()\n",
"    else:\n",
"        print(\"Loading existing index...\")\n",
"        rag.load_index()\n",
"\n",
"    generator = AnswerGenerator(rag)\n",
"\n",
"    # (question, document index) pairs in Persian, matching DOCUMENT_PATHS order\n",
"    queries = [\n",
"        (\"چرا اینترنت همراه اول گوشی وصل نمیشود؟\", 0),\n",
"        (\"چطوری ویپ مورد نظرمو پیدا کنم؟\", 1),\n",
"        (\"شاه عباس که بود؟\", 2),\n",
"        (\"خلیفه سلطان که بود و چه کرد؟\", 3),\n",
"        (\"کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟\", 4),\n",
"        (\"شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟\", 5)\n",
"    ]\n",
"\n",
"    with open(r'/home/masih/rag_data/representation.txt', 'w', encoding='utf-8') as f:  # replace path\n",
"        for q_idx, (query, doc_idx) in enumerate(queries):\n",
"            answer = generator.get_answer(query, doc_idx)\n",
"            f.write(f\"سوال {q_idx+1} ({doc_idx+1}):\\n{query}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
"            print(f\"پردازش سوال {q_idx+1}/{len(queries)} تکمیل شد\")\n",
"\n",
"    print(\"تمامی سوالات با موفقیت پردازش شدند!\")\n"
]
},
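{
"cell_type": "code",
"execution_count": null,
"id": "faiss-cosine-check",
"metadata": {},
"outputs": [],
"source": [
"# The pipeline above relies on the identity: inner product over L2-normalized\n",
"# vectors == cosine similarity. A minimal self-contained check of that\n",
"# assumption with faiss and numpy (random vectors, no real data).\n",
"import numpy as np\n",
"import faiss\n",
"\n",
"rng = np.random.default_rng(0)\n",
"vecs = rng.normal(size=(2, 8)).astype(np.float32)\n",
"faiss.normalize_L2(vecs)  # in-place L2 normalization, as in create_index\n",
"\n",
"# Inner-product search over the normalized vectors...\n",
"index = faiss.IndexFlatIP(vecs.shape[1])\n",
"index.add(vecs[:1])\n",
"score, _ = index.search(vecs[1:], 1)\n",
"\n",
"# ...matches cosine similarity computed by hand\n",
"cosine = float(np.dot(vecs[0], vecs[1]))\n",
"print(score[0][0], cosine)  # should agree up to float tolerance"
]
},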
{
"cell_type": "code",
"execution_count": null,
"id": "1acad95b-6ae6-480a-98cc-29b0e38d2646",
"metadata": {},
"outputs": [],
"source": [
"!pip install langchain chromadb sentence-transformers ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5fd21a3-d820-4e6e-aef4-9ce4955ce2ff",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!pip install -U langchain-community"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "960bc8ae-5d3d-401c-93ae-85b45fe9adee",
"metadata": {},
"outputs": [],
"source": [
"# Chroma RAG pipeline: LangChain + Chroma vector store with metadata filtering\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_community.embeddings import SentenceTransformerEmbeddings\n",
"from langchain.schema import Document\n",
"from ollama import chat\n",
"import os\n",
"import re\n",
"\n",
"DOCUMENT_PATHS = [\n",
"    r'/home/masih/rag_data/Hamrah.txt',  # replace paths as needed\n",
"    r'/home/masih/rag_data/vape.txt',\n",
"    r'/home/masih/rag_data/Shah.txt',\n",
"    r'/home/masih/rag_data/Khalife.txt',\n",
"    r'/home/masih/rag_data/carbon.txt',\n",
"    r'/home/masih/rag_data/takapoo.txt'\n",
"]\n",
"\n",
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
"LLM_MODEL = 'llama3.2'\n",
"CHUNK_SIZE = 1000\n",
"OVERLAP = 200\n",
"CHROMA_PERSIST_DIR = r'/home/masih/chroma_db'  # replace path\n",
"\n",
"class ChromaRAGSystem:\n",
"    def __init__(self):\n",
"        # Embedding model shared by indexing and querying\n",
"        self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
"        # Vector store instance\n",
"        self.vector_db = None\n",
"\n",
"    def build_vector_store(self):\n",
"        \"\"\"Process documents and create the Chroma vector store\"\"\"\n",
"        all_docs = []\n",
"\n",
"        for doc_idx, path in enumerate(DOCUMENT_PATHS):\n",
"            with open(path, 'r', encoding='utf-8') as f:\n",
"                text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
"                # Sliding-window chunking with overlap\n",
"                chunks = [\n",
"                    text[i:i+CHUNK_SIZE]\n",
"                    for i in range(0, len(text), CHUNK_SIZE - OVERLAP)\n",
"                ]\n",
"                # Wrap chunks as LangChain documents with source metadata\n",
"                for chunk in chunks:\n",
"                    all_docs.append(Document(\n",
"                        page_content=chunk,\n",
"                        metadata={\"source_doc\": doc_idx}\n",
"                    ))\n",
"\n",
"        # Build and persist the Chroma vector store\n",
"        self.vector_db = Chroma.from_documents(\n",
"            documents=all_docs,\n",
"            embedding=self.embeddings,\n",
"            persist_directory=CHROMA_PERSIST_DIR\n",
"        )\n",
"        self.vector_db.persist()\n",
"\n",
"    def load_vector_store(self):\n",
"        \"\"\"Load an existing Chroma vector store\"\"\"\n",
"        self.vector_db = Chroma(\n",
"            persist_directory=CHROMA_PERSIST_DIR,\n",
"            embedding_function=self.embeddings\n",
"        )\n",
"\n",
"    def document_query(self, query, doc_index, top_k=5):\n",
"        \"\"\"Retrieve context from a specific document via Chroma metadata filtering\"\"\"\n",
"        results = self.vector_db.similarity_search(\n",
"            query=query,\n",
"            k=top_k,\n",
"            filter={\"source_doc\": doc_index}\n",
"        )\n",
"        return [doc.page_content for doc in results]\n",
"\n",
"class AnswerGenerator:\n",
"    def __init__(self, rag_system):\n",
"        self.rag = rag_system\n",
"\n",
"    def generate_response(self, question, doc_index):\n",
"        \"\"\"Generate a context-aware answer with the LLM\"\"\"\n",
"        # Retrieve relevant context\n",
"        context_chunks = self.rag.document_query(question, doc_index)\n",
"        context = \"\\n\".join(context_chunks)\n",
"\n",
"        # Prompt (Persian): answer from the context below; if the answer is\n",
"        # absent, reply 'پاسخی یافت نشد' (\"no answer found\")\n",
"        prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
"{context}\n",
"\n",
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
"\n",
"سوال: {question}\n",
"پاسخ:\"\"\"\n",
"\n",
"        response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
"        return response['message']['content']\n",
"\n",
"if __name__ == \"__main__\":\n",
"    rag_system = ChromaRAGSystem()\n",
"\n",
"    # Build the vector store on first run, otherwise load it\n",
"    if not os.path.exists(CHROMA_PERSIST_DIR):\n",
"        print(\"Creating new vector store...\")\n",
"        rag_system.build_vector_store()\n",
"    else:\n",
"        print(\"Loading existing vector store...\")\n",
"        rag_system.load_vector_store()\n",
"\n",
"    answer_engine = AnswerGenerator(rag_system)\n",
"\n",
"    # (question, document index) pairs in Persian, matching DOCUMENT_PATHS order\n",
"    queries = [\n",
"        (\"چرا اینترنت همراه اول گوشی وصل نمیشود؟\", 0),\n",
"        (\"چطوری ویپ مورد نظرمو پیدا کنم؟\", 1),\n",
"        (\"شاه عباس که بود؟\", 2),\n",
"        (\"خلیفه سلطان که بود و چه کرد؟\", 3),\n",
"        (\"کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟\", 4),\n",
"        (\"شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟\", 5)\n",
"    ]\n",
"\n",
"    with open(r'/home/masih/rag_data/response.txt', 'w', encoding='utf-8') as output_file:  # replace path\n",
"        for q_num, (query, doc_idx) in enumerate(queries):\n",
"            answer = answer_engine.generate_response(query, doc_idx)\n",
"            output_file.write(f\"سوال {q_num+1} ({doc_idx+1}):\\n{query}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
"            print(f\"پردازش سوال {q_num+1}/{len(queries)} تکمیل شد\")\n",
"\n",
"    print(\"تمامی سوالات با موفقیت پردازش شدند!\")"
]
},
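{
"cell_type": "code",
"execution_count": null,
"id": "chroma-filter-check",
"metadata": {},
"outputs": [],
"source": [
"# Quick retrieval check for the metadata filter (a sketch; assumes the\n",
"# rag_system from the previous cell has been built or loaded).\n",
"# similarity_search_with_score returns (Document, distance) pairs, so you can\n",
"# confirm every hit carries the requested source_doc metadata.\n",
"hits = rag_system.vector_db.similarity_search_with_score(\n",
"    \"شاه عباس که بود؟\",  # \"Who was Shah Abbas?\"\n",
"    k=3,\n",
"    filter={\"source_doc\": 2},  # Shah.txt\n",
")\n",
"for doc, score in hits:\n",
"    print(round(score, 4), doc.metadata, doc.page_content[:80])"
]
},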
{
"cell_type": "code",
"execution_count": null,
"id": "a8858c90-1d8d-4b7b-9c25-d94370930f04",
"metadata": {},
"outputs": [],
"source": [
"# AGENT\n",
"from langchain.agents import Tool, initialize_agent, AgentType\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain_community.llms import Ollama\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# 1. Add web browsing tool\n",
"def web_browser_tool(url: str) -> str:\n",
" \"\"\"Fetch webpage content (5000 character limit)\"\"\"\n",
" try:\n",
" response = requests.get(url, timeout=10)\n",
" soup = BeautifulSoup(response.text, 'html.parser')\n",
" \n",
" # Clean HTML\n",
" for element in soup(['script', 'style', 'header', 'footer', 'nav']):\n",
" element.decompose()\n",
" \n",
" text = soup.get_text(separator='\\n', strip=True)\n",
" return text[:5000] # Prevent token overflow\n",
" except Exception as e:\n",
" return f\"Error accessing website: {str(e)}\"\n",
"\n",
"# 2. Agent \n",
"class AgentEnhancedGenerator:\n",
" def __init__(self, rag_system):\n",
" self.rag = rag_system # Use RAG \n",
" self.llm = Ollama(model=LLM_MODEL)\n",
" self.memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
" \n",
" # Define tools\n",
" self.tools = [\n",
" Tool(\n",
" name=\"Document_Search\",\n",
" func=self._document_search, # Directly use RAG\n",
" description=\"For questions about mobile networks, historical figures, materials, or companies\"\n",
" ),\n",
" Tool(\n",
" name=\"Web_Browser\",\n",
" func=web_browser_tool,\n",
" description=\"For live web data or current information\"\n",
" )\n",
" ]\n",
" \n",
" # Init agent\n",
" self.agent = initialize_agent(\n",
" tools=self.tools,\n",
" llm=self.llm,\n",
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,\n",
" memory=self.memory,\n",
" verbose=True,\n",
" handle_parsing_errors=True,\n",
" max_iterations=3 # Prevent infinite loops\n",
" )\n",
" \n",
" def _document_search(self, query: str) -> str:\n",
" \"\"\"Search across ALL documents in your existing RAG\"\"\"\n",
" results = self.rag.vector_db.similarity_search(query, k=5)\n",
" return \"\\n\".join([doc.page_content for doc in results])\n",
" \n",
" def generate_response(self, question):\n",
" \"\"\"Generate answer using agent system\"\"\"\n",
" try:\n",
" response = self.agent.run(question)\n",
" return response\n",
" except Exception as e:\n",
" return f\"خطا در پردازش: {str(e)}\"\n",
"\n",
"# 3. Example\n",
"if __name__ == \"__main__\":\n",
" # Init RAG\n",
" rag_system = ChromaRAGSystem()\n",
" \n",
" if not os.path.exists(CHROMA_PERSIST_DIR):\n",
" rag_system.build_vector_store()\n",
" else:\n",
" rag_system.load_vector_store()\n",
" \n",
" # Create agent-enhanced generator\n",
" enhanced_agent = AgentEnhancedGenerator(rag_system)\n",
" \n",
" # Test questions (mix of document and web queries)\n",
" test_questions = [\n",
" \"آخرین اخبار درباره شرکت تکاپو صنعت چیست؟\", # Will use web browser\n",
" \"تفاوت کربن اکتیو و کربن بلک چیست؟\", # Uses document search\n",
" \"آیا شاه عباس با خلیفه سلطان همکاری داشت؟\" # Uses conversation memory\n",
" ]\n",
" # Run queries\n",
" with open(r'~\\Desktop\\agent_results.txt', 'w', encoding='utf-8') as output_file:\n",
" for idx, question in enumerate(test_questions):\n",
" answer = enhanced_agent.generate_response(question)\n",
" output_file.write(f\"سوال {idx+1}:\\n{question}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
" print(f\"پردازش سوال {idx+1}/{len(test_questions)} تکمیل شد\")"
]
},
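{
"cell_type": "code",
"execution_count": null,
"id": "web-tool-check",
"metadata": {},
"outputs": [],
"source": [
"# Standalone check of the web tool (a sketch): web_browser_tool is a plain\n",
"# function, so it can be exercised outside the agent loop. example.com is a\n",
"# placeholder URL, not from the original notebook.\n",
"preview = web_browser_tool(\"https://example.com\")\n",
"print(preview[:300])"
]
}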
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}