Compare commits
No commits in common. "3990c0bc032694df8911f4aa9cf0fb50af2a9649" and "1656df1dc6483af5b8ca3ee60cab41c8b7aadc9c" have entirely different histories.
3990c0bc03
...
1656df1dc6
@ -1,126 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "51aa96d8-9958-465c-8b0d-cd4f35584307",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"from ollama import ChatResponse\n",
|
|
||||||
"\n",
|
|
||||||
"# Define file path \n",
|
|
||||||
"file_path = r'C:\\Users\\ASUS\\Downloads\\responses.txt'\n",
|
|
||||||
"\n",
|
|
||||||
"# First query and save\n",
|
|
||||||
"response = chat(model='partai/dorna-llama3', messages=[\n",
|
|
||||||
" {'role': 'user', 'content': 'چرا آسمان آبیست؟?'},\n",
|
|
||||||
"])\n",
|
|
||||||
"with open(file_path, 'w', encoding='utf-8') as f:\n",
|
|
||||||
" f.write(response['message']['content'] + '\\n\\n')\n",
|
|
||||||
"\n",
|
|
||||||
"# Second query and append\n",
|
|
||||||
"response = chat(model='partai/dorna-llama3', messages=[\n",
|
|
||||||
" {'role': 'user', 'content': 'چرا اینترنت قطع میشه؟'},\n",
|
|
||||||
"])\n",
|
|
||||||
"with open(file_path, 'a', encoding='utf-8') as f:\n",
|
|
||||||
" f.write(response['message']['content'] + '\\n\\n')\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Responses saved to {file_path}\")\n",
|
|
||||||
"\n",
|
|
||||||
"Masih Moafi, [1/24/2025 11:57 PM]\n",
|
|
||||||
"from ollama import chat, embeddings\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
|
||||||
"\n",
|
|
||||||
"# 1. Load and chunk document\n",
|
|
||||||
"with open(r'C:\\Users\\ASUS\\Downloads\\data.txt', 'r', encoding='utf-8') as f:\n",
|
|
||||||
" text = f.read()\n",
|
|
||||||
"\n",
|
|
||||||
"# Split with overlap\n",
|
|
||||||
"chunk_size = 1000\n",
|
|
||||||
"overlap = 200\n",
|
|
||||||
"chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size - overlap)]\n",
|
|
||||||
"\n",
|
|
||||||
"# 2. Create embeddings using dorna-llama3\n",
|
|
||||||
"chunk_embeddings = []\n",
|
|
||||||
"for chunk in chunks:\n",
|
|
||||||
" response = embeddings(model='partai/dorna-llama3', prompt=chunk)\n",
|
|
||||||
" chunk_embeddings.append(response['embedding'])\n",
|
|
||||||
"\n",
|
|
||||||
"# 3. Context retrieval system\n",
|
|
||||||
"def find_relevant_chunks(query, top_k=3):\n",
|
|
||||||
" # Generate query embedding\n",
|
|
||||||
" query_embed = embeddings(model='partai/dorna-llama3', prompt=query)['embedding']\n",
|
|
||||||
" \n",
|
|
||||||
" # Calculate similarities\n",
|
|
||||||
" scores = cosine_similarity([query_embed], chunk_embeddings)[0]\n",
|
|
||||||
" \n",
|
|
||||||
" # Return top chunks\n",
|
|
||||||
" best_indices = np.argsort(scores)[-top_k:][::-1]\n",
|
|
||||||
" return \"\\n---\\n\".join([chunks[i] for i in best_indices])\n",
|
|
||||||
"\n",
|
|
||||||
"# 4. RAG-enhanced chat function\n",
|
|
||||||
"def rag_chat(query):\n",
|
|
||||||
" # Retrieve context\n",
|
|
||||||
" context = find_relevant_chunks(query)\n",
|
|
||||||
" \n",
|
|
||||||
" # Create augmented prompt\n",
|
|
||||||
" prompt = f\"\"\"Answer the question using this context:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"Question: {query}\n",
|
|
||||||
"Answer clearly and concisely in Persian:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Get response\n",
|
|
||||||
" response = chat(model='partai/dorna-llama3', messages=[\n",
|
|
||||||
" {'role': 'user', 'content': prompt}\n",
|
|
||||||
" ])\n",
|
|
||||||
" \n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"# Example usage\n",
|
|
||||||
"response = rag_chat(\"چرا اینترنت قطع میشود؟\")\n",
|
|
||||||
"print(\"پاسخ:\", response)\n",
|
|
||||||
"\n",
|
|
||||||
"Masih Moafi, [1/24/2025 11:57 PM]\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"from ollama import ChatResponse\n",
|
|
||||||
"\n",
|
|
||||||
"# Define file path \n",
|
|
||||||
"file_path = r'C:\\Users\\ASUS\\Downloads\\responses.txt'\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"response = chat(model='partai/dorna-llama3', messages=[\n",
|
|
||||||
" {'role': 'user', 'content': 'چرا اینترنت قطع میشه؟'},\n",
|
|
||||||
"])\n",
|
|
||||||
"with open(file_path, 'a', encoding='utf-8') as f:\n",
|
|
||||||
" f.write(response['message']['content'] + '\\n\\n')\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Responses saved to {file_path}\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1 +0,0 @@
|
|||||||

|
|
||||||
@ -1,178 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "636bba8f-4de0-434f-9064-818d96f628bf",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# ADVANCED RAG INTEGRATION\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import faiss\n",
|
|
||||||
"from sentence_transformers import SentenceTransformer\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"\n",
|
|
||||||
"DOCUMENT_PATHS = [\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\Hamrah.txt', #replace path\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\vape.txt',\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\Shah.txt',\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\Khalife.txt',\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\carbon.txt',\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\takapoo.txt',\n",
|
|
||||||
" r'C:\\Users\\ASUS\\Downloads\\mahmood.txt'\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'llama3.2'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"INDEX_PATH = r'C:\\Users\\ASUS\\Downloads\\doc_index.faiss'\n",
|
|
||||||
"CHUNK_MAP_PATH = r'C:\\Users\\ASUS\\Downloads\\chunk_map.npy'\n",
|
|
||||||
"\n",
|
|
||||||
"class AdvancedRAG:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" self.encoder = SentenceTransformer(EMBEDDING_MODEL)\n",
|
|
||||||
" self.index = None\n",
|
|
||||||
" self.chunk_map = []\n",
|
|
||||||
" \n",
|
|
||||||
" def create_index(self):\n",
|
|
||||||
" \"\"\"Create FAISS index with cosine similarity and document mapping\"\"\"\n",
|
|
||||||
" all_chunks = []\n",
|
|
||||||
" doc_mapping = []\n",
|
|
||||||
" \n",
|
|
||||||
" # Process via CHUNKING (REQ 4 RAG)\n",
|
|
||||||
" for doc_idx, path in enumerate(DOCUMENT_PATHS):\n",
|
|
||||||
" with open(path, 'r', encoding='utf-8') as f:\n",
|
|
||||||
" text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
|
|
||||||
" chunks = [text[i:i+CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE - OVERLAP)]\n",
|
|
||||||
" all_chunks.extend(chunks)\n",
|
|
||||||
" doc_mapping.extend([doc_idx] * len(chunks))\n",
|
|
||||||
" \n",
|
|
||||||
" # Normalized embeddings (REQ 4 cosine similarity)\n",
|
|
||||||
" embeddings = self.encoder.encode(all_chunks)\n",
|
|
||||||
" faiss.normalize_L2(embeddings) \n",
|
|
||||||
" \n",
|
|
||||||
" # FAISS index & Mapping\n",
|
|
||||||
" self.index = faiss.IndexFlatIP(embeddings.shape[1])\n",
|
|
||||||
" self.index.add(embeddings.astype(np.float32))\n",
|
|
||||||
" self.chunk_map = np.array(doc_mapping)\n",
|
|
||||||
" \n",
|
|
||||||
" # Index \n",
|
|
||||||
" faiss.write_index(self.index, INDEX_PATH)\n",
|
|
||||||
" # Mapping \n",
|
|
||||||
" np.save(CHUNK_MAP_PATH, self.chunk_map)\n",
|
|
||||||
" \n",
|
|
||||||
" def load_index(self):\n",
|
|
||||||
" \"\"\"LOAD EXISTING DATA\"\"\"\n",
|
|
||||||
" self.index = faiss.read_index(INDEX_PATH)\n",
|
|
||||||
" self.chunk_map = np.load(CHUNK_MAP_PATH, allow_pickle=True)\n",
|
|
||||||
" \n",
|
|
||||||
" def query(self, question, doc_index, top_k=6):\n",
|
|
||||||
" \"\"\"DOCUMENT-SPECIFIC QUERY WITH COSINE SIMILARITY \"\"\"\n",
|
|
||||||
" # Encode \n",
|
|
||||||
" query_embed = self.encoder.encode([question])\n",
|
|
||||||
" # Normalize \n",
|
|
||||||
" faiss.normalize_L2(query_embed)\n",
|
|
||||||
" \n",
|
|
||||||
" distances, indices = self.index.search(query_embed.astype(np.float32), top_k*3)\n",
|
|
||||||
" \n",
|
|
||||||
" relevant_chunks = []\n",
|
|
||||||
" for idx in indices[0]:\n",
|
|
||||||
" if self.chunk_map[idx] == doc_index:\n",
|
|
||||||
" relevant_chunks.append(idx)\n",
|
|
||||||
" if len(relevant_chunks) >= top_k:\n",
|
|
||||||
" break\n",
|
|
||||||
" \n",
|
|
||||||
" return relevant_chunks\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" self.chunks = [] \n",
|
|
||||||
" \n",
|
|
||||||
" def get_answer(self, question, doc_index):\n",
|
|
||||||
" \"\"\"GENERATING CONTEXT-AWARE ANSWER\"\"\"\n",
|
|
||||||
" if not self.chunks:\n",
|
|
||||||
" self._load_chunks()\n",
|
|
||||||
" \n",
|
|
||||||
" chunk_indices = self.rag.query(question, doc_index)\n",
|
|
||||||
" context = \"\\n\".join([self.chunks[idx] for idx in chunk_indices])\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
" \n",
|
|
||||||
" def _load_chunks(self):\n",
|
|
||||||
" \"\"\"LOAD ALL CHUNKS(LAZY)\"\"\"\n",
|
|
||||||
" self.chunks = []\n",
|
|
||||||
" for path in DOCUMENT_PATHS:\n",
|
|
||||||
" with open(path, 'r', encoding='utf-8') as f:\n",
|
|
||||||
" text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
|
|
||||||
" self.chunks.extend([text[i:i+CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE - OVERLAP)])\n",
|
|
||||||
"\n",
|
|
||||||
"# MAIN EXE of RAG\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" # RAG init\n",
|
|
||||||
" rag = AdvancedRAG()\n",
|
|
||||||
" \n",
|
|
||||||
" if not os.path.exists(INDEX_PATH):\n",
|
|
||||||
" print(\"Building optimized index...\")\n",
|
|
||||||
" rag.create_index()\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Loading existing index...\")\n",
|
|
||||||
" rag.load_index()\n",
|
|
||||||
" # Answer Generator init\n",
|
|
||||||
" generator = AnswerGenerator(rag)\n",
|
|
||||||
" \n",
|
|
||||||
" queries = [\n",
|
|
||||||
" (\"چرا اینترنت همراه اول گوشی وصل نمیشود؟\", 0),\n",
|
|
||||||
" (\"چطوری ویپ مورد نظرمو پیدا کنم؟\", 1),\n",
|
|
||||||
" (\"شاه عباس که بود؟\", 2),\n",
|
|
||||||
" (\"خلیفه سلطان که بود و چه کرد؟\", 3),\n",
|
|
||||||
" (\"کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟\", 4),\n",
|
|
||||||
" (\"شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟\", 5),\n",
|
|
||||||
" (\"6 ,\"سید محمود خلیفه سلطانی کیست؟\"),\n",
|
|
||||||
" ]\n",
|
|
||||||
" \n",
|
|
||||||
" with open(r'C:\\Users\\ASUS\\Downloads\\representation.txt', 'w', encoding='utf-8') as f: #replace path\n",
|
|
||||||
" for q_idx, (query, doc_idx) in enumerate(queries):\n",
|
|
||||||
" answer = generator.get_answer(query, doc_idx)\n",
|
|
||||||
" f.write(f\"سوال {q_idx+1} ({doc_idx+1}):\\n{query}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
|
|
||||||
" print(f\"پردازش سوال {q_idx+1}/{len(queries)} تکمیل شد\")\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"تمامی سوالات با موفقیت پردازش شدند!\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1 +0,0 @@
|
|||||||

|
|
||||||
@ -1,160 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "20da3ce4-6291-40de-8068-e66beb639137",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# CHROMOA\n",
|
|
||||||
"from langchain.vectorstores import Chroma\n",
|
|
||||||
"from langchain.embeddings import SentenceTransformerEmbeddings\n",
|
|
||||||
"from langchain.schema import Document\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"\n",
|
|
||||||
"DOCUMENT_PATHS = [\n",
|
|
||||||
" r'/home/masih/rag_data/Hamrah.txt', #replace path\n",
|
|
||||||
" r'/home/masih/rag_data/vape.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/Shah.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/Khalife.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/carbon.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/takapoo.txt'\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'gemma2:9b'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"CHROMA_PERSIST_DIR = r'\\home\\Masih\\chroma_db\\chroma_db' \n",
|
|
||||||
"\n",
|
|
||||||
"class ChromaRAGSystem:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" # Init embedding model\n",
|
|
||||||
" self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
|
|
||||||
" # Vector store instance\n",
|
|
||||||
" self.vector_db = None\n",
|
|
||||||
" \n",
|
|
||||||
" def build_vector_store(self):\n",
|
|
||||||
" \"\"\"Process documents and create Chroma vector store\"\"\"\n",
|
|
||||||
" all_docs = []\n",
|
|
||||||
" \n",
|
|
||||||
"\n",
|
|
||||||
" for doc_idx, path in enumerate(DOCUMENT_PATHS):\n",
|
|
||||||
" with open(path, 'r', encoding='utf-8') as f:\n",
|
|
||||||
" text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
|
|
||||||
" # sliding window chunking\n",
|
|
||||||
" chunks = [\n",
|
|
||||||
" text[i:i+CHUNK_SIZE] \n",
|
|
||||||
" for i in range(0, len(text), CHUNK_SIZE - OVERLAP)\n",
|
|
||||||
" ]\n",
|
|
||||||
" # LangChain documents with metadata\n",
|
|
||||||
" for chunk in chunks:\n",
|
|
||||||
" all_docs.append(Document(\n",
|
|
||||||
" page_content=chunk,\n",
|
|
||||||
" metadata={\"source_doc\": doc_idx}\n",
|
|
||||||
" ))\n",
|
|
||||||
" \n",
|
|
||||||
" # Chroma vector store\n",
|
|
||||||
" self.vector_db = Chroma.from_documents(\n",
|
|
||||||
" documents=all_docs,\n",
|
|
||||||
" embedding=self.embeddings,\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR\n",
|
|
||||||
" )\n",
|
|
||||||
" self.vector_db.persist()\n",
|
|
||||||
" \n",
|
|
||||||
" def load_vector_store(self):\n",
|
|
||||||
" \"\"\"Load existing Chroma vector store\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma(\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR,\n",
|
|
||||||
" embedding_function=self.embeddings\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" def document_query(self, query, doc_index, top_k=5):\n",
|
|
||||||
" \"\"\"Retrieve context from specific document\"\"\"\n",
|
|
||||||
" # Chroma metadata filtering\n",
|
|
||||||
" results = self.vector_db.similarity_search(\n",
|
|
||||||
" query=query,\n",
|
|
||||||
" k=top_k,\n",
|
|
||||||
" filter={\"source_doc\": doc_index}\n",
|
|
||||||
" )\n",
|
|
||||||
" return [doc.page_content for doc in results]\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" \n",
|
|
||||||
" def generate_response(self, question, doc_index):\n",
|
|
||||||
" \"\"\"Generate context-aware answer using LLM\"\"\"\n",
|
|
||||||
" # Retrieve relevant context\n",
|
|
||||||
" context_chunks = self.rag.document_query(question, doc_index)\n",
|
|
||||||
" context = \"\\n\".join(context_chunks)\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" rag_system = ChromaRAGSystem()\n",
|
|
||||||
" \n",
|
|
||||||
" # Init vector store\n",
|
|
||||||
" if not os.path.exists(CHROMA_PERSIST_DIR):\n",
|
|
||||||
" print(\"Creating new vector store...\")\n",
|
|
||||||
" rag_system.build_vector_store()\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Loading existing vector store...\")\n",
|
|
||||||
" rag_system.load_vector_store()\n",
|
|
||||||
" \n",
|
|
||||||
" # Init answer generator\n",
|
|
||||||
" answer_engine = AnswerGenerator(rag_system)\n",
|
|
||||||
"\n",
|
|
||||||
" queries = [\n",
|
|
||||||
" (\"چرا اینترنت همراه اول گوشی وصل نمیشود؟\", 0),\n",
|
|
||||||
" (\"چطوری ویپ مورد نظرمو پیدا کنم؟\", 1),\n",
|
|
||||||
" (\"شاه عباس که بود؟\", 2),\n",
|
|
||||||
" (\"خلیفه سلطان که بود و چه کرد؟\", 3),\n",
|
|
||||||
" (\"کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟\", 4),\n",
|
|
||||||
" (\"شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟\", 5)\n",
|
|
||||||
" ]\n",
|
|
||||||
" \n",
|
|
||||||
" with open( r'/home/masih/rag_data/response.txt', 'w', encoding='utf-8') as output_file: #repalce path\n",
|
|
||||||
" for q_num, (query, doc_idx) in enumerate(queries):\n",
|
|
||||||
" answer = answer_engine.generate_response(query, doc_idx)\n",
|
|
||||||
" output_file.write(f\"سوال {q_num+1} ({doc_idx+1}):\\n{query}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
|
|
||||||
" print(f\"پردازش سوال {q_num+1}/{len(queries)} تکمیل شد\")\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"تمامی سوالات با موفقیت پردازش شدند!\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
@ -1,155 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "0d92bf90-2548-4a24-87f7-2d87a7dbbd4c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.vectorstores import Chroma\n",
|
|
||||||
"from langchain.embeddings import SentenceTransformerEmbeddings\n",
|
|
||||||
"from langchain.schema import Document\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"# CHANGED THE ORDER OF PATHS AND IT RETRIEVED THE RESPONSES CORRECTLY. \n",
|
|
||||||
"DOCUMENT_PATHS = [\n",
|
|
||||||
" r'/home/masih/rag_data/vape.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/Hamrah.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/Shah.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/Khalife.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/takapoo.txt',\n",
|
|
||||||
" r'/home/masih/rag_data/carbon.txt',\n",
|
|
||||||
"\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'gemma2:9b'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"CHROMA_PERSIST_DIR = r'\\home\\Masih\\chroma_db\\chroma_db'\n",
|
|
||||||
"\n",
|
|
||||||
"class ChromaRAGSystem:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" # Init embedding model\n",
|
|
||||||
" self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
|
|
||||||
" # Vector store instance\n",
|
|
||||||
" self.vector_db = None\n",
|
|
||||||
" \n",
|
|
||||||
" def build_vector_store(self):\n",
|
|
||||||
" \"\"\"Process documents and create Chroma vector store\"\"\"\n",
|
|
||||||
" all_docs = []\n",
|
|
||||||
"\n",
|
|
||||||
" for doc_idx, path in enumerate(DOCUMENT_PATHS):\n",
|
|
||||||
" with open(path, 'r', encoding='utf-8') as f:\n",
|
|
||||||
" text = re.sub(r'\\s+', ' ', f.read()).strip()\n",
|
|
||||||
" # sliding window chunking\n",
|
|
||||||
" chunks = [\n",
|
|
||||||
" text[i:i+CHUNK_SIZE] \n",
|
|
||||||
" for i in range(0, len(text), CHUNK_SIZE - OVERLAP)\n",
|
|
||||||
" ]\n",
|
|
||||||
" # LangChain documents with metadata\n",
|
|
||||||
" for chunk in chunks:\n",
|
|
||||||
" all_docs.append(Document(\n",
|
|
||||||
" page_content=chunk,\n",
|
|
||||||
" metadata={\"source_doc\": doc_idx}\n",
|
|
||||||
" ))\n",
|
|
||||||
"\n",
|
|
||||||
" # Chroma vector store\n",
|
|
||||||
" self.vector_db = Chroma.from_documents(\n",
|
|
||||||
" documents=all_docs,\n",
|
|
||||||
" embedding=self.embeddings,\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR\n",
|
|
||||||
" )\n",
|
|
||||||
" self.vector_db.persist()\n",
|
|
||||||
" \n",
|
|
||||||
" def load_vector_store(self):\n",
|
|
||||||
" \"\"\"Load existing Chroma vector store\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma(\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR,\n",
|
|
||||||
" embedding_function=self.embeddings\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" def document_query(self, query, top_k=5):\n",
|
|
||||||
" \"\"\"Retrieve context from all documents based on query\"\"\"\n",
|
|
||||||
" # Perform similarity search across all documents\n",
|
|
||||||
" results = self.vector_db.similarity_search(query=query, k=top_k)\n",
|
|
||||||
" return [doc.page_content for doc in results]\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" \n",
|
|
||||||
" def generate_response(self, question):\n",
|
|
||||||
" \"\"\"Generate context-aware answer using LLM\"\"\"\n",
|
|
||||||
" # Retrieve relevant context from the best matching documents\n",
|
|
||||||
" context_chunks = self.rag.document_query(question)\n",
|
|
||||||
" context = \"\\n\".join(context_chunks)\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" rag_system = ChromaRAGSystem()\n",
|
|
||||||
" \n",
|
|
||||||
" # Init vector store\n",
|
|
||||||
" if not os.path.exists(CHROMA_PERSIST_DIR):\n",
|
|
||||||
" print(\"Creating new vector store...\")\n",
|
|
||||||
" rag_system.build_vector_store()\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Loading existing vector store...\")\n",
|
|
||||||
" rag_system.load_vector_store()\n",
|
|
||||||
" \n",
|
|
||||||
" # Init answer generator\n",
|
|
||||||
" answer_engine = AnswerGenerator(rag_system)\n",
|
|
||||||
"\n",
|
|
||||||
" queries = [\n",
|
|
||||||
" \"چرا اینترنت همراه اول گوشی وصل نمیشود؟\",\n",
|
|
||||||
" \"چطوری ویپ مورد نظرمو پیدا کنم؟\",\n",
|
|
||||||
" \"شاه عباس که بود؟\",\n",
|
|
||||||
" \"خلیفه سلطان که بود و چه کرد؟\",\n",
|
|
||||||
" \"کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟\",\n",
|
|
||||||
" \"شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟\"\n",
|
|
||||||
" ]\n",
|
|
||||||
" \n",
|
|
||||||
" with open( r'/home/masih/rag_data/response2.txt', 'w', encoding='utf-8') as output_file: \n",
|
|
||||||
" for q_num, query in enumerate(queries):\n",
|
|
||||||
" answer = answer_engine.generate_response(query)\n",
|
|
||||||
" output_file.write(f\"سوال {q_num+1}:\\n{query}\\n\\nپاسخ:\\n{answer}\\n\\n{'='*50}\\n\\n\")\n",
|
|
||||||
" print(f\"پردازش سوال {q_num+1}/{len(queries)} تکمیل شد\")\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"تمامی سوالات با موفقیت پردازش شدند!\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
110
4. Readme.md
110
4. Readme.md
@ -1,110 +0,0 @@
|
|||||||
سوال 1:
|
|
||||||
چرا اینترنت همراه اول گوشی وصل نمیشود؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
یکی از دلایل وصل نشدن اینترنت همراه اول گوشی به APN (Access Point Name) مربوط می شود. برای حل این مشکل باید مراحل زیر را انجام دهید:
|
|
||||||
|
|
||||||
1. به مسیر Settings > More > Mobile Network > Access Point Names بروید.
|
|
||||||
2. با توجه به اسم سیم کارت، آن را حذف کنید.
|
|
||||||
3. کلید Menu را لمس کرده و سپس new APN را انتخاب کرده و عبارات زیر را وارد کنید: name:mci-gprs apn:mcinet
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
سوال 2:
|
|
||||||
چطوری ویپ مورد نظرمو پیدا کنم؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
برای انتخاب ویپ، به ویژگیهایی مثل بودجه، نیازها و سلیقه خود توجه کنید.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
سوال 3:
|
|
||||||
شاه عباس که بود؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
شاه عباس اوّل، فرزند سلطان محمّد خدابنده صفوی و پنجمین پادشاه سلسله صفویّه، در اوّل رمضان ۹۷۸ق در هرات به دنیا آمد.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
سوال 4:
|
|
||||||
خلیفه سلطان که بود و چه کرد؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
خلیفه سلطان از شاعران و منشیان معروف عصر صفوی بود که توسط خلیفه سلطان به منصب واقعهنویسی منصوب شد. آثار ادبی و تاریخی مهمی از او به جا مانده است.
|
|
||||||
|
|
||||||
او نه تنها از ادیبان و شاعران حمایت میکرد، بلکه از بخششها و صلههای خود محروم نمیساخت. خلیفه سلطان فردی عالم و روحانی بود که در امور نظامی و لشکر نیز نقش موثر داشت.
|
|
||||||
|
|
||||||
نقش او در فتح قندهار به حدی مهم بود که برخی این انتساب را به خاطر جدش امیر علاءالدین، یکی از خلفاى صوفى بر طریقت سلسله متصوفه صفویه میدانند.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
سوال 5:
|
|
||||||
کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
## تفاوت کربن سیاه (بلک) و کربن فعال
|
|
||||||
|
|
||||||
|
|
||||||
**کربن سیاه** (همچنین به عنوان کربن بلک شناخته میشود) نوعی کربن بی شکل است. این پودر سیاه، سبک، شل و بسیار ریز با مساحت سطح بالایی بین 10 تا 3000 متر مربع در گرم است. از احتراق ناقص فرآوردههای نفتی سنگین مانند قیر FCC، قیر زغال سنگ یا قیر کراکینگ اتیلن تولید میشود.
|
|
||||||
|
|
||||||
**ویژگیهای کربن سیاه:**
|
|
||||||
|
|
||||||
* نوعی کربن پاراکریستالین است با نسبت سطح به حجم بالایی (اما کمتر از کربن فعال).
|
|
||||||
* نسبت به دوده دارای نسبت سطح به حجم پایین تری و محتوای هیدروکربن های آروماتیک چند حلقه ای نیز بطور قابل توجهی کمتر است.
|
|
||||||
* شکل دانه ها کروی است، اما نظم بلوری آن نسبت به گرافیت در سطح پائینتری قرار دارد.
|
|
||||||
* با حرارت دادن در دمای ۳۰۰۰ درجه سانتیگراد به مدت طولانی به گرافیت تبدیل می شود.
|
|
||||||
|
|
||||||
**کاربردهای کربن سیاه:**
|
|
||||||
|
|
||||||
* رنگدانه سیاه در جوهرها، رنگ آمیزی رزین و تونرها
|
|
||||||
* تولید فلاپی دیسکها و غشاهای ضد الکتریسته ساکن
|
|
||||||
* الیاف
|
|
||||||
* پودر خوراکی (رنگدانه سانست یلو، بتاکاروتن، بیت روت)
|
|
||||||
|
|
||||||
**کربن فعال** یک کربن تصفیه شده است که از مواد اولیه ارگانیک (پوسته، زغال سنگ، چوب و غیره) در غیاب هوا گرم می شود تا اجزای غیرکربنی را کاهش دهد. سپس با گاز و سطح واکنش می دهد تا ساختاری با ریز حفره های توسعه یافته تولید کند.
|
|
||||||
|
|
||||||
**کاربردهای کربن فعال:**
|
|
||||||
|
|
||||||
* تصفیه آب و هوا
|
|
||||||
* تصفیه فاضلاب
|
|
||||||
* جداسازی مواد شیمیایی
|
|
||||||
|
|
||||||
**تفاوت کربن سیاه و کربن فعال:**
|
|
||||||
|
|
||||||
| ویژگی | کربن سیاه | کربن فعال |
|
|
||||||
|---|---|---|
|
|
||||||
| ساختار | بی شکل، پاراکریستالین | ساختاری با ریز حفره های توسعه یافته |
|
|
||||||
| تولید | احتراق ناقص فرآوردههای نفتی | حرارت دادن مواد ارگانیک در غیاب هوا |
|
|
||||||
| کاربرد | رنگدانه، تقویت کننده لاستیک | تصفیه، جداسازی |
|
|
||||||
|
|
||||||
|
|
||||||
**جمع بندی:**
|
|
||||||
|
|
||||||
کربن سیاه و کربن فعال هر دو از کربن ساخته شده اند اما ساختار، روش تولید و کاربردهایشان متفاوت است.
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
سوال 6:
|
|
||||||
شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟
|
|
||||||
|
|
||||||
پاسخ:
|
|
||||||
شرکت تکاپو صنعت نامی (اپتیم پارک) انواع مختلف پارکینگ مکانیزه نظیر پالت لس، پالت لس کامباین، روتاری، پازلی و ستونی (مسکونی) را تولید می کند.
|
|
||||||
|
|
||||||
این شرکت به دلیل بومی سازی تکنولوژی، بهره گیری از تجربیات صنعتی و متدهای منحصر بفرد در طراحی و تولید پارکینگ های مکانیزه و اخذ گواهینامه CE اروپا، ISO 14001, ISO9001, ISO45001 و تاییدیه از سازمان آتش نشانی و خدمات ایمنی منحصر به فرد است.
|
|
||||||
|
|
||||||
متاسفانه در متن اطلاعاتی در مورد روند سهام این شرکت ذکر نشده است.
|
|
||||||
@ -1,133 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "6b7fb76c-3c87-4f8c-9635-a2bddf5bc8ea",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.vectorstores import Chroma\n",
|
|
||||||
"from langchain.embeddings import SentenceTransformerEmbeddings\n",
|
|
||||||
"from langchain.schema import Document\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'gemma2:9b'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"CHROMA_PERSIST_DIR = r'\\home\\Masih\\chroma_db\\chroma_db'\n",
|
|
||||||
"\n",
|
|
||||||
"class ChromaRAGSystem:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" # Init embedding model\n",
|
|
||||||
" self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
|
|
||||||
" # Vector store instance\n",
|
|
||||||
" self.vector_db = None\n",
|
|
||||||
" \n",
|
|
||||||
" def build_vector_store(self, documents):\n",
|
|
||||||
" \"\"\"Create Chroma vector store from documents\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma.from_documents(\n",
|
|
||||||
" documents=documents,\n",
|
|
||||||
" embedding=self.embeddings,\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR\n",
|
|
||||||
" )\n",
|
|
||||||
" self.vector_db.persist()\n",
|
|
||||||
" \n",
|
|
||||||
" def load_vector_store(self):\n",
|
|
||||||
" \"\"\"Load existing Chroma vector store\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma(\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR,\n",
|
|
||||||
" embedding_function=self.embeddings\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" def document_query(self, query, top_k=5):\n",
|
|
||||||
" \"\"\"Retrieve context from documents based on query\"\"\"\n",
|
|
||||||
" # Perform similarity search across all documents\n",
|
|
||||||
" results = self.vector_db.similarity_search(query=query, k=top_k)\n",
|
|
||||||
" return [doc.page_content for doc in results]\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" \n",
|
|
||||||
" def generate_response(self, question):\n",
|
|
||||||
" \"\"\"Generate context-aware answer using LLM\"\"\"\n",
|
|
||||||
" # Retrieve relevant context from the best matching documents\n",
|
|
||||||
" context_chunks = self.rag.document_query(question)\n",
|
|
||||||
" context = \"\\n\".join(context_chunks)\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"def scrape_url(url):\n",
|
|
||||||
" \"\"\"Scrape the content from a given URL\"\"\"\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Extract the article text (adjust this as per the specific page's structure)\n",
|
|
||||||
" paragraphs = soup.find_all('p')\n",
|
|
||||||
" article_text = \"\\n\".join([para.get_text() for para in paragraphs])\n",
|
|
||||||
"\n",
|
|
||||||
" return article_text\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" url = \"https://tosinso.com/articles/40596\"\n",
|
|
||||||
" article_content = scrape_url(url)\n",
|
|
||||||
"\n",
|
|
||||||
" # Process the scraped content and create a vector store\n",
|
|
||||||
" rag_system = ChromaRAGSystem()\n",
|
|
||||||
"\n",
|
|
||||||
" # Chunk the article content\n",
|
|
||||||
" chunks = [article_content[i:i+CHUNK_SIZE] for i in range(0, len(article_content), CHUNK_SIZE - OVERLAP)]\n",
|
|
||||||
" documents = [Document(page_content=chunk) for chunk in chunks]\n",
|
|
||||||
"\n",
|
|
||||||
" # Build vector store\n",
|
|
||||||
" rag_system.build_vector_store(documents)\n",
|
|
||||||
"\n",
|
|
||||||
" # Init answer generator\n",
|
|
||||||
" answer_engine = AnswerGenerator(rag_system)\n",
|
|
||||||
"\n",
|
|
||||||
" # The query to be answered\n",
|
|
||||||
" query = \"تفاوت زیروکلاینت و تین کلاینت با PC در چیست؟\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Generate and print the response\n",
|
|
||||||
" answer = answer_engine.generate_response(query)\n",
|
|
||||||
" print(answer)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python [conda env:base] *",
|
|
||||||
"language": "python",
|
|
||||||
"name": "conda-base-py"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
11
5.Readme.md
11
5.Readme.md
@ -1,11 +0,0 @@
|
|||||||
درست مانند رایانههای شخصی (PC)، زیرو و تین کلاینتها کاربردهای مخصوص به خود را دارند.
|
|
||||||
|
|
||||||
رایانههای شخصی دارای تمامی امکانات لازم هستند؛ اما از نظر امنیتی دچار ضعفهایی میباشند. از آنجا که نیازی به تمامی امکانات موجود در رایانههای شخصی وجود ندارد و کاهش سطح امنیت در این سیستمها مدنظر است، سیستمهای زیرو و تین کلاینت شکل گرفتند.
|
|
||||||
|
|
||||||
**تین کلاینت:**
|
|
||||||
این سیستمها به منابع خود از طریق سرورها دسترسی پیدا میکنند و از قدرت پردازش بیشتری نسبت به زیرو کلاینتها برخوردارند.
|
|
||||||
|
|
||||||
**زیرو کلاینت:**
|
|
||||||
تمامی پردازشها به صورت متمرکز بر روی سرور انجام میشود و دستگاه تنها به عنوان رابط کاربری برای نمایش خروجی عمل میکند.
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,187 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "f9327343-3e11-4a88-b798-95ff4644e2a5",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.vectorstores import Chroma\n",
|
|
||||||
"from langchain.embeddings import SentenceTransformerEmbeddings\n",
|
|
||||||
"from langchain.schema import Document\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'gemma2:9b'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"CHROMA_PERSIST_DIR = r'\\home\\Masih\\chroma_db\\chroma_db'\n",
|
|
||||||
"\n",
|
|
||||||
"class ChromaRAGSystem:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" # Init embedding model\n",
|
|
||||||
" self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
|
|
||||||
" # Vector store instance\n",
|
|
||||||
" self.vector_db = None\n",
|
|
||||||
" \n",
|
|
||||||
" def build_vector_store(self, documents):\n",
|
|
||||||
" \"\"\"Create Chroma vector store from documents\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma.from_documents(\n",
|
|
||||||
" documents=documents,\n",
|
|
||||||
" embedding=self.embeddings,\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR\n",
|
|
||||||
" )\n",
|
|
||||||
" self.vector_db.persist()\n",
|
|
||||||
" \n",
|
|
||||||
" def load_vector_store(self):\n",
|
|
||||||
" \"\"\"Load existing Chroma vector store\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma(\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR,\n",
|
|
||||||
" embedding_function=self.embeddings\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" def document_query(self, query, top_k=5):\n",
|
|
||||||
" \"\"\"Retrieve context from documents based on query\"\"\"\n",
|
|
||||||
" # Perform similarity search across all documents\n",
|
|
||||||
" results = self.vector_db.similarity_search(query=query, k=top_k)\n",
|
|
||||||
" return [doc.page_content for doc in results]\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" \n",
|
|
||||||
" def generate_response(self, question):\n",
|
|
||||||
" \"\"\"Generate context-aware answer using LLM\"\"\"\n",
|
|
||||||
" # Retrieve relevant context from the best matching documents\n",
|
|
||||||
" context_chunks = self.rag.document_query(question)\n",
|
|
||||||
" context = \"\\n\".join(context_chunks)\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"def scrape_url(url):\n",
|
|
||||||
" \"\"\"Scrape the content from a given URL\"\"\"\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Extract the article text (adjust this as per the specific page's structure)\n",
|
|
||||||
" paragraphs = soup.find_all('p')\n",
|
|
||||||
" article_text = \"\\n\".join([para.get_text() for para in paragraphs])\n",
|
|
||||||
"\n",
|
|
||||||
" return article_text\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" url = \"https://tosinso.com/articles/40596\"\n",
|
|
||||||
" article_content = scrape_url(url)\n",
|
|
||||||
"\n",
|
|
||||||
" # Process the scraped content and create a vector store\n",
|
|
||||||
" rag_system = ChromaRAGSystem()\n",
|
|
||||||
"\n",
|
|
||||||
" # Chunk the article content\n",
|
|
||||||
" chunks = [article_content[i:i+CHUNK_SIZE] for i in range(0, len(article_content), CHUNK_SIZE - OVERLAP)]\n",
|
|
||||||
" documents = [Document(page_content=chunk) for chunk in chunks]\n",
|
|
||||||
"\n",
|
|
||||||
" # Build vector store\n",
|
|
||||||
" rag_system.build_vector_store(documents)\n",
|
|
||||||
"\n",
|
|
||||||
" # Init answer generator\n",
|
|
||||||
" answer_engine = AnswerGenerator(rag_system)\n",
|
|
||||||
"\n",
|
|
||||||
" # The query to be answered\n",
|
|
||||||
" query = \"تفاوت زیروکلاینت و تین کلاینت با PC در چیست؟\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Generate and print the response\n",
|
|
||||||
" answer = answer_engine.generate_response(query)\n",
|
|
||||||
" print(answer)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "cc9f2290-d16f-4722-857a-7996d4722857",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"\n",
|
|
||||||
"def search_internet(query):\n",
|
|
||||||
" \"\"\"Search the web for the given query and return a relevant snippet.\"\"\"\n",
|
|
||||||
" query = query.replace(\" \", \"+\") # Format the query for URLs\n",
|
|
||||||
" url = f\"https://www.google.com/search?q={query}\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Send a GET request to Google (NOTE: scraping Google directly can get blocked)\n",
|
|
||||||
" headers = {\n",
|
|
||||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n",
|
|
||||||
" }\n",
|
|
||||||
" response = requests.get(url, headers=headers)\n",
|
|
||||||
"\n",
|
|
||||||
" if response.status_code != 200:\n",
|
|
||||||
" return \"Error: Unable to retrieve data from the internet.\"\n",
|
|
||||||
" \n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Scrape content from search result snippets (extract the first result snippet)\n",
|
|
||||||
" search_results = soup.find_all('div', {'class': 'BNeawe iBp4i AP7Wnd'})\n",
|
|
||||||
" if search_results:\n",
|
|
||||||
" return search_results[0].get_text()\n",
|
|
||||||
" \n",
|
|
||||||
" return \"No relevant information found on the web.\"\n",
|
|
||||||
"\n",
|
|
||||||
"def generate_answer(query):\n",
|
|
||||||
" \"\"\"Generate an answer by first checking Wikipedia and then searching the internet.\"\"\"\n",
|
|
||||||
" # First, check Wikipedia for Persian content\n",
|
|
||||||
" wikipedia_answer = search_wikipedia(query)\n",
|
|
||||||
" if wikipedia_answer and \"Error\" not in wikipedia_answer:\n",
|
|
||||||
" return wikipedia_answer\n",
|
|
||||||
" \n",
|
|
||||||
" # If not found in Wikipedia, search the web\n",
|
|
||||||
" internet_answer = search_internet(query)\n",
|
|
||||||
" return internet_answer\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" query = \"شاه عباس صفوی که بود و چه کرد؟\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Get the answer from Wikipedia and Internet search\n",
|
|
||||||
" answer = generate_answer(query)\n",
|
|
||||||
" \n",
|
|
||||||
" # Print the answer\n",
|
|
||||||
" print(f\"Answer: {answer}\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,7 +0,0 @@
|
|||||||
# پاسخ
|
|
||||||
|
|
||||||
عباس یکم (۲۷ ژانویهٔ ۱۵۷۱ – ۱۹ ژانویهٔ ۱۶۲۹)، مشهور به شاه عباس کبیر، پنجمین پادشاه ایران صفوی بود که از سال ۱۵۸۷ تا ۱۶۲۹ میلادی حکومت کرد. او با کمک قبایل **قزلباش**، **شاملو** و **استاجلو** و طی جنگهای داخلی شهرهای خراسان که از سال ۱۵۸۰ میلادی آغاز گشت، قدرت را از پدرش، شاه محمد خدابنده، ستاند. در سال ۱۵۸۷ میلادی، در ۱۷ سالگی پادشاه ایران شد و به مدت ۴۱ سال سلطنت کرد.
|
|
||||||
|
|
||||||
شاه عباس هنگامی به سلطنت رسید که مناطق غرب و شمال غربی ایران در تصرف امپراتوری عثمانی بودند؛ خراسان و شمال شرق ایران نیز تحت اشغال ازبکها قرار داشتند. طی ۱۲ سال پس از مرگ شاه تهماسب یکم و دوران پادشاهی شاه اسماعیل دوم و شاه محمد خدابنده، قدرت شاه کاهش یافته بود و دستهبندیهای قبیلهای قزلباشها مجدداً به شکل فزایندهای بروز کرده بود. همچنین، دوگانگی میان قزلباشان ترکمان و تاجیک در دربار شدت پیدا کرده بود.
|
|
||||||
|
|
||||||

|
|
||||||
@ -1,287 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "f9327343-3e11-4a88-b798-95ff4644e2a5",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.vectorstores import Chroma\n",
|
|
||||||
"from langchain.embeddings import SentenceTransformerEmbeddings\n",
|
|
||||||
"from langchain.schema import Document\n",
|
|
||||||
"from ollama import chat\n",
|
|
||||||
"import os\n",
|
|
||||||
"import re\n",
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"\n",
|
|
||||||
"EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n",
|
|
||||||
"LLM_MODEL = 'gemma2:9b'\n",
|
|
||||||
"CHUNK_SIZE = 1000\n",
|
|
||||||
"OVERLAP = 200\n",
|
|
||||||
"CHROMA_PERSIST_DIR = r'\\home\\Masih\\chroma_db\\chroma_db'\n",
|
|
||||||
"\n",
|
|
||||||
"class ChromaRAGSystem:\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" # Init embedding model\n",
|
|
||||||
" self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)\n",
|
|
||||||
" # Vector store instance\n",
|
|
||||||
" self.vector_db = None\n",
|
|
||||||
" \n",
|
|
||||||
" def build_vector_store(self, documents):\n",
|
|
||||||
" \"\"\"Create Chroma vector store from documents\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma.from_documents(\n",
|
|
||||||
" documents=documents,\n",
|
|
||||||
" embedding=self.embeddings,\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR\n",
|
|
||||||
" )\n",
|
|
||||||
" self.vector_db.persist()\n",
|
|
||||||
" \n",
|
|
||||||
" def load_vector_store(self):\n",
|
|
||||||
" \"\"\"Load existing Chroma vector store\"\"\"\n",
|
|
||||||
" self.vector_db = Chroma(\n",
|
|
||||||
" persist_directory=CHROMA_PERSIST_DIR,\n",
|
|
||||||
" embedding_function=self.embeddings\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" def document_query(self, query, top_k=5):\n",
|
|
||||||
" \"\"\"Retrieve context from documents based on query\"\"\"\n",
|
|
||||||
" # Perform similarity search across all documents\n",
|
|
||||||
" results = self.vector_db.similarity_search(query=query, k=top_k)\n",
|
|
||||||
" return [doc.page_content for doc in results]\n",
|
|
||||||
"\n",
|
|
||||||
"class AnswerGenerator:\n",
|
|
||||||
" def __init__(self, rag_system):\n",
|
|
||||||
" self.rag = rag_system\n",
|
|
||||||
" \n",
|
|
||||||
" def generate_response(self, question):\n",
|
|
||||||
" \"\"\"Generate context-aware answer using LLM\"\"\"\n",
|
|
||||||
" # Retrieve relevant context from the best matching documents\n",
|
|
||||||
" context_chunks = self.rag.document_query(question)\n",
|
|
||||||
" context = \"\\n\".join(context_chunks)\n",
|
|
||||||
" \n",
|
|
||||||
" prompt = f\"\"\"با استفاده از متن زیر به سوال پاسخ دهید:\n",
|
|
||||||
"{context}\n",
|
|
||||||
"\n",
|
|
||||||
"اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید\n",
|
|
||||||
"\n",
|
|
||||||
"سوال: {question}\n",
|
|
||||||
"پاسخ:\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" response = chat(model=LLM_MODEL, messages=[{'role': 'user', 'content': prompt}])\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"def scrape_url(url):\n",
|
|
||||||
" \"\"\"Scrape the content from a given URL\"\"\"\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Extract the article text (adjust this as per the specific page's structure)\n",
|
|
||||||
" paragraphs = soup.find_all('p')\n",
|
|
||||||
" article_text = \"\\n\".join([para.get_text() for para in paragraphs])\n",
|
|
||||||
"\n",
|
|
||||||
" return article_text\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" url = \"https://tosinso.com/articles/40596\"\n",
|
|
||||||
" article_content = scrape_url(url)\n",
|
|
||||||
"\n",
|
|
||||||
" # Process the scraped content and create a vector store\n",
|
|
||||||
" rag_system = ChromaRAGSystem()\n",
|
|
||||||
"\n",
|
|
||||||
" # Chunk the article content\n",
|
|
||||||
" chunks = [article_content[i:i+CHUNK_SIZE] for i in range(0, len(article_content), CHUNK_SIZE - OVERLAP)]\n",
|
|
||||||
" documents = [Document(page_content=chunk) for chunk in chunks]\n",
|
|
||||||
"\n",
|
|
||||||
" # Build vector store\n",
|
|
||||||
" rag_system.build_vector_store(documents)\n",
|
|
||||||
"\n",
|
|
||||||
" # Init answer generator\n",
|
|
||||||
" answer_engine = AnswerGenerator(rag_system)\n",
|
|
||||||
"\n",
|
|
||||||
" # The query to be answered\n",
|
|
||||||
" query = \"تفاوت زیروکلاینت و تین کلاینت با PC در چیست؟\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Generate and print the response\n",
|
|
||||||
" answer = answer_engine.generate_response(query)\n",
|
|
||||||
" print(answer)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "cc9f2290-d16f-4722-857a-7996d4722857",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"\n",
|
|
||||||
"def search_internet(query):\n",
|
|
||||||
" \"\"\"Search the web for the given query and return a relevant snippet.\"\"\"\n",
|
|
||||||
" query = query.replace(\" \", \"+\") # Format the query for URLs\n",
|
|
||||||
" url = f\"https://www.google.com/search?q={query}\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Send a GET request to Google (NOTE: scraping Google directly can get blocked)\n",
|
|
||||||
" headers = {\n",
|
|
||||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n",
|
|
||||||
" }\n",
|
|
||||||
" response = requests.get(url, headers=headers)\n",
|
|
||||||
"\n",
|
|
||||||
" if response.status_code != 200:\n",
|
|
||||||
" return \"Error: Unable to retrieve data from the internet.\"\n",
|
|
||||||
" \n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Scrape content from search result snippets (extract the first result snippet)\n",
|
|
||||||
" search_results = soup.find_all('div', {'class': 'BNeawe iBp4i AP7Wnd'})\n",
|
|
||||||
" if search_results:\n",
|
|
||||||
" return search_results[0].get_text()\n",
|
|
||||||
" \n",
|
|
||||||
" return \"No relevant information found on the web.\"\n",
|
|
||||||
"\n",
|
|
||||||
"def generate_answer(query):\n",
|
|
||||||
" \"\"\"Generate an answer by first checking Wikipedia and then searching the internet.\"\"\"\n",
|
|
||||||
" # First, check Wikipedia for Persian content\n",
|
|
||||||
" wikipedia_answer = search_wikipedia(query)\n",
|
|
||||||
" if wikipedia_answer and \"Error\" not in wikipedia_answer:\n",
|
|
||||||
" return wikipedia_answer\n",
|
|
||||||
" \n",
|
|
||||||
" # If not found in Wikipedia, search the web\n",
|
|
||||||
" internet_answer = search_internet(query)\n",
|
|
||||||
" return internet_answer\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" query = \"شاه عباس صفوی که بود و چه کرد؟\"\n",
|
|
||||||
" \n",
|
|
||||||
" # Get the answer from Wikipedia and Internet search\n",
|
|
||||||
" answer = generate_answer(query)\n",
|
|
||||||
" \n",
|
|
||||||
" # Print the answer\n",
|
|
||||||
" print(f\"Answer: {answer}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "091053f6-8c5d-4cd7-89a2-08690ed1f2d4",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import requests\n",
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"def search_duckduckgo(query):\n",
|
|
||||||
" \"\"\"Search DuckDuckGo for the given query and return URLs of the top results.\"\"\"\n",
|
|
||||||
" query = query.replace(\" \", \"+\") # Format the query for DuckDuckGo search URLs\n",
|
|
||||||
" url = f\"https://duckduckgo.com/html/?q={query}\"\n",
|
|
||||||
" \n",
|
|
||||||
" headers = {\n",
|
|
||||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n",
|
|
||||||
" }\n",
|
|
||||||
" \n",
|
|
||||||
" try:\n",
|
|
||||||
" response = requests.get(url, headers=headers, timeout=5)\n",
|
|
||||||
" if response.status_code != 200:\n",
|
|
||||||
" print(\"Failed to fetch search results.\")\n",
|
|
||||||
" return []\n",
|
|
||||||
"\n",
|
|
||||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
|
||||||
"\n",
|
|
||||||
" # Find all result URLs\n",
|
|
||||||
" search_results = soup.find_all('a', {'class': 'result__a'}, href=True)\n",
|
|
||||||
" urls = []\n",
|
|
||||||
" for result in search_results:\n",
|
|
||||||
" href = result['href']\n",
|
|
||||||
" if href.startswith('http'):\n",
|
|
||||||
" urls.append(href)\n",
|
|
||||||
" return urls[:5] # Limit to the first 5 URLs\n",
|
|
||||||
" except requests.RequestException as e:\n",
|
|
||||||
" print(f\"Error fetching DuckDuckGo search results: {e}\")\n",
|
|
||||||
" return []\n",
|
|
||||||
"\n",
|
|
||||||
"def crawl_website(url):\n",
|
|
||||||
" \"\"\"Crawl a website and extract text content.\"\"\"\n",
|
|
||||||
" try:\n",
|
|
||||||
" headers = {\"User-Agent\": \"Mozilla/5.0\"}\n",
|
|
||||||
" response = requests.get(url, headers=headers, timeout=5)\n",
|
|
||||||
" response.raise_for_status()\n",
|
|
||||||
" \n",
|
|
||||||
" soup = BeautifulSoup(response.text, \"html.parser\")\n",
|
|
||||||
" paragraphs = soup.find_all(\"p\")\n",
|
|
||||||
" \n",
|
|
||||||
" # Extract text from the first few paragraphs\n",
|
|
||||||
" text = \" \".join([p.get_text(strip=True) for p in paragraphs[:3]]) # Limit to first 3 paragraphs\n",
|
|
||||||
" return text\n",
|
|
||||||
" except requests.RequestException as e:\n",
|
|
||||||
" print(f\"Error crawling {url}: {e}\")\n",
|
|
||||||
" return None\n",
|
|
||||||
"\n",
|
|
||||||
"def generate_answer(query):\n",
|
|
||||||
" \"\"\"Generate an answer by searching DuckDuckGo and crawling the web.\"\"\"\n",
|
|
||||||
" # Search DuckDuckGo for relevant URLs\n",
|
|
||||||
" urls = search_duckduckgo(query)\n",
|
|
||||||
" if not urls:\n",
|
|
||||||
" print(\"No search results found.\")\n",
|
|
||||||
" return\n",
|
|
||||||
"\n",
|
|
||||||
" # Crawl the first few URLs and collect the text content\n",
|
|
||||||
" data = {}\n",
|
|
||||||
" for url in urls:\n",
|
|
||||||
" print(f\"Crawling: {url}\")\n",
|
|
||||||
" text = crawl_website(url)\n",
|
|
||||||
" if text:\n",
|
|
||||||
" data[url] = text\n",
|
|
||||||
" time.sleep(2) # Delay to prevent being blocked by DuckDuckGo\n",
|
|
||||||
"\n",
|
|
||||||
" # Find the most fitting answer by choosing the content from the first URL (or the most relevant content)\n",
|
|
||||||
" if data:\n",
|
|
||||||
" print(\"\\nBest answer found:\\n\")\n",
|
|
||||||
" return list(data.values())[0] # Pick content from the first valid URL\n",
|
|
||||||
" else:\n",
|
|
||||||
" return \"No suitable content found.\"\n",
|
|
||||||
"\n",
|
|
||||||
"def save_results_to_file(content, file_path):\n",
|
|
||||||
" \"\"\"Save the generated answer to a text file.\"\"\"\n",
|
|
||||||
" try:\n",
|
|
||||||
" with open(file_path, 'w', encoding='utf-8') as f:\n",
|
|
||||||
" f.write(content)\n",
|
|
||||||
" print(f\"Results saved to {file_path}\")\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Error saving results to file: {e}\")\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" query = input(\"Enter your question: \")\n",
|
|
||||||
" answer = generate_answer(query)\n",
|
|
||||||
" \n",
|
|
||||||
" # Save the answer to the file if it exists\n",
|
|
||||||
" if answer:\n",
|
|
||||||
" file_path = r'/home/masih/rag_data/search_results.txt' \n",
|
|
||||||
" save_results_to_file(answer, file_path)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1 +0,0 @@
|
|||||||
مجموعهی تکاپو صنعت نامی با نام تجاری اپتیم پارک، به پشتوانه تجربه 60 سالهی مدیران این شرکت در حوزهی تجهیزات صنعتی مربوط به صنایع فولاد، پتروشیمی و نیروگاهی تاسیس شد. این مجموعه در کنار ادامه فعالیت در حوزه های پیشین، فعالیت خود در زمینه ماشین سازی و ساختمان را با جدیت آغاز نمود که این امر، منجر به ورود تخصصی این شرکت به حوزه طراحی و تولید انواع مختلف پارکینگ مکانیزه نظیر پالت لس، پالت لس کامباین، روتاری، پازلی و ستونی (مسکونی) با نام تجاری اپتیم پارک گردید. محصولات طراحی و تولید شده در مجموعه اپتیم پارک با توجه به بومی سازی تکنولوژی و بهره گیری از تجربیات صنعتی و متدهای منحصر بفرد، در کمترین زمان ممکن جای خود را در میان مشتریان، شامل ارگان های دولتی و بخش خصوصی باز نمود. مجموعه اپتیم پارک در مسیر خود، همواره فرآیند ایده پردازی، طراحی و ساخت نسل های آتی محصولات را در برنامه دارد و نوآوری و خلاقیت در ارائه محصولات را رکن اساسی در مسیر تعالی خود می داند و در راستای دستیابی به اهداف خود، موفق به اخذ گواهینامه CE اروپا، ISO 14001, ISO9001, ISO45001 و تاییدیه از سازمان آتش نشانی و خدمات ایمنی شده است. خط مشی مدیریت یکپارچه شرکت تکاپو صنعت نامی (اپتیم پارک) شرکت تکاپو صنعت نامی (اپتیم پارک) به عنوان اولین شرکت دانش بنیان در زمینه طراحی، توسعه و ساخت پارکینگهای مکانیزه به منظور حصول اطمینان از بهبود فضای کسب و کار، جهتگیری راهبردی مناسب با بافت سازمانی شرکت، ترویج تفکر مبتنی بر ریسک و رویکرد فرآیندی و دستیابی به نتایج مورد انتظار از سیستم مدیریت یکپارچه، ضمن رعایت الزامات قانونی و مقرراتی، استانداردهای ISO9001:2015 را به عنوان الگویی برای سیستم مدیریت کیفیت و استاندارد ISO14001:2015 را برای سیستم مدیریت محیط زیست و ISO45001:2018 سیستم مدیریت ایمنی و بهداشت حرفه ای انتخاب نموده است.
|
|
||||||
43
8. Readme.md
43
8. Readme.md
@ -1,43 +0,0 @@
|
|||||||
|
|
||||||
شاه عباس صفوی یک شخصیت پیچیده در تاریخ ایران است.
|
|
||||||
|
|
||||||
**از نظر مثبت:**
|
|
||||||
|
|
||||||
* **اصلاحات و رونق:** او به خاطر اصلاحاتش در ارتش و حکومت، رونق اقتصادی و فرهنگی ایران در دوره اش شناخته میشود. اصفهان را به پایتخت باشکوهی تبدیل کرد که نماد شکوفایی صفوی بود.
|
|
||||||
* **گسترش قلمرو:** در نتایج جستجو اشاره شده است که او قلمرو ایران را گسترش داد.
|
|
||||||
|
|
||||||
**از نظر منفی:**
|
|
||||||
|
|
||||||
* **بیرحمی:** در نتایج گوگل آمده است، اگرچه برخی او را بزرگترین پادشاه صفوی میدانند، اما نسبت به مخالفان سیاسی بیرحم بود.
|
|
||||||
|
|
||||||
|
|
||||||
در کل، شاه عباس صفوی یک حاکم قدرتمند و اثرگذار بود که ایران را از نظر اقتصادی و فرهنگی رو به پیش برد، اما روشهایش در برخورد با مخالفان انتقادبرانگیز بوده است.
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
**روند فکری من:**
|
|
||||||
من ابتدا به نتایج جست‌وجوی گوگل، داک داک گو و ویکی‌پدیا به زبان فارسی نگاه کردم تا تصویری کلی از شاه عباس صفوی و اقدامات او به دست بیاورم. با توجه به اینکه سؤال جنبهٔ اخلاقی دارد، تمرکز اصلی من روی توصیف "بی‌رحم" در مورد رفتار او با مخالفان سیاسی بود.
|
|
||||||
|
|
||||||
**پاسخ:**
|
|
||||||
شاه عباس صفوی به خاطر اصلاحاتش در ارتش و حکومت، رونق اقتصادی و فرهنگی ایران، و گسترش قلمرو شناخته میشود. او اصفهان را به پایتخت باشکوهی تبدیل کرد و بسیاری او را بزرگترین پادشاه صفوی میدانند. با این حال، برخی منابع او را نسبت به مخالفان سیاسی بیرحم توصیف میکنند.
|
|
||||||
|
|
||||||
بنابراین، آیا شاه عباس آدم خوبی بود؟ این یک سئوال است که پاسخ قطعی ندارد و بستگی به معیارهای شما برای "خوب" دارد. از یک سو، دستاوردهای او در زمینه توسعه و رونق ایران قابل ستایش هستند. از سوی دیگر، بیرحمی نسبت به مخالفانش، یک جنبه تاریک از شخصیت او محسوب میشود.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
**روند فکری من:**
|
|
||||||
|
|
||||||
1. پرسش به زبان فارسی است، پس ابتدا باید بر روی نتایج جستجوی فارسی تمرکز کنم.
|
|
||||||
2. نتایج گوگل و داک داک گو را برای درک مفهوم "چراغ DNS مودم" و معنی خاموش شدن آن مورد بررسی قرار دادم.
|
|
||||||
3. به نظر میرسد همه منابع به یک نتیجه مشترک رسیده اند: خاموش شدن چراغ DNS، نشانه عدم اتصال مودم به سرورهای DNS است.
|
|
||||||
|
|
||||||
**پاسخ:**
|
|
||||||
|
|
||||||
وقتی چراغ DNS روی مودم خاموش میشود، معمولاً به این معنی است که مودم نمیتواند به سرورهای DNS متصل شود. DNS سیستمی است که نامهای دامنه (مانند google.com) را به آدرسهای IP ترجمه میکند. بدون DNS، شما میتوانید به اینترنت متصل باشید، اما نمیتوانید وبسایتها را با نام باز کنید.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
==================================================
|
|
||||||
@ -1,199 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "3724fa5d-3e2f-4a69-9824-0acc2443dadf",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import httpx\n",
|
|
||||||
"import json\n",
|
|
||||||
"from crewai import Agent, Task, Crew\n",
|
|
||||||
"from langchain.agents import tool\n",
|
|
||||||
"from langchain_community.llms import Ollama\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"DOCS_DIR = \"docs\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Load documents from the 'docs' directory\n",
|
|
||||||
"def load_documents():\n",
|
|
||||||
" \"\"\"Loads all .txt files from the 'docs' directory and returns a list of (filename, content) tuples.\"\"\"\n",
|
|
||||||
" if not os.path.exists(DOCS_DIR):\n",
|
|
||||||
" return []\n",
|
|
||||||
" documents = []\n",
|
|
||||||
" for filename in os.listdir(DOCS_DIR):\n",
|
|
||||||
" if filename.endswith(\".txt\"):\n",
|
|
||||||
" with open(os.path.join(DOCS_DIR, filename), \"r\", encoding=\"utf-8\") as f:\n",
|
|
||||||
" content = f.read()\n",
|
|
||||||
" documents.append((filename, content))\n",
|
|
||||||
" return documents\n",
|
|
||||||
"\n",
|
|
||||||
"DOCUMENTS = load_documents()\n",
|
|
||||||
"\n",
|
|
||||||
"@tool\n",
|
|
||||||
"def google_search(query: str) -> str:\n",
|
|
||||||
" \"\"\"Searches Google using Serper API for web results based on the query.\n",
|
|
||||||
"\n",
|
|
||||||
" Args:\n",
|
|
||||||
" query (str): The search query.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" str: A summary of Google search results from Serper or a message if the search fails.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" url = \"https://google.serper.dev/search\"\n",
|
|
||||||
" headers = {\n",
|
|
||||||
" \"X-API-KEY\": \"ebda9508d11e4cc8c5091494a76b783fda7fe6da\",\n",
|
|
||||||
" \"Content-Type\": \"application/json\"\n",
|
|
||||||
" }\n",
|
|
||||||
" response = httpx.post(url, json={\"q\": query}, headers=headers)\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" data = response.json()\n",
|
|
||||||
" results = []\n",
|
|
||||||
" for result in data.get(\"organic\", []):\n",
|
|
||||||
" title = result.get(\"title\", \"No title\")\n",
|
|
||||||
" snippet = result.get(\"snippet\", \"\")\n",
|
|
||||||
" link = result.get(\"link\", \"\")\n",
|
|
||||||
" results.append(f\"{title}: {snippet} ({link})\")\n",
|
|
||||||
" if len(results) >= 3:\n",
|
|
||||||
" break\n",
|
|
||||||
" return \"\\n\".join(results) if results else \"No results found using Google Serper.\"\n",
|
|
||||||
" return \"Error performing Google Serper search.\"\n",
|
|
||||||
"\n",
|
|
||||||
"@tool\n",
|
|
||||||
"def search_docs(query: str) -> str:\n",
|
|
||||||
" \"\"\"Searches local documents for relevant information based on the query.\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" query (str): The search query.\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" str: Snippets from documents matching the query or a message if no documents are found.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" if not DOCUMENTS:\n",
|
|
||||||
" return \"No documents found.\"\n",
|
|
||||||
" query_words = query.lower().split()\n",
|
|
||||||
" results = []\n",
|
|
||||||
" for filename, content in DOCUMENTS:\n",
|
|
||||||
" lines = content.split('\\n')\n",
|
|
||||||
" for line in lines:\n",
|
|
||||||
" if any(word in line.lower() for word in query_words):\n",
|
|
||||||
" results.append(f\"From {filename}: {line[:200]}\")\n",
|
|
||||||
" if len(results) >= 3:\n",
|
|
||||||
" break\n",
|
|
||||||
" if len(results) >= 3:\n",
|
|
||||||
" break\n",
|
|
||||||
" return \"\\n\".join(results) if results else \"No relevant information found in documents.\"\n",
|
|
||||||
"\n",
|
|
||||||
"@tool\n",
|
|
||||||
"def wikipedia(query: str) -> str:\n",
|
|
||||||
" \"\"\"Searches Persian Wikipedia for a summary based on the query.\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" query (str): The search query.\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" str: The snippet from the first Wikipedia search result or a message if no results are found.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" response = httpx.get(\"https://fa.wikipedia.org/w/api.php\", params={\n",
|
|
||||||
" \"action\": \"query\",\n",
|
|
||||||
" \"list\": \"search\",\n",
|
|
||||||
" \"srsearch\": query,\n",
|
|
||||||
" \"format\": \"json\",\n",
|
|
||||||
" \"utf8\": 1\n",
|
|
||||||
" })\n",
|
|
||||||
" results = response.json().get(\"query\", {}).get(\"search\", [])\n",
|
|
||||||
" if results:\n",
|
|
||||||
" return results[0].get(\"snippet\", \"اطلاعاتی یافت نشد.\")\n",
|
|
||||||
" return \"اطلاعاتی یافت نشد در ویکیپدیا.\"\n",
|
|
||||||
"\n",
|
|
||||||
"@tool\n",
|
|
||||||
"def duckduckgo(query: str) -> str:\n",
|
|
||||||
" \"\"\"Searches DuckDuckGo for web results based on the query.\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" query (str): The search query.\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" str: A placeholder message with search results or a message if the search fails.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" response = httpx.get(\"https://duckduckgo.com/html/\", params={\"q\": query})\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" return \"نتایج جستجوی DuckDuckGo: [صفحه 1, صفحه 2, صفحه 3]\"\n",
|
|
||||||
" return \"نتایجی از DuckDuckGo یافت نشد.\"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"llm = Ollama(model=\"llama3.2\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Create the agent with CrewAI prioritizing Google Serper search first\n",
|
|
||||||
"agent = Agent(\n",
|
|
||||||
" role=\"researcher\",\n",
|
|
||||||
" goal=\"Answer the user's question by primarily using Google search results from the Serper API, then using local documents, Persian Wikipedia, and DuckDuckGo if needed.\",\n",
|
|
||||||
" backstory=\"You are an AI assistant designed to efficiently gather and synthesize information from multiple sources, prioritizing Google search results from the Serper API.\",\n",
|
|
||||||
" tools=[google_search, search_docs, wikipedia, duckduckgo],\n",
|
|
||||||
" llm=llm,\n",
|
|
||||||
" verbose=True\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"def query(question):\n",
|
|
||||||
" \"\"\"Runs the query through the agent and saves the result.\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" question (str): The user's question.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" task = Task(\n",
|
|
||||||
" description=question,\n",
|
|
||||||
" expected_output=\"A detailed answer to the user's question.\",\n",
|
|
||||||
" agent=agent\n",
|
|
||||||
" )\n",
|
|
||||||
" crew = Crew(\n",
|
|
||||||
" agents=[agent],\n",
|
|
||||||
" tasks=[task],\n",
|
|
||||||
" verbose=2\n",
|
|
||||||
" )\n",
|
|
||||||
" result = crew.kickoff()\n",
|
|
||||||
" print(result)\n",
|
|
||||||
" save_to_file(str(result))\n",
|
|
||||||
"\n",
|
|
||||||
"def save_to_file(text, path=\"C:/Users/ASUS/Downloads/Month 2/agents.txt\"):\n",
|
|
||||||
" \"\"\"Saves the result to a file.\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" text (str): The text to save.\n",
|
|
||||||
" path (str): The file path to save the text.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" with open(path, 'a', encoding='utf-8') as f:\n",
|
|
||||||
" f.write(text + \"\\n\" + \"=\"*50 + \"\\n\")\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" # Example queries to test the agent\n",
|
|
||||||
" q1 = \"شاه عباس صفوی آدم خوبی بوده؟ چرا؟\"\n",
|
|
||||||
" q2 = \"وقتی چراغ DNS مودم قطع میشه به چه معنیه؟\"\n",
|
|
||||||
" query(q1)\n",
|
|
||||||
" query(q2)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,111 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"id": "f65de83c-0d36-4e69-93c2-a2c5fb0f6811",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Disable Proxy\n",
|
|
||||||
"\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"def clear_proxy_settings():\n",
|
|
||||||
" for var in [\"HTTP_PROXY\", \"HTTPS_PROXY\", \"ALL_PROXY\", \"http_proxy\", \"https_proxy\", \"all_proxy\"]:\n",
|
|
||||||
" if var in os.environ:\n",
|
|
||||||
" del os.environ[var]\n",
|
|
||||||
"\n",
|
|
||||||
"clear_proxy_settings()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"id": "c4a3951c-fcd7-4a05-a141-f6e09aacc111",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"بر اساس متن موجود، سید محمود خلیفه سلطانی مدیرعامل محترم شرکت تکابو صنعت نامی (اینجن پارک) می باشد. \n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" \n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import requests\n",
|
|
||||||
"import json\n",
|
|
||||||
"from ollama import chat \n",
|
|
||||||
"\n",
|
|
||||||
"def image_to_gemma_response(image_path, query):\n",
|
|
||||||
" # Extract text from image using Landing AI API\n",
|
|
||||||
" url = \"https://api.va.landing.ai/v1/tools/agentic-document-analysis\"\n",
|
|
||||||
" files = {\"image\": open(image_path, \"rb\")}\n",
|
|
||||||
" headers = {\"Authorization\": \"Basic YWdmbjl0d200emV3cjVicHFzeXpuOmFBWGJCTGZRUFBsMzZVVWF3aXFVYVduY0hGVUdIQmhq\"}\n",
|
|
||||||
" response = requests.post(url, files=files, headers=headers)\n",
|
|
||||||
" data = response.json()\n",
|
|
||||||
" \n",
|
|
||||||
" # Get markdown text from response\n",
|
|
||||||
" extracted_text = \"\"\n",
|
|
||||||
" if \"data\" in data and \"markdown\" in data[\"data\"]:\n",
|
|
||||||
" extracted_text = data[\"data\"][\"markdown\"]\n",
|
|
||||||
" \n",
|
|
||||||
" # Create prompt for Gemma\n",
|
|
||||||
" prompt = f\"\"\"\n",
|
|
||||||
" # متن استخراج شده از تصویر:\n",
|
|
||||||
" {extracted_text}\n",
|
|
||||||
" \n",
|
|
||||||
" # سوال:\n",
|
|
||||||
" {query}\n",
|
|
||||||
" \n",
|
|
||||||
" لطفاً با استفاده از اطلاعات بالا، به سوال پاسخ دهید. پاسخ را به فارسی بنویسید.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Get response from Gemma\n",
|
|
||||||
" messages = [{\"role\": \"user\", \"content\": prompt}]\n",
|
|
||||||
" response = chat(model=\"gemma2:9b\", messages=messages)\n",
|
|
||||||
" return response['message']['content']\n",
|
|
||||||
"\n",
|
|
||||||
"# Usage\n",
|
|
||||||
"image_path = \"/home/masih/Desktop/Temp/m.jpg\"\n",
|
|
||||||
"query = \"سید محمود خلیفه سلطانی کیست؟\"\n",
|
|
||||||
"result = image_to_gemma_response(image_path, query)\n",
|
|
||||||
"print(result)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "b8920fdd-80d2-4299-ab66-2809bee95f99",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python [conda env:base] *",
|
|
||||||
"language": "python",
|
|
||||||
"name": "conda-base-py"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
11
9. Readme.md
11
9. Readme.md
@ -1,11 +0,0 @@
|
|||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
Other features, such as memory, were also added but are not documented individually here.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
|
|
||||||
@ -1 +0,0 @@
|
|||||||

|
|
||||||
@ -1,238 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "213b722c-b0d3-489c-a736-521e0d34dade",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import nltk\n",
|
|
||||||
"from langchain_community.document_loaders import PDFPlumberLoader, WebBaseLoader\n",
|
|
||||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
|
||||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
|
||||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
|
||||||
"from langchain_ollama import OllamaEmbeddings, ChatOllama\n",
|
|
||||||
"from langchain_community.retrievers import BM25Retriever\n",
|
|
||||||
"from langchain.retrievers import EnsembleRetriever\n",
|
|
||||||
"from typing_extensions import TypedDict\n",
|
|
||||||
"from langgraph.graph import START, END, StateGraph\n",
|
|
||||||
"\n",
|
|
||||||
"# Ensure NLTK tokenizer is available\n",
|
|
||||||
"try:\n",
|
|
||||||
" nltk.data.find('tokenizers/punkt')\n",
|
|
||||||
"except LookupError:\n",
|
|
||||||
" nltk.download('punkt')\n",
|
|
||||||
"\n",
|
|
||||||
"# Initialize model and embeddings\n",
|
|
||||||
"model = ChatOllama(model=\"gemma3:12b\", temperature=0.2)\n",
|
|
||||||
"embeddings = OllamaEmbeddings(model=\"gemma3:12b\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Vector store\n",
|
|
||||||
"vector_store = InMemoryVectorStore(embeddings)\n",
|
|
||||||
"\n",
|
|
||||||
"# Templates\n",
|
|
||||||
"qa_template = \"\"\"\n",
|
|
||||||
"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \n",
|
|
||||||
"If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n",
|
|
||||||
"Question: {question} \n",
|
|
||||||
"Context: {context} \n",
|
|
||||||
"Answer:\n",
|
|
||||||
"\"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Text splitter\n",
|
|
||||||
"def split_text(documents):\n",
|
|
||||||
" text_splitter = RecursiveCharacterTextSplitter(\n",
|
|
||||||
" chunk_size=1000,\n",
|
|
||||||
" chunk_overlap=200,\n",
|
|
||||||
" add_start_index=True\n",
|
|
||||||
" )\n",
|
|
||||||
" return text_splitter.split_documents(documents)\n",
|
|
||||||
"\n",
|
|
||||||
"# PDF handling\n",
|
|
||||||
"def load_pdf(file_path):\n",
|
|
||||||
" if not os.path.exists(file_path):\n",
|
|
||||||
" raise FileNotFoundError(f\"File not found: {file_path}\")\n",
|
|
||||||
" loader = PDFPlumberLoader(file_path)\n",
|
|
||||||
" documents = loader.load()\n",
|
|
||||||
" return documents\n",
|
|
||||||
"\n",
|
|
||||||
"# Web page handling (using WebBaseLoader)\n",
|
|
||||||
"def load_webpage(url):\n",
|
|
||||||
" loader = WebBaseLoader(url)\n",
|
|
||||||
" documents = loader.load()\n",
|
|
||||||
" return documents\n",
|
|
||||||
"\n",
|
|
||||||
"# Hybrid retriever\n",
|
|
||||||
"def build_hybrid_retriever(documents):\n",
|
|
||||||
" vector_store.clear()\n",
|
|
||||||
" vector_store.add_documents(documents)\n",
|
|
||||||
" semantic_retriever = vector_store.as_retriever(search_kwargs={\"k\": 3})\n",
|
|
||||||
" bm25_retriever = BM25Retriever.from_documents(documents)\n",
|
|
||||||
" bm25_retriever.k = 3\n",
|
|
||||||
" hybrid_retriever = EnsembleRetriever(\n",
|
|
||||||
" retrievers=[semantic_retriever, bm25_retriever],\n",
|
|
||||||
" weights=[0.7, 0.3]\n",
|
|
||||||
" )\n",
|
|
||||||
" return hybrid_retriever\n",
|
|
||||||
"\n",
|
|
||||||
"# DuckDuckGo search implementation\n",
|
|
||||||
"def search_ddg(query, num_results=3):\n",
|
|
||||||
" from langchain_community.utilities import DuckDuckGoSearchAPIWrapper\n",
|
|
||||||
" search = DuckDuckGoSearchAPIWrapper()\n",
|
|
||||||
" results = search.results(query, num_results)\n",
|
|
||||||
" return results\n",
|
|
||||||
"\n",
|
|
||||||
"# Answer question with error handling\n",
|
|
||||||
"def answer_question(question, documents):\n",
|
|
||||||
" try:\n",
|
|
||||||
" context = \"\\n\\n\".join([doc.page_content for doc in documents])\n",
|
|
||||||
" prompt = ChatPromptTemplate.from_template(qa_template)\n",
|
|
||||||
" chain = prompt | model\n",
|
|
||||||
" return chain.invoke({\"question\": question, \"context\": context}).content\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" return f\"Error generating answer: {e}\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Simple RAG node for web search\n",
|
|
||||||
"class WebSearchState(TypedDict):\n",
|
|
||||||
" query: str\n",
|
|
||||||
" results: list\n",
|
|
||||||
" response: str\n",
|
|
||||||
"\n",
|
|
||||||
"def web_search(state):\n",
|
|
||||||
" results = search_ddg(state[\"query\"])\n",
|
|
||||||
" return {\"results\": results}\n",
|
|
||||||
"\n",
|
|
||||||
"def generate_search_response(state):\n",
|
|
||||||
" try:\n",
|
|
||||||
" context = \"\\n\\n\".join([f\"{r['title']}: {r['snippet']}\" for r in state[\"results\"]])\n",
|
|
||||||
" prompt = ChatPromptTemplate.from_template(qa_template)\n",
|
|
||||||
" chain = prompt | model\n",
|
|
||||||
" response = chain.invoke({\"question\": state[\"query\"], \"context\": context})\n",
|
|
||||||
" return {\"response\": response.content}\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" return {\"response\": f\"Error generating response: {e}\"}\n",
|
|
||||||
"\n",
|
|
||||||
"# Build search graph\n",
|
|
||||||
"search_graph = StateGraph(WebSearchState)\n",
|
|
||||||
"search_graph.add_node(\"search\", web_search)\n",
|
|
||||||
"search_graph.add_node(\"generate\", generate_search_response)\n",
|
|
||||||
"search_graph.add_edge(START, \"search\")\n",
|
|
||||||
"search_graph.add_edge(\"search\", \"generate\")\n",
|
|
||||||
"search_graph.add_edge(\"generate\", END)\n",
|
|
||||||
"search_workflow = search_graph.compile()\n",
|
|
||||||
"\n",
|
|
||||||
"# Main command-line interface\n",
|
|
||||||
"if __name__ == \"__main__\":\n",
|
|
||||||
" print(\"Welcome to the Advanced RAG System\")\n",
|
|
||||||
" print(\"Choose an option:\")\n",
|
|
||||||
" print(\"1. Analyze PDF\")\n",
|
|
||||||
" print(\"2. Crawl URL\")\n",
|
|
||||||
" print(\"3. Search Internet\")\n",
|
|
||||||
" choice = input(\"Enter your choice (1/2/3): \")\n",
|
|
||||||
"\n",
|
|
||||||
" if choice == \"1\":\n",
|
|
||||||
" pdf_path = input(\"Enter the path to the PDF file: \").strip()\n",
|
|
||||||
" if not pdf_path:\n",
|
|
||||||
" print(\"Please enter a valid file path.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" try:\n",
|
|
||||||
" print(\"Processing PDF...\")\n",
|
|
||||||
" documents = load_pdf(pdf_path)\n",
|
|
||||||
" if not documents:\n",
|
|
||||||
" print(\"No documents were loaded from the PDF. The file might be empty or not contain extractable text.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" chunked_documents = split_text(documents)\n",
|
|
||||||
" if not chunked_documents:\n",
|
|
||||||
" print(\"No text chunks were created. The PDF might not contain any text.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" retriever = build_hybrid_retriever(chunked_documents)\n",
|
|
||||||
" print(f\"Processed {len(chunked_documents)} chunks\")\n",
|
|
||||||
" question = input(\"Ask a question about the PDF: \").strip()\n",
|
|
||||||
" if not question:\n",
|
|
||||||
" print(\"Please enter a valid question.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Searching document...\")\n",
|
|
||||||
" related_documents = retriever.get_relevant_documents(question)\n",
|
|
||||||
" if not related_documents:\n",
|
|
||||||
" print(\"No relevant documents found for the question.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" answer = answer_question(question, related_documents)\n",
|
|
||||||
" print(\"Answer:\", answer)\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Error: {e}\")\n",
|
|
||||||
"\n",
|
|
||||||
" elif choice == \"2\":\n",
|
|
||||||
" url = input(\"Enter the URL to analyze: \").strip()\n",
|
|
||||||
" if not url:\n",
|
|
||||||
" print(\"Please enter a valid URL.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" try:\n",
|
|
||||||
" print(\"Loading webpage...\")\n",
|
|
||||||
" web_documents = load_webpage(url)\n",
|
|
||||||
" if not web_documents:\n",
|
|
||||||
" print(\"No documents were loaded from the webpage. The page might be empty or not contain extractable text.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" web_chunks = split_text(web_documents)\n",
|
|
||||||
" if not web_chunks:\n",
|
|
||||||
" print(\"No text chunks were created. The webpage might not contain any text.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" web_retriever = build_hybrid_retriever(web_chunks)\n",
|
|
||||||
" print(f\"Processed {len(web_chunks)} chunks from webpage\")\n",
|
|
||||||
" question = input(\"Ask a question about the webpage: \").strip()\n",
|
|
||||||
" if not question:\n",
|
|
||||||
" print(\"Please enter a valid question.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Analyzing content...\")\n",
|
|
||||||
" web_results = web_retriever.get_relevant_documents(question)\n",
|
|
||||||
" if not web_results:\n",
|
|
||||||
" print(\"No relevant documents found for the question.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" answer = answer_question(question, web_results)\n",
|
|
||||||
" print(\"Answer:\", answer)\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Error loading webpage: {e}\")\n",
|
|
||||||
"\n",
|
|
||||||
" elif choice == \"3\":\n",
|
|
||||||
" query = input(\"Enter your search query: \").strip()\n",
|
|
||||||
" if not query:\n",
|
|
||||||
" print(\"Please enter a valid search query.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" try:\n",
|
|
||||||
" print(\"Searching the web...\")\n",
|
|
||||||
" search_result = search_workflow.invoke({\"query\": query})\n",
|
|
||||||
" print(\"Response:\", search_result[\"response\"])\n",
|
|
||||||
" print(\"Sources:\")\n",
|
|
||||||
" for result in search_result[\"results\"]:\n",
|
|
||||||
" print(f\"- {result['title']}: {result['link']}\")\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Error during search: {e}\")\n",
|
|
||||||
"\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Invalid choice\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python [conda env:base] *",
|
|
||||||
"language": "python",
|
|
||||||
"name": "conda-base-py"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,235 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 30,
|
|
||||||
"id": "bee40edd-5f36-49e6-a64f-361674f1e681",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Chat with AI (type 'stop' or 'quit' to exit)\n",
|
|
||||||
"Note: First response may take longer while model loads\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdin",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"You: hi\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"Assistant: Hello! It's nice to meet you. Is there something I can help you with or would you like to chat for a bit?\n",
|
|
||||||
"Speech saved to response.mp3\n",
|
|
||||||
"Speed adjustment failed: [Errno 2] No such file or directory: 'ffprobe'. Using simple playback.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/masih/anaconda3/lib/python3.12/site-packages/pydub/utils.py:198: RuntimeWarning: Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work\n",
|
|
||||||
" warn(\"Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work\", RuntimeWarning)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdin",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"You: I don't know\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"Assistant: We can start with something simple then. Would you like to play a game, have a fun conversation, or just explore some topics together? I'm here to listen and help if you need it. What's been on your mind lately?\n",
|
|
||||||
"Speech saved to response.mp3\n",
|
|
||||||
"Speed adjustment failed: [Errno 2] No such file or directory: 'ffprobe'. Using simple playback.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/masih/anaconda3/lib/python3.12/site-packages/pydub/utils.py:198: RuntimeWarning: Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work\n",
|
|
||||||
" warn(\"Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work\", RuntimeWarning)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdin",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"You: quit\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Conversation ended\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from ollama import Client\n",
|
|
||||||
"from gtts import gTTS\n",
|
|
||||||
"from playsound import playsound\n",
|
|
||||||
"import os\n",
|
|
||||||
"from pydub import AudioSegment\n",
|
|
||||||
"from pydub.playback import play\n",
|
|
||||||
"\n",
|
|
||||||
"# Configure ffmpeg paths - IMPORTANT: Replace with your actual conda path\n",
|
|
||||||
"ffprobe_path = \"/home/masih/anaconda3/bin/ffprobe\"\n",
|
|
||||||
"AudioSegment.ffprobe = ffprobe_path\n",
|
|
||||||
"os.environ[\"PATH\"] += os.pathsep + os.path.dirname(ffprobe_path)\n",
|
|
||||||
"\n",
|
|
||||||
"# Initialize Ollama client\n",
|
|
||||||
"client = Client(host='http://localhost:11434')\n",
|
|
||||||
"\n",
|
|
||||||
"system_prompt = (\n",
|
|
||||||
" \"You are a helpful and polite assistant. \"\n",
|
|
||||||
" \"Provide clear and concise responses. \"\n",
|
|
||||||
" \"Avoid repetition and maintain a friendly tone.\"\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"conversation_history = []\n",
|
|
||||||
"max_history_length = 5\n",
|
|
||||||
"\n",
|
|
||||||
"def generate_response(user_input):\n",
|
|
||||||
" global conversation_history\n",
|
|
||||||
" conversation_history.append(f\"User: {user_input}\")\n",
|
|
||||||
" \n",
|
|
||||||
" messages = [{'role': 'system', 'content': system_prompt}]\n",
|
|
||||||
" for interaction in conversation_history[-max_history_length:]:\n",
|
|
||||||
" try:\n",
|
|
||||||
" role_part, content = interaction.split(': ', 1)\n",
|
|
||||||
" messages.append({\n",
|
|
||||||
" 'role': role_part.lower(),\n",
|
|
||||||
" 'content': content\n",
|
|
||||||
" })\n",
|
|
||||||
" except ValueError:\n",
|
|
||||||
" continue\n",
|
|
||||||
" \n",
|
|
||||||
" try:\n",
|
|
||||||
" response = client.chat(\n",
|
|
||||||
" model='llama3.2', # Corrected model name\n",
|
|
||||||
" messages=messages,\n",
|
|
||||||
" options={\n",
|
|
||||||
" 'temperature': 0.7,\n",
|
|
||||||
" 'top_p': 0.9,\n",
|
|
||||||
" 'num_predict': 100\n",
|
|
||||||
" }\n",
|
|
||||||
" )\n",
|
|
||||||
" assistant_response = response['message']['content']\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" assistant_response = f\"Error: {str(e)}\"\n",
|
|
||||||
" \n",
|
|
||||||
" conversation_history.append(f\"Assistant: {assistant_response}\")\n",
|
|
||||||
" return assistant_response\n",
|
|
||||||
"\n",
|
|
||||||
"def text_to_speech(text, output_file=\"response.mp3\", lang='en'):\n",
|
|
||||||
" try:\n",
|
|
||||||
" if os.path.exists(output_file):\n",
|
|
||||||
" os.remove(output_file)\n",
|
|
||||||
" tts = gTTS(text=text, lang=lang)\n",
|
|
||||||
" tts.save(output_file)\n",
|
|
||||||
" print(f\"Speech saved to {output_file}\")\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Text-to-speech error: {e}\")\n",
|
|
||||||
"\n",
|
|
||||||
"def play_sound_with_speed(file_path, speed=1.2):\n",
|
|
||||||
" if not os.path.exists(file_path):\n",
|
|
||||||
" print(\"Audio file not found\")\n",
|
|
||||||
" return\n",
|
|
||||||
" \n",
|
|
||||||
" try:\n",
|
|
||||||
" # Try speed-adjusted playback\n",
|
|
||||||
" audio = AudioSegment.from_file(file_path)\n",
|
|
||||||
" altered_audio = audio._spawn(\n",
|
|
||||||
" audio.raw_data,\n",
|
|
||||||
" overrides={\"frame_rate\": int(audio.frame_rate * speed)}\n",
|
|
||||||
" )\n",
|
|
||||||
" play(altered_audio)\n",
|
|
||||||
" print(\"Playing at 1.2x speed\")\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Speed adjustment failed: {str(e)}. Using simple playback.\")\n",
|
|
||||||
" try:\n",
|
|
||||||
" playsound(file_path)\n",
|
|
||||||
" except Exception as fallback_e:\n",
|
|
||||||
" print(f\"All playback methods failed: {str(fallback_e)}\")\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Chat with AI (type 'stop' or 'quit' to exit)\")\n",
|
|
||||||
"print(\"Note: First response may take longer while model loads\")\n",
|
|
||||||
"\n",
|
|
||||||
"while True:\n",
|
|
||||||
" try:\n",
|
|
||||||
" user_input = input(\"\\nYou: \")\n",
|
|
||||||
" if user_input.lower() in ['stop', 'quit']:\n",
|
|
||||||
" print(\"Conversation ended\")\n",
|
|
||||||
" break\n",
|
|
||||||
" \n",
|
|
||||||
" response = generate_response(user_input)\n",
|
|
||||||
" print(f\"\\nAssistant: {response}\")\n",
|
|
||||||
" \n",
|
|
||||||
" text_to_speech(response)\n",
|
|
||||||
" play_sound_with_speed(\"response.mp3\")\n",
|
|
||||||
" \n",
|
|
||||||
" except KeyboardInterrupt:\n",
|
|
||||||
" print(\"\\nConversation interrupted\")\n",
|
|
||||||
" break\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Critical error: {str(e)}\")\n",
|
|
||||||
" break"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "d673f565-6002-4b90-8c1b-7df956f536a0",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "afbdf0c0-aa60-46a6-a2a1-044e58f26162",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python [conda env:base] *",
|
|
||||||
"language": "python",
|
|
||||||
"name": "conda-base-py"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
97
LICENSE
97
LICENSE
@ -1,64 +1,73 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
PROPRIETARY SOFTWARE LICENSE AGREEMENT
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
Version 1.0, Effective Date: March 1, 2025
|
|
||||||
|
|
||||||
Copyright (c) 2025 Masih Moafi.
|
1. Definitions.
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Grant and License
|
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
|
||||||
This Software is licensed, not sold. All rights to the Software are reserved by Masih Moafi. No permission is granted to use, copy, modify, merge, publish, distribute, sublicense, or sell the Software, in whole or in part, unless you have obtained explicit, written approval from Masih Moafi prior to any such use.
|
|
||||||
|
|
||||||
Restrictions
|
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
|
||||||
|
|
||||||
You are not permitted to use the Software for any purpose without prior written authorization from Masih Moafi.
|
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
You are not permitted to copy, modify, or create derivative works based on the Software.
|
|
||||||
You are not permitted to distribute or publicly display the Software, in either source or binary form, without explicit written consent.
|
|
||||||
No right or license is granted to use any portion of the Software unless expressly provided in writing by Masih Moafi.
|
|
||||||
No Attribution or Citation Requirement
|
|
||||||
This License does not require you to provide any attribution or citation in connection with the Software. However, nothing in this License grants you any rights to use, reproduce, or distribute the Software without explicit authorization.
|
|
||||||
|
|
||||||
Termination
|
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
|
||||||
This License is effective until terminated. Any unauthorized use, copying, modification, or distribution of the Software shall automatically terminate any rights granted under this License. Upon termination, you must immediately destroy all copies of the Software.
|
|
||||||
|
|
||||||
Disclaimer of Warranty
|
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
|
||||||
The Software is provided "AS IS", without any warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. Masih Moafi shall not be liable for any claims, damages, or other liabilities arising from the use of the Software.
|
|
||||||
|
|
||||||
Limitation of Liability
|
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
|
||||||
In no event shall Masih Moafi be liable for any direct, indirect, incidental, special, or consequential damages arising out of the use of or inability to use the Software, even if advised of the possibility of such damages.
|
|
||||||
|
|
||||||
Governing Law
|
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
|
||||||
This License shall be governed by and construed in accordance with the laws of the Islamic Republic of Iran, without regard to its conflict of law provisions.
|
|
||||||
|
|
||||||
By using, copying, or otherwise interacting with this Software, you acknowledge that you have read and understood, and agree to be bound by, the terms of this Proprietary Software License Agreement.
|
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
|
||||||
|
|
||||||
توافقنامه مجوز نرمافزار مالکیتی
|
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
|
||||||
نسخه ۱.۰، تاریخ اجرایی: ۱ مارس ۲۰۲۵
|
|
||||||
|
|
||||||
حق تألیف (کپیرایت) (c) ۲۰۲۵ مسیح معافی.
|
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
|
||||||
تمام حقوق محفوظ است.
|
|
||||||
|
|
||||||
۱. اعطای مجوز و صدور مجوز
|
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
|
||||||
این نرمافزار به صورت مجوز صادر شده و به فروش نرفته است. تمامی حقوق مربوط به نرمافزار نزد مسیح معافی محفوظ است. هیچگونه اجازهای برای استفاده، کپی، تغییر، ترکیب، انتشار، توزیع، اعطای مجوز فرعی یا فروش نرمافزار، بهطور کلی یا جزئی، اعطا نمیشود مگر آنکه قبل از هرگونه استفاده، تأیید کتبی صریح از مسیح معافی دریافت شده باشد.
|
|
||||||
|
|
||||||
۲. محدودیتها
|
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
|
||||||
|
|
||||||
شما مجاز به استفاده از نرمافزار برای هیچ منظوری بدون دریافت تأیید کتبی قبلی از مسیح معافی نیستید.
|
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
|
||||||
شما مجاز به کپی، تغییر یا ایجاد آثار مشتق از نرمافزار نیستید.
|
|
||||||
شما مجاز به توزیع یا نمایش عمومی نرمافزار، چه بهصورت کد منبع و چه بهصورت باینری، بدون دریافت رضایت کتبی صریح نیستید.
|
|
||||||
هیچ حقی یا مجوزی برای استفاده از هیچ بخشی از نرمافزار اعطا نمیشود مگر آنکه بهطور صریح و کتبی از سوی مسیح معافی ارائه شود.
|
|
||||||
۳. عدم الزام به منسوبسازی یا ارجاع
|
|
||||||
این مجوز شما را ملزم نمیکند که در ارتباط با نرمافزار، منسوبسازی یا ارجاعی ارائه دهید. با این حال، هیچیک از مفاد این مجوز به شما حقی برای استفاده، تکثیر یا توزیع نرمافزار بدون دریافت تأیید صریح اعطا نمیکند.
|
|
||||||
|
|
||||||
۴. فسخ مجوز
|
(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and
|
||||||
این مجوز از زمان اجرایی تا زمانی که فسخ نشود معتبر است. هرگونه استفاده، تکثیر، تغییر یا توزیع غیرمجاز نرمافزار بهطور خودکار هرگونه حقی که بر اساس این مجوز به شما اعطا شده است را فسخ میکند. پس از فسخ، شما باید بلافاصله تمامی نسخههای نرمافزار را از بین ببرید.
|
|
||||||
|
|
||||||
۵. انکار گارانتی
|
(b) You must cause any modified files to carry prominent notices stating that You changed the files; and
|
||||||
نرمافزار «همانطور که هست» ارائه میشود، بدون هیچگونه گارانتی صریح یا ضمنی، از جمله گارانتیهای تجاری بودن، تناسب برای یک هدف خاص و عدم نقض حقوق. مسیح معافی در هیچ موردی مسئولیتی در قبال ادعاها، خسارات یا سایر مسئولیتهایی که از استفاده نرمافزار ناشی میشود، نخواهد داشت.
|
|
||||||
|
|
||||||
۶. محدودیت مسئولیت
|
(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
|
||||||
در هیچ شرایطی مسیح معافی مسئول هیچگونه خسارات مستقیم، غیرمستقیم، حادثهای، ویژه یا تبعی ناشی از استفاده یا عدم توانایی در استفاده از نرمافزار، حتی در صورت آگاهی از احتمال چنین خساراتی نخواهد بود.
|
|
||||||
|
|
||||||
۷. قانون حاکم
|
(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
|
||||||
این مجوز تابع و مشمول قوانین جمهوری اسلامی ایران است و بر اساس آن تفسیر خواهد شد، بدون توجه به قوانین تعارض قوانین.
|
|
||||||
|
|
||||||
با استفاده، تکثیر یا هرگونه تعامل با این نرم‌افزار، شما تأیید می‌کنید که شرایط این توافق‌نامه مجوز نرم‌افزار مالکیتی را خوانده و درک کرده‌اید و موافقت می‌کنید که به آن پایبند باشید.
|
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2025 MasihMoafi
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|||||||
67
README.md
67
README.md
@ -1,67 +1,2 @@
|
|||||||
# Deploying Agentic RAG Systems to Perform Various Tasks Using LLMs
|
# LLMs
|
||||||
|
|
||||||
This repository showcases the implementation of a Retrieval-Augmented Generation (RAG) system for answering questions using large language models (LLMs) and document retrieval. The system integrates document indexing, chunking, and similarity search with advanced language models like `gemma2:9b` to provide context-aware responses. Additionally, it incorporates a web-browsing agent for retrieving live data.
|
|
||||||
|
|
||||||
## Table of Contents
|
|
||||||
- [Overview](#overview)
|
|
||||||
- [Installation](#installation)
|
|
||||||
- [Usage](#usage)
|
|
||||||
- [Components](#components)
|
|
||||||
- [RAG System](#rag-system)
|
|
||||||
- [Answer Generator](#answer-generator)
|
|
||||||
- [Web Browsing Agent](#web-browsing-agent)
|
|
||||||
- [Chroma-based RAG](#chroma-based-rag)
|
|
||||||
- [Results](#results)
|
|
||||||
- [License](#license)
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
The project is designed to perform tasks like document-based question answering, real-time information retrieval via web scraping, and context-aware response generation. It leverages multiple techniques:
|
|
||||||
- **RAG (Retrieval-Augmented Generation)**: Uses document indexing and retrieval for question answering.
|
|
||||||
- **Web Browsing**: Fetches live data to answer real-time queries.
|
|
||||||
- **Chroma and FAISS**: Index and retrieve relevant document chunks efficiently.
|
|
||||||
|
|
||||||
The system is multilingual and supports Persian language queries.
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
Steps Performed:
|
|
||||||
Document Processing: The documents are chunked into smaller segments for efficient retrieval.
|
|
||||||
Index Creation or Loading: A FAISS index or Chroma-based vector store is created or loaded for similarity search.
|
|
||||||
Query Answering: A set of queries is processed, and answers are generated using LLMs, based on the retrieved document chunks or web content.
|
|
||||||
Results are saved in an output file (response.txt or agent_results.txt).
|
|
||||||
|
|
||||||
## Components
|
|
||||||
### RAG System
|
|
||||||
The RAG system includes:
|
|
||||||
|
|
||||||
Document Chunking: Splitting large documents into smaller chunks to improve retrieval performance.
|
|
||||||
Index Creation: Using FAISS (or Chroma) for indexing the document chunks based on their embeddings.
|
|
||||||
Similarity Search: Utilizing cosine similarity for retrieving relevant chunks during query processing.
|
|
||||||
### Answer Generator
|
|
||||||
The Answer Generator class interacts with the RAG system to fetch the most relevant document chunks based on a given question. It then uses the LLM to generate a context-aware response.
|
|
||||||
|
|
||||||
### Chroma-based RAG
|
|
||||||
|
|
||||||
An alternative RAG implementation using Chroma for storing and querying document embeddings is also included. This utilizes LangChain's Chroma integration for efficient vector store management and querying.
|
|
||||||
|
|
||||||
### Web Browsing Agent
|
|
||||||
The Web Browsing Agent fetches real-time information from the web by scraping web pages. The agent can be used to get live data on current events, statistics, and more.
|
|
||||||
|
|
||||||
### Doc Search Agent
|
|
||||||
|
|
||||||
### Deep Search Agent
|
|
||||||
|
|
||||||
### The Power of Agentic Search
|
|
||||||
|
|
||||||
|
|
||||||
## Results
|
|
||||||
The system successfully processes predefined questions and generates responses based on the relevant document context. Additionally, the web-browsing agent retrieves live data for real-time questions, providing a comprehensive, multi-source approach to answering queries.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
The system demonstrates effective integration of multiple techniques to solve complex QA tasks.
|
|
||||||
|
|
||||||
## License
|
|
||||||
This project is licensed under the terms set out in the [LICENSE](LICENSE) file.
|
|
||||||
|
|||||||
@ -1,239 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 27,
|
|
||||||
"id": "36a09b01-3bb2-4a9f-ac22-354793704d60",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import torch\n",
|
|
||||||
"from transformers import pipeline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 28,
|
|
||||||
"id": "a8674877-8f68-40c1-8a82-7ee881777a14",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"translator = pipeline(task=\"translation\",\n",
|
|
||||||
" model=\"facebook/nllb-200-distilled-600M\",\n",
|
|
||||||
" torch_dtype=torch.bfloat16) "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 29,
|
|
||||||
"id": "824e1154-a60d-46fc-987f-812999342724",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"text = \"\"\"\\\n",
|
|
||||||
"My puppy is adorable, \\\n",
|
|
||||||
"Your kitten is cute.\n",
|
|
||||||
"Her panda is friendly.\n",
|
|
||||||
"His llama is thoughtful. \\\n",
|
|
||||||
"We all have nice pets!\"\"\""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 30,
|
|
||||||
"id": "5241727b-0821-4c95-8800-6df6313c7c64",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"text_translated = translator(text,\n",
|
|
||||||
" src_lang=\"eng_Latn\",\n",
|
|
||||||
" tgt_lang=\"fra_Latn\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 31,
|
|
||||||
"id": "cb0e5fd4-0e82-4dcf-8b2a-05d67e49e75e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[{'translation_text': 'Mon chiot est adorable, ton chaton est mignon, son panda est ami, sa lamme est attentive, nous avons tous de beaux animaux de compagnie.'}]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 31,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"text_translated"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 32,
|
|
||||||
"id": "604bd794-cb69-4226-8d09-61977034c7f6",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"summarizer = pipeline(task=\"summarization\",\n",
|
|
||||||
" model=\"facebook/bart-large-cnn\",\n",
|
|
||||||
" torch_dtype=torch.bfloat16)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 33,
|
|
||||||
"id": "c376f7cb-ecc9-43b2-bb0c-a04687795bff",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"text = \"\"\"\t1) People throughout history have been distracted. There is nothing uniquely distracting about modern technology. We are suffering from a crisis of attention because we don't allow ourselves a mental break. In particular, we no longer daydream, which is a beneficial type of spontaneous thought.\n",
|
|
||||||
"We’re bombarded by the idea that at the root of our attention issues lies a single powerful culprit: modern technology. If we truly want to focus, it seems, we need to turn off all our devices, quit social media, and retreat into the woods for a digital detox.\n",
|
|
||||||
" Here’s my resistance to that idea. At an elemental level, this particular era is no different than any other. there has always been a “crisis of attention.” Historically, people have turned to meditation (and other forms of contemplative(thoughtful, pensive) practice) to deal with feelings of being overwhelmed and scattered in focus, and to refocus and reflect on priorities , our inner values, intentions, purpose. This can certainly be a spiritual process, if that’s how you define it. But we’re discovering that mindfulness impacts the attention system and how it copes with the distractions that surround us and those that are generated internally. In part, that’s what meditation practitioners have always been pursuing. Think about life long ago: people in ancient India or medieval Europe didn’t have smartphones and Facebook, but they were still suffering in their own minds. They still turned to any number of practices for relief. They still described the same challenge: I’m not fully present for my life.\n",
|
|
||||||
" A crisis of attention can happen anytime you don’t allow yourself a break. when you don’t allow your mind to rest without having any task-at-hand. Remember our distinction between mind-wandering (having off-task thoughts during a task) and daydreaming (task-free spontaneous thought and opportunity for conscious reflection, creativity, and the like)? Well, one problem today is that we are always engaged in something. With these digital tools at our fingertips, we have constant access to all these forms of communication, content, and interaction, and we don’t tend to gravitate toward letting our thoughts meander(bend), unconstrained. Of the two types of spontaneous thought we discussed earlier, it’s the beneficial type, the daydreaming, that we barely get at all. \n",
|
|
||||||
" We all do it. I catch myself all the time, going from one type of mental engagement to the next. I call it hype tasking. Like surfing hyperlinks online (clicking from link to link as they grab your attention), we go from one task to the next and the next. You are probably doing it right now. We are “all task and no downtime.” And we’re asking an enormous amount from our attention systems. Your attentional capacity is not less than someone’s from hundreds of years ago. It’s only that right now, you’re using your attention in a particular focused way, all the time.\n",
|
|
||||||
" We’re taxing(exacting, demanding) our focused attention to the max. Hype tasking is hyper-taxing! Even something you might think of as relaxing (scrolling through Instagram, for example, or reading an article someone shared) is more engagement. It’s another task. Checking your notifications may seem “fun,” but it’s work for your attention. Task: check to see who posted what in response to my post. Task: check how many likes I got. Task: check who shared my funny meme. Your attention was focused on task after task after task, with no attentional downtime, not a moment for the mind to roam free.(travel aimlessly)\n",
|
|
||||||
" It’s not always realistic to unplug. We can’t just turn off our phones and pause our email. We cannot create a distraction-free world. The issue is not the existence of this technology; rather, it’s how we’re using it: we are not allowing our minds to pay attention differently. And this is where mindfulness comes in, as a way to steady your flashlight so you don’t end up swinging it around at any and all possible distractions, digital or not.\n",
|
|
||||||
"\"\"\""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 34,
|
|
||||||
"id": "e1920f67-f2ad-4b65-98fd-bd36aea8b866",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"summary = summarizer(text,\n",
|
|
||||||
" min_length=50,\n",
|
|
||||||
" max_length=250)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 35,
|
|
||||||
"id": "b7675973-dd49-45a7-ae20-27fd173fb209",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[{'summary_text': \"We are suffering from a crisis of attention because we don't allow ourselves a mental break. In particular, we no longer daydream, which is a beneficial type of spontaneous thought. We can’t just turn off our phones and pause our email. We cannot create a distraction-free world.\"}]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 35,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"summary"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "69c7ea2b-99cb-4c92-b9da-62d3cb272541",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from transformers.utils import logging\n",
|
|
||||||
"logging.set_verbosity_error()\n",
|
|
||||||
"from transformers import pipeline\n",
|
|
||||||
"sam_pipe = pipeline(\"mask-generation\", \"Zigeng/SlimSAM-uniform-77\")\n",
|
|
||||||
"from PIL import Image\n",
|
|
||||||
"raw_image = Image.open('C:/Users/Josep/OneDrive/Desktop/Edit Materials/heaven (7).jpg')\n",
|
|
||||||
"raw_image = raw_image.resize((720, 375))\n",
|
|
||||||
"output = sam_pipe(raw_image)\n",
|
|
||||||
"from helper import show_pipe_masks_on_image\n",
|
|
||||||
"show_pipe_masks_on_image(raw_image, output)\n",
|
|
||||||
"from transformers import SamModel, SamProcessor\n",
|
|
||||||
"model = SamModel.from_pretrained(\"./models/Zigeng/SlimSAM-uniform-77\")\n",
|
|
||||||
"processor = SamProcessor.from_pretrained(\"./models/Zigeng/SlimSAM-uniform-77\")\n",
|
|
||||||
"raw_image = raw_image.resize((720, 375))\n",
|
|
||||||
"input_points = [[[1600, 700]]]\n",
|
|
||||||
"inputs = processor(raw_image, input_points=input_points, return_tensors=\"pt\")\n",
|
|
||||||
"import torch\n",
|
|
||||||
"with torch.no_grad():\n",
|
|
||||||
" outputs = model(**inputs)\n",
|
|
||||||
"predicted_masks = processor.image_processor.post_process_masks(\n",
|
|
||||||
" outputs.pred_masks, inputs[\"original_sizes\"], inputs[\"reshaped_input_sizes\"]\n",
|
|
||||||
")\n",
|
|
||||||
"len(predicted_masks)\n",
|
|
||||||
"predicted_mask = predicted_masks[0]\n",
|
|
||||||
"predicted_mask.shape\n",
|
|
||||||
"outputs.iou_scores\n",
|
|
||||||
"from helper import show_mask_on_image\n",
|
|
||||||
"for i in range(3):\n",
|
|
||||||
" show_mask_on_image(raw_image, predicted_mask[:, i])\n",
|
|
||||||
"depth_estimator = pipeline(task=\"depth-estimation\", model=\"./models/Intel/dpt-hybrid-midas\")\n",
|
|
||||||
"raw_image = Image.open('gradio_tamagochi_vienna.png')\n",
|
|
||||||
"raw_image = raw_image.resize((806, 621))\n",
|
|
||||||
"output = depth_estimator(raw_image)\n",
|
|
||||||
"output\n",
|
|
||||||
"output[\"predicted_depth\"].shape\n",
|
|
||||||
"output[\"predicted_depth\"].unsqueeze(1).shape\n",
|
|
||||||
"prediction = torch.nn.functional.interpolate(\n",
|
|
||||||
" output[\"predicted_depth\"].unsqueeze(1), size=raw_image.size[::-1], mode=\"bicubic\", align_corners=False,\n",
|
|
||||||
")\n",
|
|
||||||
"prediction.shape\n",
|
|
||||||
"raw_image.size[::-1]\n",
|
|
||||||
"prediction\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"output = prediction.squeeze().numpy()\n",
|
|
||||||
"formatted = (output * 255 / np.max(output)).astype(\"uint8\")\n",
|
|
||||||
"depth = Image.fromarray(formatted)\n",
|
|
||||||
"depth\n",
|
|
||||||
"import os\n",
|
|
||||||
"import gradio as gr\n",
|
|
||||||
"from transformers import pipeline\n",
|
|
||||||
"def launch(input_image):\n",
|
|
||||||
" out = depth_estimator(input_image)\n",
|
|
||||||
"\n",
|
|
||||||
" # resize the prediction\n",
|
|
||||||
" prediction = torch.nn.functional.interpolate(\n",
|
|
||||||
" out[\"predicted_depth\"].unsqueeze(1),\n",
|
|
||||||
" size=input_image.size[::-1],\n",
|
|
||||||
" mode=\"bicubic\",\n",
|
|
||||||
" align_corners=False,\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" # normalize the prediction\n",
|
|
||||||
" output = prediction.squeeze().numpy()\n",
|
|
||||||
" formatted = (output * 255 / np.max(output)).astype(\"uint8\")\n",
|
|
||||||
" depth = Image.fromarray(formatted)\n",
|
|
||||||
" return depth\n",
|
|
||||||
"\n",
|
|
||||||
"iface = gr.Interface(launch, \n",
|
|
||||||
" inputs=gr.Image(type='pil'), \n",
|
|
||||||
" outputs=gr.Image(type='pil'))\n",
|
|
||||||
"iface.launch(share=True, server_port=int(os.environ['PORT1']))\n",
|
|
||||||
"iface.close()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.19"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
Loading…
x
Reference in New Issue
Block a user