In [None]:
# Smoke test: send a single Persian prompt to the local Ollama model and print the reply.
# NOTE(review): this cell was labelled "Test Dorna", but the model requested below is
# 'llama3.2' - confirm which model is intended.
from ollama import ChatResponse, chat  # imported here so the first cell runs on a fresh kernel

response: ChatResponse = chat(model='llama3.2', messages=[
  {
    'role': 'user',
    'content': 'چرا اینترنت قطع میشه؟',
  },
])
# The reply is read once with mapping-style access and once with attribute access.
print(response['message']['content'])
print(response.message.content)

In [None]:
!pip install faiss-cpu sentence-transformers ollama numpy

In [None]:
# MANUAL RAG
from ollama import chat
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import torch
import os
import re

# --- Models -----------------------------------------------------------------
LLM_MODEL = 'llama3.2'
EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'

# --- Sliding-window chunking (sizes are in characters) ------------------------
CHUNK_SIZE, OVERLAP = 1000, 200

# --- Corpus -------------------------------------------------------------------
# A document's position in this list is the id that queries use to select it.
DOCUMENT_PATHS = [
    r'/home/masih/rag_data/Hamrah.txt',  # replace path
    r'/home/masih/rag_data/vape.txt',
    r'/home/masih/rag_data/Shah.txt',
    r'/home/masih/rag_data/Khalife.txt',
    r'/home/masih/rag_data/carbon.txt',
    r'/home/masih/rag_data/takapoo.txt',
]

# --- Persisted artefacts --------------------------------------------------------
# NOTE(review): these are Windows-style paths while the corpus paths above are
# POSIX-style - confirm both are valid on the machine that runs the notebook.
CHUNK_MAP_PATH = r'C:\Users\ASUS\Downloads\chunk_map.npy'
INDEX_PATH = r'C:\Users\ASUS\Downloads\doc_index.faiss'

class AdvancedRAG:
    """FAISS-backed retriever over the documents listed in ``DOCUMENT_PATHS``.

    Chunks are embedded with a SentenceTransformer model, L2-normalised and
    stored in an inner-product index, so search scores are cosine similarities.
    ``chunk_map[i]`` is the position in ``DOCUMENT_PATHS`` of the document that
    chunk ``i`` was cut from.
    """

    def __init__(self):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.encoder = SentenceTransformer(EMBEDDING_MODEL, device=device)
        self.index = None      # set by create_index() / load_index()
        self.chunk_map = []    # chunk id -> document id

    @staticmethod
    def _split_text(text, chunk_size, overlap):
        """Cut ``text`` into windows of ``chunk_size`` characters that start
        every ``chunk_size - overlap`` characters.

        Raises:
            ValueError: if ``overlap`` is not smaller than ``chunk_size``.
        """
        step = chunk_size - overlap
        if step <= 0:
            raise ValueError("overlap must be smaller than chunk_size")
        return [text[start:start + chunk_size] for start in range(0, len(text), step)]

    @staticmethod
    def _select_hits(candidates, chunk_map, doc_index, top_k):
        """Return up to ``top_k`` ids from ``candidates`` (kept in ranking order)
        whose ``chunk_map`` entry equals ``doc_index``."""
        selected = []
        if top_k <= 0:
            return selected
        for idx in candidates:
            idx = int(idx)
            if idx < 0:
                # FAISS pads result lists with -1 when fewer than k vectors are
                # available; indexing chunk_map with -1 would silently wrap
                # around to the last chunk.
                continue
            if chunk_map[idx] == doc_index:
                selected.append(idx)
                if len(selected) >= top_k:
                    break
        return selected

    def _embed(self, texts):
        """Encode ``texts`` into a float32, C-contiguous, L2-normalised matrix."""
        vectors = self.encoder.encode(texts, convert_to_numpy=True)
        vectors = np.ascontiguousarray(vectors, dtype=np.float32)
        faiss.normalize_L2(vectors)  # in place; unit vectors make inner product == cosine
        return vectors

    def create_index(self):
        """Create FAISS index with cosine similarity and document mapping.

        Reads every file in ``DOCUMENT_PATHS``, collapses whitespace, chunks the
        text, embeds the chunks, and writes the index to ``INDEX_PATH`` and the
        chunk -> document map to ``CHUNK_MAP_PATH``.

        Raises:
            ValueError: if the documents yield no chunks at all.
        """
        all_chunks = []
        doc_mapping = []

        for doc_idx, path in enumerate(DOCUMENT_PATHS):
            with open(path, 'r', encoding='utf-8') as f:
                text = re.sub(r'\s+', ' ', f.read()).strip()
            # AnswerGenerator._load_chunks re-chunks the files with the same
            # formula; the two must stay in step so chunk ids line up.
            chunks = self._split_text(text, CHUNK_SIZE, OVERLAP)
            all_chunks.extend(chunks)
            doc_mapping.extend([doc_idx] * len(chunks))

        if not all_chunks:
            raise ValueError("No text found in DOCUMENT_PATHS; nothing to index")

        embeddings = self._embed(all_chunks)

        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)
        self.chunk_map = np.array(doc_mapping)

        faiss.write_index(self.index, INDEX_PATH)
        np.save(CHUNK_MAP_PATH, self.chunk_map)

    def load_index(self):
        """Load the index and chunk map written by ``create_index``.

        Raises:
            ValueError: if the two files describe a different number of chunks.
        """
        self.index = faiss.read_index(INDEX_PATH)
        # The map is a plain integer array, so pickle support is not needed;
        # refusing it avoids running code from a tampered .npy file.
        self.chunk_map = np.load(CHUNK_MAP_PATH, allow_pickle=False)
        if len(self.chunk_map) != self.index.ntotal:
            raise ValueError(
                f"Index holds {self.index.ntotal} vectors but chunk map has "
                f"{len(self.chunk_map)} entries; rebuild with create_index()"
            )

    def query(self, question, doc_index, top_k=5):
        """Document-specific query with cosine similarity.

        Args:
            question: Query text.
            doc_index: Position in ``DOCUMENT_PATHS`` of the document to search.
            top_k: Maximum number of chunk ids to return.

        Returns:
            Chunk ids (ints) belonging to ``doc_index``, best match first. May
            be shorter than ``top_k`` if the document has fewer chunks.

        Raises:
            RuntimeError: if no index has been created or loaded yet.
        """
        if self.index is None:
            raise RuntimeError("Index not loaded; call create_index() or load_index() first")

        total = self.index.ntotal
        if total == 0 or top_k <= 0:
            return []

        query_embed = self._embed([question])

        # Search is global, so over-fetch and filter by document. If the first
        # window does not contain enough chunks of the requested document,
        # widen it until it does or the whole index has been scanned.
        k = min(top_k * 3, total)
        while True:
            _, indices = self.index.search(query_embed, k)
            hits = self._select_hits(indices[0], self.chunk_map, doc_index, top_k)
            if len(hits) >= top_k or k >= total:
                return hits
            k = min(k * 2, total)

class AnswerGenerator:
    """Answers a question with the LLM, using chunks retrieved by a RAG system.

    The chunk ids returned by ``rag.query`` are used as positions in
    ``self.chunks``, which is rebuilt from the files in ``DOCUMENT_PATHS`` with
    the same window/step arithmetic used when the index was created.
    """

    def __init__(self, rag_system):
        self.chunks = []  # chunk texts; filled on first get_answer() call
        self.rag = rag_system

    def _load_chunks(self):
        """Read every document again and rebuild the flat list of chunk texts."""
        stride = CHUNK_SIZE - OVERLAP
        self.chunks = []
        for doc_path in DOCUMENT_PATHS:
            with open(doc_path, 'r', encoding='utf-8') as handle:
                flat_text = re.sub(r'\s+', ' ', handle.read()).strip()
                for start in range(0, len(flat_text), stride):
                    self.chunks.append(flat_text[start:start+CHUNK_SIZE])

    def get_answer(self, question, doc_index):
        """Retrieve context for ``question`` from document ``doc_index`` and
        return the LLM's reply text."""
        if len(self.chunks) == 0:
            self._load_chunks()

        hit_ids = self.rag.query(question, doc_index)
        context = "\n".join(self.chunks[hit] for hit in hit_ids)

        prompt = f"""با استفاده از متن زیر به سوال پاسخ دهید:
{context}

اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید

سوال: {question}
پاسخ:"""

        user_message = {'role': 'user', 'content': prompt}
        reply = chat(model=LLM_MODEL, messages=[user_message])
        return reply['message']['content']

# MAIN EXE of RAG
# Builds (or reloads) the FAISS index, answers one question per document and
# writes the question/answer pairs to a text file.
if __name__ == "__main__":
    # RAG init
    rag = AdvancedRAG()

    # Reuse the cache only when BOTH artefacts are present: load_index() reads
    # the index and the chunk map, so a missing map must trigger a rebuild
    # instead of a crash.
    if os.path.exists(INDEX_PATH) and os.path.exists(CHUNK_MAP_PATH):
        print("Loading existing index...")
        rag.load_index()
    else:
        print("Building optimized index...")
        rag.create_index()
    # Answer Generator init
    generator = AnswerGenerator(rag)

    # (question, position of the target document in DOCUMENT_PATHS)
    queries = [
        ("چرا اینترنت همراه اول گوشی وصل نمیشود؟", 0),
        ("چطوری ویپ مورد نظرمو پیدا کنم؟", 1),
        ("شاه عباس که بود؟", 2),
        ("خلیفه سلطان که بود و چه کرد؟", 3),
        ("کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟", 4),
        ("شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟", 5)
    ]

    with open(r'C:\Users\ASUS\Downloads\representation.txt', 'w', encoding='utf-8') as f: #replace path
        for q_idx, (query, doc_idx) in enumerate(queries):
            answer = generator.get_answer(query, doc_idx)
            f.write(f"سوال {q_idx+1} ({doc_idx+1}):\n{query}\n\nپاسخ:\n{answer}\n\n{'='*50}\n\n")
            print(f"پردازش سوال {q_idx+1}/{len(queries)} تکمیل شد")

print("تمامی سوالات با موفقیت پردازش شدند!")


In [None]:
!pip install langchain chromadb sentence-transformers ollama

In [None]:
!pip install -U langchain-community

In [None]:
# CHROMA
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.schema import Document
from ollama import chat
import os
import re

# --- Models -----------------------------------------------------------------
LLM_MODEL = 'llama3.2'
EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'

# --- Sliding-window chunking (sizes are in characters) ------------------------
CHUNK_SIZE, OVERLAP = 1000, 200

# --- Corpus -------------------------------------------------------------------
# A document's position in this list is stored as its "source_doc" metadata.
DOCUMENT_PATHS = [
    r'/home/masih/rag_data/Hamrah.txt',  # replace path
    r'/home/masih/rag_data/vape.txt',
    r'/home/masih/rag_data/Shah.txt',
    r'/home/masih/rag_data/Khalife.txt',
    r'/home/masih/rag_data/carbon.txt',
    r'/home/masih/rag_data/takapoo.txt',
]

# --- Persisted vector store -----------------------------------------------------
# NOTE(review): this uses backslashes and "Masih", while the corpus paths use
# forward slashes and "masih" - confirm the intended directory.
CHROMA_PERSIST_DIR = r'\home\Masih\chroma_db\chroma_db'

class ChromaRAGSystem:
    """Chroma-backed retriever over the documents listed in ``DOCUMENT_PATHS``.

    Every chunk is stored with a ``source_doc`` metadata field holding the
    position of its document in ``DOCUMENT_PATHS``; ``document_query`` filters
    on that field.
    """

    def __init__(self):
        # Vector store handle; populated by build_vector_store() or load_vector_store()
        self.vector_db = None
        # Embedding model shared by indexing and querying
        self.embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)

    def build_vector_store(self):
        """Chunk every document, embed the chunks and persist them in Chroma."""
        stride = CHUNK_SIZE - OVERLAP
        documents = []

        for source_id, file_path in enumerate(DOCUMENT_PATHS):
            with open(file_path, 'r', encoding='utf-8') as handle:
                flat_text = re.sub(r'\s+', ' ', handle.read()).strip()
            # Sliding window: CHUNK_SIZE characters wide, advancing by `stride`
            for start in range(0, len(flat_text), stride):
                documents.append(
                    Document(
                        page_content=flat_text[start:start+CHUNK_SIZE],
                        metadata={"source_doc": source_id},
                    )
                )

        store = Chroma.from_documents(
            documents=documents,
            embedding=self.embeddings,
            persist_directory=CHROMA_PERSIST_DIR,
        )
        self.vector_db = store
        store.persist()

    def load_vector_store(self):
        """Open the Chroma store found under ``CHROMA_PERSIST_DIR``."""
        store = Chroma(
            persist_directory=CHROMA_PERSIST_DIR,
            embedding_function=self.embeddings,
        )
        self.vector_db = store

    def document_query(self, query, doc_index, top_k=5):
        """Return the text of up to ``top_k`` chunks of document ``doc_index``
        that the store returns for ``query``."""
        only_this_doc = {"source_doc": doc_index}
        matches = self.vector_db.similarity_search(
            query=query,
            k=top_k,
            filter=only_this_doc,
        )
        texts = []
        for match in matches:
            texts.append(match.page_content)
        return texts

class AnswerGenerator:
    """Answers a question with the LLM, using context from a Chroma RAG system.

    Note: an ``AnswerGenerator`` class is also defined in the FAISS cell above;
    running this cell rebinds the name to this implementation.
    """

    def __init__(self, rag_system):
        self.rag = rag_system

    def generate_response(self, question, doc_index):
        """Retrieve chunks of document ``doc_index`` for ``question`` and return
        the LLM's reply text."""
        # Retrieved chunk texts, joined one per line
        context = "\n".join(self.rag.document_query(question, doc_index))

        prompt = f"""با استفاده از متن زیر به سوال پاسخ دهید:
{context}

اگر پاسخ در متن وجود ندارد عبارت 'پاسخی یافت نشد' را برگردانید

سوال: {question}
پاسخ:"""

        user_message = {'role': 'user', 'content': prompt}
        reply = chat(model=LLM_MODEL, messages=[user_message])
        return reply['message']['content']

# Builds (or reopens) the Chroma store, answers one question per document and
# writes the question/answer pairs to a text file.
if __name__ == "__main__":
    rag_system = ChromaRAGSystem()

    # Reopen the store when its directory is already there, otherwise create it
    if os.path.exists(CHROMA_PERSIST_DIR):
        print("Loading existing vector store...")
        rag_system.load_vector_store()
    else:
        print("Creating new vector store...")
        rag_system.build_vector_store()

    answer_engine = AnswerGenerator(rag_system)

    # (question, position of the target document in DOCUMENT_PATHS)
    queries = [
        ("چرا اینترنت همراه اول گوشی وصل نمیشود؟", 0),
        ("چطوری ویپ مورد نظرمو پیدا کنم؟", 1),
        ("شاه عباس که بود؟", 2),
        ("خلیفه سلطان که بود و چه کرد؟", 3),
        ("کربن اکتیو و کربن بلک چه هستند و چه تفاوتی دارند و برای چه استفاده میشن؟", 4),
        ("شرکت تکاپو صنعت نامی چه محصولاتی ارایه میدهد؟ چه چیزی این شرکت را منحصر به فرد میسازد؟ سهام این شرکت صعودی است یا نزولی؟", 5)
    ]

    with open(r'/home/masih/rag_data/response.txt', 'w', encoding='utf-8') as output_file:  # replace path
        for q_num, (query, doc_idx) in enumerate(queries):
            answer = answer_engine.generate_response(query, doc_idx)
            output_file.write(
                f"سوال {q_num+1} ({doc_idx+1}):\n{query}\n\nپاسخ:\n{answer}\n\n{'='*50}\n\n"
            )
            print(f"پردازش سوال {q_num+1}/{len(queries)} تکمیل شد")

print("تمامی سوالات با موفقیت پردازش شدند!")

In [None]:
# AGENT
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import Ollama
import requests
from bs4 import BeautifulSoup

# 1. Add web browsing tool
def web_browser_tool(url: str) -> str:
    """Fetch a web page and return its visible text (5000 character limit).

    Args:
        url: Address of the page. Surrounding whitespace and quote characters
            are stripped before the request is made.

    Returns:
        The page text with script/style/header/footer/nav elements removed,
        truncated to 5000 characters. On any failure (blank URL, network
        error, HTTP error status, parse error) a string starting with
        "Error accessing website: " is returned instead; nothing is raised, so
        the calling agent always receives a string.
    """
    try:
        # Tolerate tool input that arrives padded or wrapped in quotes.
        target = (url or "").strip().strip("'\"").strip()
        if not target:
            raise ValueError("empty URL")

        response = requests.get(target, timeout=10)
        # Report 4xx/5xx as an error instead of returning the error page's text.
        response.raise_for_status()

        # Parse the raw bytes so BeautifulSoup can detect the charset from the
        # document itself; response.text relies on the HTTP header and can
        # mis-decode non-Latin pages when the header carries no charset.
        soup = BeautifulSoup(response.content, 'html.parser')

        # Clean HTML
        for element in soup(['script', 'style', 'header', 'footer', 'nav']):
            element.decompose()

        text = soup.get_text(separator='\n', strip=True)
        return text[:5000]  # Prevent token overflow
    except Exception as e:
        # Deliberately broad: the tool reports failures as text rather than raising.
        return f"Error accessing website: {str(e)}"

# 2. Agent
class AgentEnhancedGenerator:
    """Conversational agent that can search the RAG store or fetch a web page.

    Wraps an Ollama LLM in a LangChain agent with conversation memory and two
    tools: ``Document_Search`` (the RAG vector store) and ``Web_Browser``
    (``web_browser_tool``).
    """

    def __init__(self, rag_system):
        self.rag = rag_system  # existing RAG system; its vector_db backs Document_Search
        self.llm = Ollama(model=LLM_MODEL)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # Tools offered to the agent
        document_tool = Tool(
            name="Document_Search",
            func=self._document_search,
            description="For questions about mobile networks, historical figures, materials, or companies"
        )
        browser_tool = Tool(
            name="Web_Browser",
            func=web_browser_tool,
            description="For live web data or current information"
        )
        self.tools = [document_tool, browser_tool]

        # Agent wiring; max_iterations caps the reasoning loop
        agent_options = dict(
            tools=self.tools,
            llm=self.llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            memory=self.memory,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=3,
        )
        self.agent = initialize_agent(**agent_options)

    def _document_search(self, query: str) -> str:
        """Return the five chunks the vector store ranks highest for ``query``
        (no per-document filter), joined with newlines."""
        matches = self.rag.vector_db.similarity_search(query, k=5)
        pieces = []
        for match in matches:
            pieces.append(match.page_content)
        return "\n".join(pieces)

    def generate_response(self, question):
        """Run the agent on ``question``; on any exception return a Persian
        error string instead of raising."""
        try:
            return self.agent.run(question)
        except Exception as e:
            return f"خطا در پردازش: {str(e)}"

# 3. Example
# Builds (or reopens) the Chroma store, runs three questions through the agent
# and writes the answers to agent_results.txt on the user's Desktop.
if __name__ == "__main__":
    import os  # imported here so this cell does not depend on an earlier cell's import

    # Init RAG
    rag_system = ChromaRAGSystem()

    if not os.path.exists(CHROMA_PERSIST_DIR):
        rag_system.build_vector_store()
    else:
        rag_system.load_vector_store()

    # Create agent-enhanced generator
    enhanced_agent = AgentEnhancedGenerator(rag_system)

    # Test questions (mix of document and web queries). The trailing comments
    # say which capability each question is meant to exercise; the agent picks
    # the tool itself.
    test_questions = [
        "آخرین اخبار درباره شرکت تکاپو صنعت چیست؟",  # intended for the web browser tool
        "تفاوت کربن اکتیو و کربن بلک چیست؟",  # intended for document search
        "آیا شاه عباس با خلیفه سلطان همکاری داشت؟"  # intended to exercise conversation memory
    ]

    # open() does not expand "~", so the home directory is resolved explicitly
    # and the path is joined with the platform's separator.
    results_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'agent_results.txt')
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    # Run queries
    with open(results_path, 'w', encoding='utf-8') as output_file:
        for idx, question in enumerate(test_questions):
            answer = enhanced_agent.generate_response(question)
            output_file.write(f"سوال {idx+1}:\n{question}\n\nپاسخ:\n{answer}\n\n{'='*50}\n\n")
            print(f"پردازش سوال {idx+1}/{len(test_questions)} تکمیل شد")