RobbiePasquale committed
Commit e1392d6 · verified · 1 Parent(s): adebc30

Upload 20 files

ToTSearch.py ADDED
@@ -0,0 +1,219 @@
+ # ToTSearch.py
+ import random
+ from typing import List, Dict, Any, Generator
+ from sentence_transformers import SentenceTransformer, util
+ import torch
+ import numpy as np
+ from twisted.internet import defer
+ from agent import AutonomousWebAgent
+ from mcts import MCTS, MCTSNode
+ import logging
+ from twisted.internet.defer import Deferred
+
+ logger = logging.getLogger(__name__)
+
+ class ToTNode:
+     def __init__(self, thought, parent=None):
+         self.thought = thought
+         self.parent = parent
+         self.children = []
+         self.visits = 0
+         self.value = 0
+         self.search_results = []
+         self.mcts_node = None
+
+     def add_child(self, child_thought):
+         child = ToTNode(child_thought, self)
+         self.children.append(child)
+         return child
+
+     def update(self, reward):
+         self.visits += 1
+         self.value += reward
+
+ class ToTSearch:
+     def __init__(self, agent: AutonomousWebAgent, model='all-MiniLM-L6-v2', max_depth=3, num_thoughts=3, num_simulations=100):
+         self.agent = agent
+         self.model = SentenceTransformer(model)
+         self.max_depth = max_depth
+         self.num_thoughts = num_thoughts
+         self.num_simulations = num_simulations
+         self.mcts = MCTS(initial_state="", num_simulations=num_simulations)
+
+     def generate_thoughts(self, query: str) -> List[str]:
+         prompt = f"""Given the query "{query}", generate {self.num_thoughts} distinct thoughts or approaches to address it.
+         Each thought should be a complete sentence and offer a unique perspective or solution path."""
+         thoughts = self.agent.generate_text(prompt).split('\n')
+         return [thought.strip() for thought in thoughts if thought.strip()]
+
+     def expand_thought(self, thought: str) -> List[str]:
+         prompt = f"""Expand on the following thought: "{thought}"
+         Generate {self.num_thoughts} more specific sub-thoughts or considerations.
+         Each sub-thought should be a complete sentence and offer additional detail or a new angle."""
+         expansions = self.agent.generate_text(prompt).split('\n')
+         return [exp.strip() for exp in expansions if exp.strip()]
+
+     def evaluate_thought(self, thought: str, query: str) -> float:
+         thought_embedding = self.model.encode(thought)
+         query_embedding = self.model.encode(query)
+         return util.pytorch_cos_sim(thought_embedding, query_embedding).item()
+
+     @defer.inlineCallbacks
+     def search_and_augment(self, thought: str) -> Generator[Deferred, Any, List[Dict[str, Any]]]:
+         search_results = yield self.agent.retrieve_from_web(thought)
+         for result in search_results:
+             result['originating_thought'] = thought
+         defer.returnValue(search_results)
+
+     def select(self, node: ToTNode) -> ToTNode:
+         while node.children:
+             # Choose a node with zero visits or select based on the value/visits ratio
+             if any(child.visits == 0 for child in node.children):
+                 zero_visit_nodes = [child for child in node.children if child.visits == 0]
+                 selected_node = random.choice(zero_visit_nodes)
+                 logger.debug(f"Selected node with 0 visits: {selected_node.thought}")
+                 return selected_node
+             else:
+                 selected_node = max(node.children, key=lambda child: (child.value / child.visits) if child.visits > 0 else float('-inf'))
+                 logger.debug(f"Selected node based on value/visits ratio: {selected_node.thought}, value: {selected_node.value}, visits: {selected_node.visits}")
+                 return selected_node
+         return node
+
+     def expand(self, node: ToTNode, query: str) -> ToTNode:
+         if not node.children and len(node.thought.split()) > 2:
+             expansions = self.expand_thought(node.thought)
+             for expansion in expansions:
+                 node.add_child(expansion)
+         return random.choice(node.children) if node.children else node
+
+     @defer.inlineCallbacks
+     def simulate(self, node: ToTNode, query: str):
+         current_node = node
+         depth = 0
+         while depth < self.max_depth:
+             if not current_node.children:
+                 break
+             current_node = random.choice(current_node.children)
+             depth += 1
+
+         logger.debug(f"Simulating for thought: {current_node.thought}")
+
+         search_results = yield self.search_and_augment(current_node.thought)
+         current_node.search_results = search_results
+
+         logger.debug(f"Search results count: {len(search_results)}")
+
+         ranked_results = self.agent.calculate_reward(current_node.thought, query)
+         logger.debug(f"Ranked results: {ranked_results}")
+
+         mcts_node = MCTSNode(current_node.thought)
+         current_node.mcts_node = mcts_node
+         mcts_total_reward = 0
+
+         for _ in range(self.num_simulations):
+             mcts_reward = yield self.mcts.simulate(mcts_node)
+             mcts_total_reward += mcts_reward
+             self.mcts.backpropagate(mcts_node, mcts_reward)
+
+         logger.debug(f"MCTS node visits: {mcts_node.visits}, total reward: {mcts_total_reward}")
+
+         if mcts_node.visits > 0:
+             mcts_value = mcts_total_reward / mcts_node.visits
+             logger.debug(f"MCTS value: {mcts_value}")
+         else:
+             # Avoid division by zero when the MCTS node was never visited
+             mcts_value = 0
+             logger.warning("MCTS node has 0 visits, assigning value 0")
+
+         combined_reward = (ranked_results + mcts_value) / 2
+         logger.debug(f"Combined reward: {combined_reward}")
+
+         defer.returnValue(combined_reward)
+
+     def backpropagate(self, node: ToTNode, reward: float):
+         while node:
+             node.update(reward)
+             node = node.parent
+
+     @defer.inlineCallbacks
+     def tot_search(self, query: str) -> Generator[Deferred, Any, ToTNode]:
+         root = ToTNode(query)
+         for _ in range(self.num_simulations):
+             node = self.select(root)
+             node = self.expand(node, query)
+             reward = yield self.simulate(node, query)
+             self.backpropagate(node, reward)
+
+             # Update agent's experience replay
+             state = self.agent.extract_features(node.thought, query)
+             next_state = self.agent.extract_features(node.children[0].thought if node.children else node.thought, query)
+             self.agent.remember_worker(state, 0, reward, next_state, False)
+
+             # Perform agent's replay to update RL models
+             self.agent.replay_worker()
+             self.agent.replay_manager()
+
+         defer.returnValue(root)
+
+     def get_best_path(self, root: ToTNode) -> List[str]:
+         path = [root.thought]
+         current = root
+         while current.children:
+             current = max(current.children, key=lambda child: child.value / child.visits if child.visits > 0 else float('-inf'))
+             path.append(current.thought)
+         return path
+
+     @defer.inlineCallbacks
+     def synthesize_results(self, root: ToTNode, query: str) -> Generator[Deferred, Any, str]:
+         best_path = self.get_best_path(root)
+         all_results = []
+
+         def collect_results(node):
+             all_results.extend(node.search_results)
+             for child in node.children:
+                 collect_results(child)
+
+         collect_results(root)
+
+         # Sort results by relevance
+         all_results.sort(key=lambda x: self.evaluate_thought(x['content'], query), reverse=True)
+
+         # Generate a summary of the top results
+         top_results = all_results[:5]  # Adjust the number as needed
+         summary_prompt = f"Synthesize the following information into a coherent answer for the query '{query}':\n\n"
+         summary_prompt += f"Thought path: {' -> '.join(best_path)}\n\n"
+         for result in top_results:
+             summary_prompt += f"- {result['content'][:200]}...\n"
+
+         # Use the agent's RAG capabilities for final answer generation
+         final_answer = yield self.agent.generate_rag_response(query, top_results)
+
+         # Save the generated answer and thought path to the agent's knowledge base
+         self.agent.add_document_to_kb(
+             title=f"ToT Search Result: {query}",
+             content=final_answer,
+             metadata={"thought_path": best_path}
+         )
+
+         defer.returnValue(final_answer)
+
+     @defer.inlineCallbacks
+     def search(self, query: str) -> Generator[Deferred, Any, str]:
+         logger.info(f"Starting ToT search for query: {query}")
+         root = yield self.tot_search(query)
+         final_answer = yield self.synthesize_results(root, query)
+         logger.info(f"ToT search completed for query: {query}")
+         defer.returnValue(final_answer)
+
+ # Usage example:
+ # tot_search = ToTSearch(agent)
+ # final_answer = yield tot_search.search("What are the latest advancements in renewable energy?")
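For context, a minimal way to drive `ToTSearch.search` from a Twisted reactor might look like the sketch below. The `AutonomousWebAgent` constructor arguments shown (`state_size=7`, `action_size=3`, `num_options=3`) are assumptions based on the seven features returned by `extract_features` and the three Manager options in agent.py, not values fixed by this commit:

    # usage_sketch.py -- illustrative only, not part of the upload
    from twisted.internet import reactor, defer
    from agent import AutonomousWebAgent
    from ToTSearch import ToTSearch

    @defer.inlineCallbacks
    def main():
        # state_size matches the 7-element feature vector from extract_features;
        # action_size/num_options match the 3 Manager options (assumed values)
        agent = AutonomousWebAgent(state_size=7, action_size=3, num_options=3)
        tot = ToTSearch(agent)
        answer = yield tot.search("What are the latest advancements in renewable energy?")
        print(answer)
        reactor.stop()

    reactor.callWhenRunning(main)
    reactor.run()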
agent.py ADDED
@@ -0,0 +1,1082 @@
+ # agent.py
+ import numpy as np
+ from mcts import MCTS
+ from ranking import train_ranking_model
+ from bs4 import BeautifulSoup
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from collections import deque, OrderedDict
+ import random
+ from sklearn.metrics.pairwise import cosine_similarity
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
+ from sentence_transformers import SentenceTransformer
+ import hashlib
+ from twisted.internet import defer
+ import logging
+ import json
+ import os
+ from urllib.parse import urlparse
+
+ logger = logging.getLogger(__name__)
+
+ # ==========================
+ # Prioritized Experience Replay
+ # ==========================
+
+ class SumTree:
+     """
+     SumTree data structure where the parent's value is the sum of its children.
+     Leaf nodes contain the priorities of experiences.
+     """
+     def __init__(self, capacity):
+         self.capacity = capacity
+         self.tree = np.zeros(2 * capacity - 1)
+         self.data = np.zeros(capacity, dtype=object)
+         self.write = 0
+         self.n_entries = 0
+
+     def _propagate(self, idx, change):
+         parent = (idx - 1) // 2
+         self.tree[parent] += change
+         if parent != 0:
+             self._propagate(parent, change)
+
+     def _retrieve(self, idx, s):
+         left = 2 * idx + 1
+         right = left + 1
+
+         if left >= len(self.tree):
+             return idx
+
+         if s <= self.tree[left]:
+             return self._retrieve(left, s)
+         else:
+             return self._retrieve(right, s - self.tree[left])
+
+     def total(self):
+         return self.tree[0]
+
+     def add(self, p, data):
+         idx = self.write + self.capacity - 1
+
+         self.data[self.write] = data
+         self.update(idx, p)
+
+         self.write += 1
+         if self.write >= self.capacity:
+             self.write = 0
+
+         if self.n_entries < self.capacity:
+             self.n_entries += 1
+
+     def update(self, idx, p):
+         change = p - self.tree[idx]
+         self.tree[idx] = p
+         self._propagate(idx, change)
+
+     def get(self, s):
+         idx = self._retrieve(0, s)
+         data_idx = idx - self.capacity + 1
+
+         return (idx, self.tree[idx], self.data[data_idx])
+
+ class PrioritizedReplayMemory:
+     def __init__(self, capacity, alpha=0.6):
+         self.tree = SumTree(capacity)
+         self.alpha = alpha  # [0,1] converts the magnitude of the TD error into a priority
+         self.epsilon = 1e-6  # small amount to avoid zero priority
+
+     def add(self, error, sample):
+         p = (np.abs(error) + self.epsilon) ** self.alpha
+         self.tree.add(p, sample)
+
+     def sample(self, batch_size, beta=0.4):
+         batch = []
+         idxs = []
+         segment = self.tree.total() / batch_size
+         priorities = []
+
+         for i in range(batch_size):
+             a = segment * i
+             b = segment * (i + 1)
+             s = random.uniform(a, b)
+             idx, p, data = self.tree.get(s)
+             batch.append(data)
+             idxs.append(idx)
+             priorities.append(p)
+
+         total = self.tree.total()
+         probs = np.array(priorities) / total  # element-wise; a plain Python list would not support this division
+         weights = (self.tree.n_entries * probs) ** (-beta)
+         weights /= weights.max()
+         return batch, idxs, weights
+
+     def update(self, idx, error):
+         p = (np.abs(error) + self.epsilon) ** self.alpha
+         self.tree.update(idx, p)
+
+ # ==========================
+ # Hierarchical Reinforcement Learning (HRL)
+ # ==========================
+
+ class ManagerModel(nn.Module):
+     """
+     High-level policy model (Manager) that decides which option to execute.
+     """
+     def __init__(self, input_size, hidden_size, num_options):
+         super(ManagerModel, self).__init__()
+         self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
+         self.fc = nn.Linear(hidden_size, num_options)
+         self.layer_norm = nn.LayerNorm(hidden_size)
+
+     def forward(self, x, hidden=None):
+         if x.dim() == 2:
+             x = x.unsqueeze(1)  # Add a time dimension
+         out, hidden = self.lstm(x, hidden)
+         last_output = out[:, -1, :]
+         last_output = self.layer_norm(last_output)
+         option_scores = self.fc(last_output)
+         return option_scores, hidden
+
+ class WorkerModel(nn.Module):
+     """
+     Low-level policy model (Worker) that executes actions based on the selected option.
+     """
+     def __init__(self, input_size, hidden_size, action_size):
+         super(WorkerModel, self).__init__()
+         self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
+         self.fc = nn.Linear(hidden_size, action_size)
+         self.layer_norm = nn.LayerNorm(hidden_size)
+         self.action_size = action_size  # Store action_size for reference
+
+     def forward(self, x, hidden=None):
+         if x.dim() == 2:
+             x = x.unsqueeze(1)  # Add a time dimension
+         out, hidden = self.lstm(x, hidden)
+         last_output = out[:, -1, :]
+         last_output = self.layer_norm(last_output)
+         action_scores = self.fc(last_output)
+         return action_scores, hidden
+
+     def act(self, state, epsilon=0.1):
+         """
+         Selects an action using an epsilon-greedy policy.
+         """
+         if random.random() < epsilon:
+             action = random.randint(0, self.action_size - 1)
+             return action
+         state = torch.FloatTensor(state).unsqueeze(0).to(next(self.parameters()).device)
+         with torch.no_grad():
+             action_scores, _ = self(state)
+         action = torch.argmax(action_scores, dim=1).item()
+         return action
+
+ # ==========================
+ # RAGSummarizer Class
+ # ==========================
+
+ class RAGSummarizer:
+     def __init__(self, model_name='gpt2', embedding_model='all-MiniLM-L6-v2',
+                  max_length=150, cache_capacity=100, persistent_cache_path='rag_cache.json'):
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+         self.model = GPT2LMHeadModel.from_pretrained(model_name).to(self.device)
+         # Explicitly set the device for SentenceTransformer
+         self.embedding_model = SentenceTransformer(embedding_model, device=self.device)
+         self.max_length = max_length
+         self.cache = LRUCache(cache_capacity)
+         self.persistent_cache_path = persistent_cache_path
+         self.load_persistent_cache()
+
+     def load_persistent_cache(self):
+         if os.path.exists(self.persistent_cache_path):
+             with open(self.persistent_cache_path, 'r', encoding='utf-8') as f:
+                 try:
+                     persistent_data = json.load(f)
+                     for key, value in persistent_data.items():
+                         self.cache.put(key, value)
+                     logger.info(f"Loaded persistent cache with {len(persistent_data)} entries.")
+                 except json.JSONDecodeError:
+                     logger.warning("Persistent cache file is corrupted. Initializing empty cache.")
+         else:
+             logger.info("No persistent cache found. Starting with empty cache.")
+
+     def save_persistent_cache(self):
+         with open(self.persistent_cache_path, 'w', encoding='utf-8') as f:
+             json.dump(self.cache.cache, f, indent=2)
+         logger.info(f"Saved persistent cache with {len(self.cache.cache)} entries.")
+
+     def save_rag_data(self, query, chunks, embeddings):
+         data = {
+             "query": query,
+             "chunks": chunks,
+             "embeddings": embeddings.tolist()
+         }
+
+         os.makedirs("rag_data", exist_ok=True)
+
+         filename = f"rag_data/{hash(query)}.json"
+         with open(filename, 'w') as f:
+             json.dump(data, f, indent=2)
+
+         logger.info(f"Saved RAG data to {filename}")
+
+     def split_into_chunks(self, text, chunk_size=200):
+         words = text.split()
+         return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
+
+     def retrieve_relevant_chunks(self, query, chunks, embeddings, top_k=3):
+         if embeddings.size(0) == 0:
+             logger.warning("Embeddings are empty. Cannot retrieve relevant chunks.")
+             return []
+         query_embedding = self.embedding_model.encode([query], convert_to_tensor=True)
+         cosine_scores = cosine_similarity(query_embedding.cpu().numpy(), embeddings.cpu().numpy())[0]
+         top_indices = cosine_scores.argsort()[-top_k:][::-1]
+         # Ensure indices are within bounds
+         top_indices = [idx for idx in top_indices if idx < len(chunks)]
+         return [chunks[i] for i in top_indices]
+
+     def get_embeddings(self, chunks):
+         # Encode in batches to bound memory use
+         batch_size = 32
+         embeddings = []
+         for i in range(0, len(chunks), batch_size):
+             batch = chunks[i:i+batch_size]
+             batch_embeddings = self.embedding_model.encode(batch, convert_to_tensor=True)
+             embeddings.append(batch_embeddings)
+         if embeddings:
+             return torch.cat(embeddings, dim=0)
+         else:
+             return torch.tensor([])
+
+     def generate_summary(self, query, relevant_chunks):
+         cache_key = hashlib.md5((query + ''.join(relevant_chunks)).encode()).hexdigest()
+         cached_summary = self.cache.get(cache_key)
+         if cached_summary:
+             return cached_summary
+
+         context = " ".join(relevant_chunks)
+         prompt = f"Summarize the following content in relation to '{query}': {context}\n\nSummary:"
+
+         input_ids = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)
+
+         try:
+             output = self.model.generate(
+                 input_ids,
+                 max_length=input_ids.shape[1] + self.max_length,
+                 num_return_sequences=1,
+                 no_repeat_ngram_size=2,
+                 top_k=50,
+                 top_p=0.95,
+                 temperature=0.7,
+                 early_stopping=True
+             )
+         except Exception as e:
+             logger.error(f"Error during summary generation: {str(e)}")
+             return "Summary generation failed."
+
+         self.save_rag_data(query, relevant_chunks, self.get_embeddings(relevant_chunks))
+
+         summary = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         summary = summary.split("Summary:")[-1].strip()
+
+         self.cache.put(cache_key, summary)
+         self.save_persistent_cache()
+
+         return summary
+
+ # ==========================
+ # WorldModel Class
+ # ==========================
+
+ class WorldModel(nn.Module):
+     def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout=0.3):
+         super(WorldModel, self).__init__()
+         self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
+                             batch_first=True, dropout=dropout)
+         self.fc = nn.Linear(hidden_size, output_size)
+         self.value_head = nn.Linear(hidden_size, 1)
+         self.layer_norm = nn.LayerNorm(hidden_size)
+
+     def forward(self, x, hidden=None):
+         if x.dim() == 2:
+             x = x.unsqueeze(1)  # Add a time dimension
+         out, hidden = self.lstm(x, hidden)
+         last_output = out[:, -1, :]
+         last_output = self.layer_norm(last_output)
+         action_scores = self.fc(last_output)
+         state_value = self.value_head(last_output)
+         return action_scores, state_value, hidden
+
+ # ==========================
+ # Manager and Worker Classes for HRL
+ # ==========================
+
+ class Manager:
+     def __init__(self, state_size, num_options, hidden_size=128, learning_rate=0.001, gamma=0.99,
+                  epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, memory_capacity=1000, device=torch.device("cpu")):
+         self.state_size = state_size
+         self.num_options = num_options
+         self.gamma = gamma
+         self.epsilon = epsilon
+         self.epsilon_decay = epsilon_decay
+         self.epsilon_min = epsilon_min
+         self.device = device
+
+         self.model = ManagerModel(state_size, hidden_size, num_options).to(self.device)
+         self.target_model = ManagerModel(state_size, hidden_size, num_options).to(self.device)
+         self.optimizer = optim.AdamW(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)
+         self.loss_fn = nn.MSELoss()
+         self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', patience=5, factor=0.5, verbose=True)
+
+         self.memory = PrioritizedReplayMemory(capacity=memory_capacity, alpha=0.6)
+
+         self.update_target_model()
+
+     def update_target_model(self):
+         self.target_model.load_state_dict(self.model.state_dict())
+
+     def remember(self, state, option, reward, next_state, done, td_error):
+         sample = (state, option, reward, next_state, done)
+         self.memory.add(td_error, sample)
+
+     def act(self, state):
+         if random.random() < self.epsilon:
+             option = random.randint(0, self.num_options - 1)
+             return option
+         # nn.LSTM has no `.weight` attribute; query the device via the model's parameters
+         state = torch.FloatTensor(state).unsqueeze(0).to(next(self.model.parameters()).device)
+         with torch.no_grad():
+             option_scores, _ = self.model(state)
+         option = torch.argmax(option_scores).item()
+         return option
+
+     def replay(self, batch_size, beta=0.4):
+         if self.memory.tree.n_entries < batch_size:
+             return
+         batch, idxs, weights = self.memory.sample(batch_size, beta)
+         states, options, rewards, next_states, dones = zip(*batch)
+
+         device = next(self.model.parameters()).device
+         states = torch.FloatTensor(np.array(states)).to(device)
+         next_states = torch.FloatTensor(np.array(next_states)).to(device)
+         options = torch.LongTensor(options).unsqueeze(1).to(device)
+         rewards = torch.FloatTensor(rewards).unsqueeze(1).to(device)
+         dones = torch.FloatTensor(dones).unsqueeze(1).to(device)
+         weights = torch.FloatTensor(weights).unsqueeze(1).to(device)
+
+         # Current Q values
+         current_q_values, _ = self.model(states)
+         current_q_values = current_q_values.gather(1, options)
+
+         # Target Q values
+         with torch.no_grad():
+             next_q_values, _ = self.target_model(next_states)
+             max_next_q_values = next_q_values.max(1)[0].unsqueeze(1)
+             target_q_values = rewards + (self.gamma * max_next_q_values * (1 - dones))
+
+         # Compute TD errors
+         td_errors = target_q_values - current_q_values
+
+         # Compute loss with importance-sampling weights
+         loss = (td_errors.pow(2) * weights).mean()
+
+         # Optimize the model
+         self.optimizer.zero_grad()
+         loss.backward()
+         torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+         self.optimizer.step()
+         self.scheduler.step(loss.item())
+
+         # Update priorities
+         td_errors_np = td_errors.detach().cpu().numpy().squeeze()
+         for idx, td_error in zip(idxs, td_errors_np):
+             self.memory.update(idx, np.abs(td_error))
+
+         # Decay epsilon
+         if self.epsilon > self.epsilon_min:
+             self.epsilon *= self.epsilon_decay
+
+ # ==========================
+ # AutonomousWebAgent Class
+ # ==========================
+
+ def truncate_text(text, max_length=1024):
+     tokens = text.split()
+     if len(tokens) > max_length:
+         return ' '.join(tokens[:max_length])
+     return text
+
+ class AutonomousWebAgent:
+     def __init__(self, state_size, action_size, num_options, hidden_size=64, learning_rate=0.001,
+                  gamma=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01,
+                  knowledge_base_path='knowledge_base.json'):
+         self.state_size = state_size
+         self.action_size = action_size
+         self.num_options = num_options  # Number of high-level options for HRL
+         self.gamma = gamma
+         self.epsilon = epsilon
+         self.epsilon_decay = epsilon_decay
+         self.epsilon_min = epsilon_min
+
+         # Initialize RAGSummarizer first to get the device
+         self.summarizer = RAGSummarizer()
+         self.device = self.summarizer.device
+
+         # Initialize SentenceTransformer with the correct device
+         self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=self.device)
+
+         # Low-level (Worker) Model
+         self.worker_model = WorldModel(state_size, hidden_size, action_size).to(self.device)
+         self.worker_target_model = WorldModel(state_size, hidden_size, action_size).to(self.device)
+         self.worker_optimizer = optim.AdamW(self.worker_model.parameters(), lr=learning_rate, weight_decay=1e-5)
+         self.worker_loss_fn = nn.MSELoss()
+         self.worker_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.worker_optimizer, 'min', patience=5, factor=0.5, verbose=True)
+         self.worker_memory = PrioritizedReplayMemory(capacity=2000, alpha=0.6)
+         self.update_worker_target_model()
+
+         # High-level (Manager) Model
+         self.manager = Manager(state_size, num_options, hidden_size=128, learning_rate=learning_rate,
+                                gamma=gamma, epsilon=epsilon, epsilon_decay=epsilon_decay,
+                                epsilon_min=epsilon_min, memory_capacity=1000, device=self.device)
+
+         self.mcts = MCTS(initial_state="")
+         logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")
+
+         self.site_performance = {}  # {(site, query): performance_score}
+
+         # List of all search sites (base URLs without the query)
+         self.all_search_sites = [
+             "https://en.wikibooks.org/w/index.php?search=",
+             "https://en.wikiversity.org/w/index.php?search=",
+             "https://commons.wikimedia.org/w/index.php?search=",
+             "https://stackexchange.com/search?q=",
+             "https://arxiv.org/search/?query=",
+             "https://www.ncbi.nlm.nih.gov/pmc/?term=",
+             "https://www.gutenberg.org/ebooks/search/?query=",
+             "https://openlibrary.org/search?q=",
+             "https://doaj.org/search/articles?ref=homepage&q=",
+             "https://www.ted.com/search?q=",
+             "https://en.citizendium.org/wiki?search=",
+             "https://www.jstor.org/action/doBasicSearch?Query=",
+             "https://archive.org/search.php?query=",
+             "https://search.scielo.org/?q=",
+             "https://paperswithcode.com/search?q=",
+             "https://www.reddit.com/search/?q=",
+             "https://huggingface.co/models?search=",
+             "https://huggingface.co/datasets?search=",
+             "https://machinelearningmastery.com/?s=",
+             "https://www.kaggle.com/search?q=",
+             "https://towardsdatascience.com/search?q=",
+             "https://github.com/search?q=",
+             "https://stackoverflow.com/search?q=",
+             "https://www.youtube.com/results?search_query=",
+             "https://www.slideshare.net/search/slideshow?searchfrom=header&q="
+         ]
+
+         # Initialize Knowledge Base
+         self.knowledge_base_path = knowledge_base_path
+         self.knowledge_base = []
+         self.kb_embeddings = None
+         self.load_knowledge_base()
+
+         # Additional Features for State Representation
+         self.additional_features = ['image_count', 'script_count', 'css_count']
+
+     def save(self, filename):
+         """Save the entire agent state."""
+         state = {
+             'worker_model': self.worker_model.state_dict(),
+             'manager_model': self.manager.model.state_dict(),
+             'worker_optimizer': self.worker_optimizer.state_dict(),
+             'manager_optimizer': self.manager.optimizer.state_dict(),
+             'epsilon': self.epsilon
+         }
+         torch.save(state, filename)
+         logger.info(f"Saved agent state to {filename}")
+
+     def load(self, filename):
+         """Load the entire agent state."""
+         state = torch.load(filename, map_location=self.device)
+         self.worker_model.load_state_dict(state['worker_model'])
+         self.manager.model.load_state_dict(state['manager_model'])
+         self.worker_optimizer.load_state_dict(state['worker_optimizer'])
+         self.manager.optimizer.load_state_dict(state['manager_optimizer'])
+         self.epsilon = state['epsilon']
+         logger.info(f"Loaded agent state from {filename}")
+
+     # ==========================
+     # Text Generation
+     # ==========================
+
+     def generate_text(self, prompt):
+         # Use the RAGSummarizer to generate text
+         chunks = self.summarizer.split_into_chunks(prompt)
+         embeddings = self.summarizer.get_embeddings(chunks)
+         relevant_chunks = self.summarizer.retrieve_relevant_chunks(query=prompt, chunks=chunks, embeddings=embeddings)
+         generated_text = self.summarizer.generate_summary(prompt, relevant_chunks)
+         return generated_text
+
+     # ==========================
+     # Knowledge Base Management
+     # ==========================
+
+     def load_knowledge_base(self):
+         if not os.path.exists(self.knowledge_base_path):
+             logger.warning(f"Knowledge base file {self.knowledge_base_path} does not exist. Initializing empty KB.")
+             self.knowledge_base = []
+             self.kb_embeddings = torch.tensor([]).to(self.device)
+             return
+
+         with open(self.knowledge_base_path, 'r', encoding='utf-8') as f:
+             self.knowledge_base = json.load(f)
+
+         if self.knowledge_base:
+             texts = [doc['content'] for doc in self.knowledge_base]
+             self.kb_embeddings = self.embedding_model.encode(texts, convert_to_tensor=True)
+             logger.info(f"Loaded {len(self.knowledge_base)} documents into the knowledge base.")
+         else:
+             self.kb_embeddings = torch.tensor([]).to(self.device)
+             logger.info("Knowledge base is empty.")
+
+     def save_knowledge_base(self):
+         with open(self.knowledge_base_path, 'w', encoding='utf-8') as f:
+             json.dump(self.knowledge_base, f, indent=2)
+         logger.info(f"Knowledge base saved with {len(self.knowledge_base)} documents.")
+
+     def add_document_to_kb(self, title, content, metadata=None):
+         document = {
+             "title": title,
+             "content": content,
+             "metadata": metadata or {}
+         }
+         self.knowledge_base.append(document)
+         # Update embeddings
+         new_embedding = self.embedding_model.encode([content], convert_to_tensor=True).to(self.device)
+         if self.kb_embeddings is None or self.kb_embeddings.numel() == 0:
+             self.kb_embeddings = new_embedding
+         else:
+             self.kb_embeddings = torch.cat([self.kb_embeddings, new_embedding], dim=0)
+         # Save to knowledge base
+         self.save_knowledge_base()
+         logger.info(f"Added new document to knowledge base: {title}")
+
+     def retrieve_from_kb(self, query, top_k=5):
+         if not self.knowledge_base:
+             logger.warning("Knowledge base is empty. No documents to retrieve.")
+             return []
+
+         query_embedding = self.embedding_model.encode([query], convert_to_tensor=True).to(self.device)
+
+         if self.kb_embeddings is None or self.kb_embeddings.numel() == 0:
+             logger.warning("Knowledge base embeddings are empty. No documents to retrieve.")
+             return []
+
+         if query_embedding.size(1) != self.kb_embeddings.size(1):
+             logger.error("Dimension mismatch between query embedding and KB embeddings.")
+             return []
+
+         cosine_scores = cosine_similarity(query_embedding.cpu().numpy(), self.kb_embeddings.cpu().numpy())[0]
+         top_indices = cosine_scores.argsort()[-top_k:][::-1]
+
+         # Ensure indices are within the knowledge_base length
+         top_indices = [idx for idx in top_indices if idx < len(self.knowledge_base)]
+
+         retrieved_docs = []
+         for idx in top_indices:
+             doc = self.knowledge_base[idx]
+             # Cast to a plain float so the KB stays JSON-serializable when saved later
+             doc['score'] = float(cosine_scores[idx])
+             retrieved_docs.append(doc)
+
+         logger.info(f"Retrieved top {len(retrieved_docs)} documents from Knowledge Base for the query.")
+         return retrieved_docs
+
+     # ==========================
+     # RAG Integration
+     # ==========================
+
+     @defer.inlineCallbacks
+     def retrieve_from_web(self, query, top_k=5):
+         logger.info(f"Performing web search for query: {query}")
+         mcts_iterations = self.calculate_mcts_iterations(np.zeros(self.state_size, dtype=np.float32))
+         self.mcts = MCTS(initial_state=query, num_simulations=mcts_iterations)
+
+         try:
+             new_query = yield self.mcts.run()
+             logger.debug(f"New query from MCTS: {new_query}")
+             # Select search sites
+             search_sites = self.select_search_sites(new_query)
+             results = yield self.mcts.web_search(new_query, search_sites)
+             logger.debug(f"Web search completed. Found {len(results)} results")
+             defer.returnValue(results[:top_k] if results else [])
+         except Exception as e:
+             logger.error(f"Error during MCTS or web search: {str(e)}", exc_info=True)
+             defer.returnValue([])
+
+     def combine_documents(self, kb_docs, web_docs):
+         combined = kb_docs + web_docs
+         logger.info(f"Combined {len(kb_docs)} KB documents and {len(web_docs)} Web documents.")
+         return combined
+
+     def save_llm_training_data(self, query, content, summary=None, link=None, title=None):
+         data = {
+             "query": query,
+             "search_result": {
+                 "link": link,
+                 "title": title
+             },
+             "content": content,
+             "description": summary
+         }
+
+         os.makedirs("llm_training_data", exist_ok=True)
+         file_path = "llm_training_data/llm_training_data.jsonl"
+
+         # Append the new data as a new line in the JSONL file
+         with open(file_path, 'a', encoding='utf-8') as f:
+             json.dump(data, f)
+             f.write('\n')
+
+         logger.info(f"Appended LLM training data to {file_path}")
+
+     # ==========================
+     # Hierarchical RL Integration
+     # ==========================
+
+     def remember_manager(self, state, option, reward, next_state, done, td_error):
+         self.manager.remember(state, option, reward, next_state, done, td_error)
+
+     def remember_worker(self, state, action, reward, next_state, done):
+         self.worker_memory.add(reward, (state, action, reward, next_state, done))
+
+     # ==========================
+     # Action Selection and Execution
+     # ==========================
+
+     def act_manager(self, state):
+         option = self.manager.act(state)
+         return option
+
+     def act_worker(self, state):
+         # WorldModel has no act() helper, so implement epsilon-greedy selection here
+         if random.random() < self.epsilon:
+             return random.randint(0, self.action_size - 1)
+         state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
+         with torch.no_grad():
+             action_scores, _, _ = self.worker_model(state_tensor)
+         return torch.argmax(action_scores, dim=1).item()
+
+     # ==========================
+     # Replay Methods
+     # ==========================
+
+     def replay_manager(self, batch_size=32, beta=0.4):
+         self.manager.replay(batch_size, beta)
+
+     def replay_worker(self, batch_size=32, beta=0.4):
+         # PrioritizedReplayMemory has no replay() method; guard on the number of
+         # stored entries and sample directly
+         if self.worker_memory.tree.n_entries < batch_size:
+             return
+         batch, idxs, weights = self.worker_memory.sample(batch_size, beta)
+         states, actions, rewards, next_states, dones = zip(*batch)
+
+         device = next(self.worker_model.parameters()).device
+         states = torch.FloatTensor(np.array(states)).to(device)
+         next_states = torch.FloatTensor(np.array(next_states)).to(device)
+         actions = torch.LongTensor(actions).unsqueeze(1).to(device)
+         rewards = torch.FloatTensor(rewards).unsqueeze(1).to(device)
+         dones = torch.FloatTensor(dones).unsqueeze(1).to(device)
+         weights = torch.FloatTensor(weights).unsqueeze(1).to(device)
+
+         # Current Q values (WorldModel returns action scores, state value, hidden)
+         current_q_values, _, _ = self.worker_model(states)
+         current_q_values = current_q_values.gather(1, actions)
+
+         # Target Q values
+         with torch.no_grad():
+             next_q_values, _, _ = self.worker_target_model(next_states)
+             max_next_q_values = next_q_values.max(1)[0].unsqueeze(1)
+             target_q_values = rewards + (self.gamma * max_next_q_values * (1 - dones))
+
+         # Compute TD errors
+         td_errors = target_q_values - current_q_values
+
+         # Compute loss with importance-sampling weights
+         loss = (td_errors.pow(2) * weights).mean()
+
+         # Optimize the model
+         self.worker_optimizer.zero_grad()
+         loss.backward()
+         torch.nn.utils.clip_grad_norm_(self.worker_model.parameters(), max_norm=1.0)
+         self.worker_optimizer.step()
+         self.worker_scheduler.step(loss.item())
+
+         # Update priorities
+         td_errors_np = td_errors.detach().cpu().numpy().squeeze()
+         for idx, td_error in zip(idxs, td_errors_np):
+             self.worker_memory.update(idx, np.abs(td_error))
+
+         # Decay epsilon
+         if self.epsilon > self.epsilon_min:
+             self.epsilon *= self.epsilon_decay
+             logger.debug(f"Updated epsilon to: {self.epsilon}")
+
+     # ==========================
+     # Load and Save Models
+     # ==========================
+
+     def load_worker_model(self, name):
+         self.worker_model.load_state_dict(torch.load(name, map_location=self.device))
+         logger.info(f"Loaded worker model weights from {name}")
+
+     def save_worker_model(self, name):
+         torch.save(self.worker_model.state_dict(), name)
+         logger.info(f"Saved worker model weights to {name}")
+
+     def load_manager_model(self, name):
+         self.manager.model.load_state_dict(torch.load(name, map_location=self.device))
+         self.manager.update_target_model()
+         logger.info(f"Loaded manager model weights from {name}")
+
+     def save_manager_model(self, name):
+         torch.save(self.manager.model.state_dict(), name)
+         logger.info(f"Saved manager model weights to {name}")
+
+     # ==========================
+     # Update Target Models
+     # ==========================
+
+     def update_worker_target_model(self):
+         self.worker_target_model.load_state_dict(self.worker_model.state_dict())
+         logger.info("Updated worker target model with current model weights")
+
+     def update_manager_target_model(self):
+         self.manager.update_target_model()
+         logger.info("Updated manager target model with current model weights")
+
+     # ==========================
+     # Feature Extraction
+     # ==========================
+
+     def extract_features(self, content, query):
+         content = truncate_text(content)
+         query = truncate_text(query)
+         soup = BeautifulSoup(content, 'html.parser')
+         text = soup.get_text()
+         word_count = len(text.split())
+         link_count = len(soup.find_all('a'))
+         header_count = len(soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']))
+
+         # Calculate semantic similarity
+         text_embedding = self.embedding_model.encode([text], convert_to_tensor=True).to(self.device)
+         query_embedding = self.embedding_model.encode([query], convert_to_tensor=True).to(self.device)
+         semantic_similarity = cosine_similarity(text_embedding.cpu().numpy(), query_embedding.cpu().numpy())[0][0]
+
+         # Additional Features
+         image_count = len(soup.find_all('img'))
+         script_count = len(soup.find_all('script'))
+         css_count = len(soup.find_all('link', rel='stylesheet'))
+
+         return np.array([word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count])
+
+     # ==========================
+     # Reward Calculation
+     # ==========================
+
+     def calculate_reward(self, content, query):
+         try:
+             ranked_results = train_ranking_model(query, [{'content': content}])
+             logger.debug(f"Ranked results: {ranked_results}")
+             if ranked_results and isinstance(ranked_results[0], dict) and 'predicted_score' in ranked_results[0]:
+                 reward = max(ranked_results[0]['predicted_score'], 0)
+                 logger.debug(f"Calculated reward: {reward}")
+                 return reward
+             else:
+                 logger.warning(f"Invalid ranked results: {ranked_results}")
+                 return 0
+         except Exception as e:
+             logger.error(f"Error in calculate_reward: {str(e)}", exc_info=True)
+             return 0
+
+     # ==========================
+     # Search Site Selection
+     # ==========================
+
+     def select_search_sites(self, query, num_sites=5):
+         # Select top sites based on past performance for this query
+         site_scores = {}
+         for (site, q), score in self.site_performance.items():
+             if q == query:
+                 site_scores[site] = site_scores.get(site, 0) + score
+         if site_scores:
+             sorted_sites = sorted(site_scores.items(), key=lambda x: x[1], reverse=True)
+             top_sites = [site for site, score in sorted_sites[:num_sites]]
+         else:
+             # If no past data, select random sites
+             top_sites = random.sample(self.all_search_sites, num_sites)
+         # Construct full URLs with query
+         search_sites = [site + query for site in top_sites]
+         return search_sites
+
+     # ==========================
+     # Search Method with HRL
+     # ==========================
+
+     @defer.inlineCallbacks
+     def search(self, query, max_steps=2):
+         logger.info(f"Starting search for query: {query}")
+         state = np.zeros(self.state_size, dtype=np.float32)
+         total_reward = 0
+         content = ""
+         done = False
+         results = None
+
+         try:
+             # High-Level: Manager selects an option
+             option = self.act_manager(state)
+             logger.debug(f"Manager selected option: {option}")
+
+             # Execute the selected option
+             if option == 0:  # Search Option
+                 logger.debug("Executing Search Option")
+                 results = yield self.retrieve_from_web(query)
+                 if results:
+                     content = results[0]['content']
+                     site = urlparse(results[0]['link']).netloc
+                     self.save_llm_training_data(
+                         query,
+                         content,
+                         summary=results[0].get('summary'),
+                         link=results[0].get('link'),
+                         title=results[0].get('title')
+                     )
+                     self.add_document_to_kb(title=results[0].get('title', 'No Title'), content=content, metadata=results[0].get('meta', {}))
+                     next_state = self.extract_features(content, query)
+                     reward = self.calculate_reward(content, query)
+                     logger.debug(f"Extracted features: {next_state}, Reward: {reward}")
+                     # Update site performance
+                     key = (site, query)
+                     self.site_performance[key] = self.site_performance.get(key, 0) + reward
+
+                     # Remember Manager's experience
+                     self.remember_manager(state, option, reward, next_state, done, td_error=reward)
+
+                     # Remember Worker's experience
+                     self.remember_worker(state, 0, reward, next_state, done)
+
+                     state = next_state.astype(np.float32)
+                     total_reward += reward
+
+                 else:
+                     reward = -1
+                     logger.warning(f"No results for query: {query}")
+                     # Remember Manager's experience
+                     self.remember_manager(state, option, reward, state, True, td_error=reward)
+
+             elif option == 1:  # Summarize Option
+                 logger.debug("Executing Summarize Option")
+                 if content:
+                     # Route through the agent's summarize() helper, which chunks the
+                     # content before calling generate_summary(query, relevant_chunks)
+                     summary = self.summarize(content, query)
+                     self.save_llm_training_data(
+                         query,
+                         content,
+                         summary=summary,
+                         link=results[0].get('link') if results else None,
+                         title=results[0].get('title') if results else None
+                     )
+                     reward = self.calculate_reward(summary, query)
+                     next_state = self.extract_features(summary, query)
+                     logger.info(f"Summary:\n{summary}")
+                     logger.info(f"Summarized content. Reward: {reward}")
+
+                     # Remember Manager's experience
+                     self.remember_manager(state, option, reward, next_state, done, td_error=reward)
+
+                     # Remember Worker's experience
+                     self.remember_worker(state, 1, reward, next_state, done)
+
+                     state = next_state.astype(np.float32)
+                     total_reward += reward
+                 else:
+                     reward = -1
+                     logger.warning("No content to summarize")
+                     # Remember Manager's experience
+                     self.remember_manager(state, option, reward, state, True, td_error=reward)
+
+             elif option == 2:  # RAG-based Generation Option
+                 logger.debug("Executing RAG-based Generation Option")
+                 kb_docs = self.retrieve_from_kb(query, top_k=5)
+                 web_docs = []  # Assuming web_docs are already retrieved
+                 combined_docs = self.combine_documents(kb_docs, web_docs)
+                 generated_output = self.generate_rag_response(query, combined_docs)
+                 logger.info(f"Generated Output:\n{generated_output}")
+                 self.save_llm_training_data(
+                     query,
+                     generated_output,
+                     summary=None,
+                     link=None,
+                     title="RAG-generated response"
+                 )
+                 reward = self.calculate_reward(generated_output, query)
+                 next_state = self.extract_features(generated_output, query)
+
+                 # Remember Manager's experience
+                 self.remember_manager(state, option, reward, next_state, done, td_error=reward)
+
+                 # Remember Worker's experience
+                 self.remember_worker(state, 2, reward, next_state, done)
+
+                 state = next_state.astype(np.float32)
+                 total_reward += reward
+
+             else:
+                 logger.warning(f"Unknown option selected by Manager: {option}")
+
+             # Perform replay for both Manager and Worker
+             self.replay_manager(batch_size=32, beta=0.4)
+             self.replay_worker(batch_size=32, beta=0.4)
+
+             # Update target models periodically
+             self.update_worker_target_model()
+             self.update_manager_target_model()
+
+             logger.info(f"Search completed. Total reward: {total_reward}")
+             defer.returnValue(total_reward)
+         except Exception as e:
+             logger.error(f"Error during search: {str(e)}", exc_info=True)
+             defer.returnValue(-1)  # Return a negative reward on error
+
+     # ==========================
+     # Summarization Method
+     # ==========================
+
+     def summarize(self, content, query):
+         chunks = self.summarizer.split_into_chunks(content)
+         embeddings = self.summarizer.get_embeddings(chunks)
+         relevant_chunks = self.summarizer.retrieve_relevant_chunks(query, chunks, embeddings)
+         summary = self.summarizer.generate_summary(query, relevant_chunks)
+
+         # Save RAG data
+         self.summarizer.save_rag_data(query, chunks, embeddings)
+
+         return summary
+
+     # ==========================
+     # MCTS Iterations Calculation
+     # ==========================
+
+     def calculate_mcts_iterations(self, state):
+         # Calculate MCTS iterations based on state complexity
+         base_iterations = 2
+         complexity_factor = np.mean(state) / 100  # Normalize state values
+         iterations = int(base_iterations * (1 + complexity_factor))
+         max_iterations = 5  # Set a reasonable maximum
+         return min(iterations, max_iterations)
+
+     # ==========================
+     # RAG-based Response Generation
+     # ==========================
+
+     def generate_rag_response(self, query, combined_docs):
+         if not combined_docs:
+             logger.warning("No documents available for RAG-based generation.")
+             return "I'm sorry, I couldn't find any relevant information."
+
+         # Prepare context for the generator
+         context = "\n\n".join([f"Title: {doc.get('title', 'No Title')}\nContent: {doc.get('content', '')}" for doc in combined_docs])
+         prompt = f"Query: {query}\n\nContext:\n{context}\n\nAnswer:"
+
+         # Check cache first
+         cache_key = hashlib.md5(prompt.encode()).hexdigest()
+         cached_response = self.summarizer.cache.get(cache_key)
+         if cached_response:
+             logger.debug("Using cached RAG response.")
+             return cached_response
+
+         # Generate response
+         input_ids = self.summarizer.tokenizer.encode(prompt, return_tensors='pt').to(self.summarizer.device)
+         try:
+             output = self.summarizer.model.generate(
+                 input_ids,
+                 max_length=input_ids.shape[1] + self.summarizer.max_length,
+                 num_return_sequences=1,
+                 no_repeat_ngram_size=2,
+                 top_k=50,
+                 top_p=0.95,
+                 temperature=0.7,
+                 early_stopping=True
+             )
+         except Exception as e:
+             logger.error(f"Error during RAG response generation: {str(e)}")
+             return "RAG response generation failed."
+
+         response = self.summarizer.tokenizer.decode(output[0], skip_special_tokens=True)
+         answer = response.split("Answer:")[-1].strip()
+
+         # Cache the response
+         self.summarizer.cache.put(cache_key, answer)
+         self.summarizer.save_persistent_cache()
+         return answer
+
+     # ==========================
+     # Manager and Worker Interaction
+     # ==========================
+
+     def select_option(self, option):
+         """
+         Define the mapping of options to their corresponding actions.
+         """
+         # This can be expanded based on the number of options
+         option_actions = {
+             0: self.perform_search,
+             1: self.perform_summarization,
+             2: self.perform_rag_generation
+         }
+         action = option_actions.get(option, None)
+         if action:
+             return action
+         else:
+             logger.error(f"No action defined for option: {option}")
+             return None
+
+     def perform_search(self, query):
+         """
+         Perform the search action.
+         """
+         # Implementation is handled in the 'search' method
+         pass
+
+     def perform_summarization(self, content, query):
+         """
+         Perform the summarization action.
+         """
+         # Implementation is handled in the 'summarize' method
+         pass
+
+     def perform_rag_generation(self, query, combined_docs):
+         """
+         Perform the RAG-based generation action.
+         """
+         # Implementation is handled in the 'generate_rag_response' method
+         pass
+
+ # ==========================
+ # LRUCache Class
+ # ==========================
+
+ class LRUCache:
+     def __init__(self, capacity):
+         self.cache = OrderedDict()
+         self.capacity = capacity
+
+     def get(self, key):
+         if key not in self.cache:
+             return None
+         self.cache.move_to_end(key)
+         return self.cache[key]
+
+     def put(self, key, value):
+         if key in self.cache:
+             self.cache.move_to_end(key)
+         self.cache[key] = value
+         if len(self.cache) > self.capacity:
+             self.cache.popitem(last=False)
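The SumTree makes prioritized sampling run in O(log n): `sample` splits the total priority mass into `batch_size` segments and draws one leaf per segment, so transitions with larger TD errors are drawn proportionally more often, while `(n_entries * probs) ** (-beta)` produces the importance-sampling weights that correct for that bias. A minimal standalone check of this behavior (illustrative only; it uses `PrioritizedReplayMemory` exactly as defined above):

    # per_sketch.py -- illustrative only, not part of the upload
    from agent import PrioritizedReplayMemory

    memory = PrioritizedReplayMemory(capacity=8, alpha=0.6)
    # Transitions with larger TD errors receive proportionally higher priority
    for i, td_error in enumerate([0.1, 0.5, 2.0, 0.05]):
        memory.add(td_error, ("state", i, "reward"))

    batch, idxs, weights = memory.sample(batch_size=2, beta=0.4)
    print(batch)    # the high-error transition tends to be sampled most often
    print(weights)  # importance-sampling weights, normalized to a max of 1.0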
lightbulb.py ADDED
@@ -0,0 +1,1696 @@
1
+ import argparse
2
+ import math
3
+ import os
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import torch.optim as optim
8
+ from torch.utils.data import DataLoader
9
+ import copy
10
+ from torch.optim.lr_scheduler import CosineAnnealingLR
11
+ from torch.cuda.amp import autocast, GradScaler
12
+ from datasets import load_dataset
13
+ from transformers import AutoTokenizer
14
+
15
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
+
17
+ def parse_args():
18
+ parser = argparse.ArgumentParser(description='Train or Inference with World Model and Tree of Thought.')
19
+ parser.add_argument('--model_name', type=str, default='gpt2', help='Pretrained model name or path')
20
+ parser.add_argument('--dataset_name', type=str, default='wikitext', help='Dataset name from HuggingFace Datasets')
21
+ parser.add_argument('--dataset_config', type=str, default='wikitext-2-raw-v1', help='Dataset configuration name')
22
+ parser.add_argument('--batch_size', type=int, default=4, help='Batch size')
23
+ parser.add_argument('--num_epochs', type=int, default=3, help='Number of epochs')
24
+ parser.add_argument('--max_length', type=int, default=128, help='Maximum sequence length')
25
+ parser.add_argument('--mcts_iterations', type=int, default=3, help='Number of MCTS Iterations')
26
+ parser.add_argument('--mcts_exploration_constant', type=float, default=1.414, help='Exploration constant for MCTS')
27
+ parser.add_argument('--accumulation_steps', type=int, default=4, help='Gradient accumulation steps')
28
+ parser.add_argument('--learning_rate', type=float, default=1e-4, help='Learning rate')
29
+ parser.add_argument('--weight_decay', type=float, default=1e-2, help='Weight decay')
30
+ parser.add_argument('--alpha', type=float, default=0.1, help='Entropy regularization weight')
31
+ parser.add_argument('--beta', type=float, default=0.1, help='Variance regularization weight')
32
+ parser.add_argument('--max_grad_norm', type=float, default=1.0, help='Max gradient norm for clipping')
33
+ parser.add_argument('--save_dir', type=str, default='./models', help='Directory to save the models')
34
+ parser.add_argument('--temperature', type=float, default=1.0, help='Temperature parameter for entropy and variance')
35
+ parser.add_argument('--mode', type=str, choices=['train', 'inference'], default='train', help='Mode: train or inference')
36
+ parser.add_argument('--inference_mode', type=str, choices=['world_model', 'without_world_model', 'world_model_tree_of_thought'], default='world_model_tree_of_thought', help='Inference mode')
37
+ parser.add_argument('--query', type=str, default='', help='Input query for inference')
38
+ parser.add_argument('--train_mode', type=str, choices=['world_model', 'language_model'], default='world_model', help='Train world model or language model only')
39
+
40
+ # Use parse_known_args to ignore unknown arguments
41
+ args, unknown = parser.parse_known_args()
42
+ return args
43
+
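+ # Example invocation (a sketch; the script name and flag values are illustrative,
+ # drawn from the defaults above, not prescriptive):
+ #   python train.py --mode train --train_mode world_model \
+ #       --model_name gpt2 --dataset_name wikitext --dataset_config wikitext-2-raw-v1 \
+ #       --batch_size 4 --num_epochs 3 --learning_rate 1e-4
+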
44
+ def load_data(args, tokenizer):
45
+ # Load the dataset
46
+ dataset = load_dataset(args.dataset_name, args.dataset_config)
47
+
48
+ # Ensure the tokenizer has a padding token
49
+ if tokenizer.pad_token is None:
50
+ tokenizer.pad_token = tokenizer.eos_token
51
+
52
+ def tokenize_function(examples):
53
+ return tokenizer(examples['text'], truncation=True, max_length=args.max_length)
54
+
55
+ tokenized_datasets = dataset.map(
56
+ tokenize_function,
57
+ batched=True,
58
+ num_proc=4,
59
+ remove_columns=dataset['train'].column_names,
60
+ )
61
+
62
+ # Build inputs and labels for language modeling
63
+ block_size = args.max_length
64
+
65
+ def group_texts(examples):
66
+ # Concatenate all texts
67
+ concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
68
+ total_length = len(concatenated_examples['input_ids'])
69
+ # We drop the small remainder
70
+ total_length = (total_length // block_size) * block_size
71
+ # Split by chunks of block_size
72
+ result = {
73
+ k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
74
+ for k, t in concatenated_examples.items()
75
+ }
76
+ result['labels'] = result['input_ids'].copy()
77
+ return result
78
+
79
+ lm_datasets = tokenized_datasets.map(
80
+ group_texts,
81
+ batched=True,
82
+ num_proc=4,
83
+ )
84
+
85
+ # Create DataLoader
86
+ train_dataset = lm_datasets['train']
87
+ eval_dataset = lm_datasets['validation'] if 'validation' in lm_datasets else lm_datasets['test']
88
+
89
+ def data_collator(data):
90
+ return {
91
+ 'input_ids': torch.tensor([f['input_ids'] for f in data], dtype=torch.long),
92
+ 'labels': torch.tensor([f['labels'] for f in data], dtype=torch.long)
93
+ }
94
+
95
+ train_loader = DataLoader(
96
+ train_dataset,
97
+ shuffle=True,
98
+ batch_size=args.batch_size,
99
+ collate_fn=data_collator,
100
+ pin_memory=True, # Speeds up transfer to GPU
101
+ num_workers=4
102
+ )
103
+ eval_loader = DataLoader(
104
+ eval_dataset,
105
+ shuffle=False,
106
+ batch_size=args.batch_size,
107
+ collate_fn=data_collator,
108
+ pin_memory=True,
109
+ num_workers=4
110
+ )
111
+
112
+ return train_loader, eval_loader
113
+
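+ # Usage sketch (the tokenizer is assumed to be created from args.model_name,
+ # as elsewhere in this file):
+ #   tokenizer = AutoTokenizer.from_pretrained(args.model_name)
+ #   train_loader, eval_loader = load_data(args, tokenizer)
+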
114
+ def save_all_models(transformer_model, representation_network, dynamics_network, prediction_network, action_encoder, save_dir, epoch):
115
+ """
116
+ Save all models to the specified directory.
117
+
118
+ Args:
119
+ transformer_model (nn.Module): Transformer model.
120
+ representation_network (nn.Module): Representation network.
121
+ dynamics_network (nn.Module): Dynamics network.
122
+ prediction_network (nn.Module): Prediction network.
123
+ action_encoder (nn.Module): Action encoder.
124
+ save_dir (str): Directory to save the models.
125
+ epoch (int): Current epoch number.
126
+ """
127
+ os.makedirs(save_dir, exist_ok=True)
128
+
129
+ torch.save(transformer_model.state_dict(), os.path.join(save_dir, f'transformer_model_epoch_{epoch}.pt'))
130
+ torch.save(representation_network.state_dict(), os.path.join(save_dir, f'representation_network_epoch_{epoch}.pt'))
131
+ torch.save(dynamics_network.state_dict(), os.path.join(save_dir, f'dynamics_network_epoch_{epoch}.pt'))
132
+ torch.save(prediction_network.state_dict(), os.path.join(save_dir, f'prediction_network_epoch_{epoch}.pt'))
133
+ torch.save(action_encoder.state_dict(), os.path.join(save_dir, f'action_encoder_epoch_{epoch}.pt'))
134
+
135
+ print(f"All models saved for epoch {epoch}.")
136
+
137
+ class RotaryPositionalEncoding(nn.Module):
138
+ def __init__(self, d_model):
139
+ super(RotaryPositionalEncoding, self).__init__()
140
+ inv_freq = 1.0 / (10000 ** (torch.arange(0, d_model, 2).float() / d_model))
141
+ self.register_buffer('inv_freq', inv_freq)
142
+
143
+ def forward(self, x):
144
+ seq_len, batch_size, _ = x.size()
145
+ t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
146
+ sinusoid_inp = torch.einsum("i,j->ij", t, self.inv_freq)
147
+ sin = sinusoid_inp.sin().unsqueeze(1) # (seq_len, 1, d_model/2)
148
+ cos = sinusoid_inp.cos().unsqueeze(1) # (seq_len, 1, d_model/2)
149
+
150
+ x1 = x[..., 0::2]
151
+ x2 = x[..., 1::2]
152
+
153
+ # Apply rotation
154
+ x_rotated = torch.zeros_like(x)
155
+ x_rotated[..., 0::2] = x1 * cos - x2 * sin
156
+ x_rotated[..., 1::2] = x1 * sin + x2 * cos
157
+
158
+ return x_rotated
159
+
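+ # Shape check (illustrative): the rotary encoding is shape-preserving on
+ # (seq_len, batch_size, d_model) inputs with even d_model, e.g.
+ #   rope = RotaryPositionalEncoding(16)
+ #   assert rope(torch.randn(8, 2, 16)).shape == (8, 2, 16)
+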
160
+ class MultiHeadAttention(nn.Module):
161
+ def __init__(self, d_model, num_heads):
162
+ super(MultiHeadAttention, self).__init__()
163
+ assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
164
+ self.d_k = d_model // num_heads
165
+ self.num_heads = num_heads
166
+ self.linear_q = nn.Linear(d_model, d_model)
167
+ self.linear_k = nn.Linear(d_model, d_model)
168
+ self.linear_v = nn.Linear(d_model, d_model)
169
+ self.linear_out = nn.Linear(d_model, d_model)
170
+
171
+ def forward(self, query, key, value, mask=None):
172
+ batch_size = query.size(0)
173
+ query = self.linear_q(query).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
174
+ key = self.linear_k(key).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
175
+ value = self.linear_v(value).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
176
+
177
+ scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
178
+ if mask is not None:
179
+ scores = scores.masked_fill(mask == 0, -1e4)
180
+ attn = F.softmax(scores, dim=-1)
181
+ output = torch.matmul(attn, value)
182
+
183
+ output = output.transpose(1, 2).contiguous().view(batch_size, -1, self.num_heads * self.d_k)
184
+ return self.linear_out(output)
185
+
186
+ class MoE(nn.Module):
187
+ def __init__(self, d_model, num_experts, d_ff, top_k=2, dropout=0.1):
188
+ super(MoE, self).__init__()
189
+ self.num_experts = num_experts
190
+ self.top_k = top_k
191
+ self.experts = nn.ModuleList([
192
+ nn.Sequential(
193
+ nn.Linear(d_model, d_ff),
194
+ nn.GELU() if i % 2 == 0 else nn.SiLU(),
195
+ nn.Linear(d_ff, d_model)
196
+ )
197
+ for i in range(num_experts)
198
+ ])
199
+ self.gate = nn.Linear(d_model, num_experts)
200
+ self.dropout = nn.Dropout(dropout)
201
+
202
+ def forward(self, x):
203
+ batch_size, seq_len, d_model = x.size()
204
+ # Compute gating scores
205
+ gate_scores = self.gate(x) # (batch_size, seq_len, num_experts)
206
+ top_k_scores, top_k_indices = torch.topk(gate_scores, self.top_k, dim=-1) # (batch_size, seq_len, top_k)
207
+ top_k_scores = F.softmax(top_k_scores, dim=-1) # (batch_size, seq_len, top_k)
208
+
209
+ # Initialize output
210
+ output = torch.zeros_like(x)
211
+
212
+ # Flatten batch and sequence dimensions
213
+ x_flat = x.view(-1, d_model) # (batch_size * seq_len, d_model)
214
+ output_flat = output.view(-1, d_model)
215
+ top_k_indices_flat = top_k_indices.view(-1, self.top_k) # (batch_size * seq_len, top_k)
216
+ top_k_scores_flat = top_k_scores.view(-1, self.top_k) # (batch_size * seq_len, top_k)
217
+
218
+ for k in range(self.top_k):
219
+ expert_idx_flat = top_k_indices_flat[:, k] # (batch_size * seq_len)
220
+ expert_scores_flat = top_k_scores_flat[:, k] # (batch_size * seq_len)
221
+ for e in range(self.num_experts):
222
+ mask = (expert_idx_flat == e) # Boolean mask
223
+ if mask.any():
224
+ x_masked = x_flat[mask] # Select tokens for expert e
225
+ expert_output = self.experts[e](x_masked) # Apply expert e
226
+ output_flat[mask] += expert_scores_flat[mask].unsqueeze(-1) * expert_output
227
+
228
+ output = output_flat.view(batch_size, seq_len, d_model)
229
+ return self.dropout(output)
230
+
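+ # Gating sketch (illustrative values): with num_experts=4 and top_k=2, each token is
+ # routed to its two highest-scoring experts, whose outputs are mixed by softmax weights:
+ #   moe = MoE(d_model=32, num_experts=4, d_ff=64, top_k=2)
+ #   out = moe(torch.randn(2, 10, 32))   # -> (2, 10, 32)
+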
231
+ class TransformerBlock(nn.Module):
232
+ def __init__(self, d_model, num_heads, d_ff, num_experts, dropout=0.1, top_k=2):
233
+ super(TransformerBlock, self).__init__()
234
+ self.self_attention = MultiHeadAttention(d_model, num_heads)
235
+ self.norm1 = nn.LayerNorm(d_model)
236
+ self.cross_attention = MultiHeadAttention(d_model, num_heads)
237
+ self.norm2 = nn.LayerNorm(d_model)
238
+ self.moe = MoE(d_model, num_experts, d_ff, top_k, dropout)
239
+ self.norm3 = nn.LayerNorm(d_model)
240
+
241
+ def forward(self, x, mask=None, enc_output=None, enc_mask=None):
242
+ # Self-attention
243
+ attn_output = self.self_attention(x, x, x, mask)
244
+ x = self.norm1(x + attn_output)
245
+ # Cross-attention (only in decoder)
246
+ if enc_output is not None:
247
+ cross_attn_output = self.cross_attention(x, enc_output, enc_output, enc_mask)
248
+ x = self.norm2(x + cross_attn_output)
249
+ # Feedforward/MoE
250
+ moe_output = self.moe(x)
251
+ return self.norm3(x + moe_output)
252
+
253
+ class Transformer(nn.Module):
254
+ def __init__(self, input_dim, d_model, num_heads, num_layers, d_ff, num_experts, output_dim, dropout=0.1, top_k=2):
255
+ super(Transformer, self).__init__()
256
+ self.embedding = nn.Embedding(input_dim, d_model, padding_idx=input_dim - 1)
257
+ self.rotary_positional_encoding = RotaryPositionalEncoding(d_model)
258
+ self.encoder_layers = nn.ModuleList(
259
+ [TransformerBlock(d_model, num_heads, d_ff, num_experts, dropout, top_k) for _ in range(num_layers)]
260
+ )
261
+ self.decoder_layers = nn.ModuleList(
262
+ [TransformerBlock(d_model, num_heads, d_ff, num_experts, dropout, top_k) for _ in range(num_layers)]
263
+ )
264
+ self.output_layer = nn.Linear(d_model, output_dim)
265
+ self.d_model = d_model
266
+
267
+ def forward(self, src, tgt, src_mask=None, tgt_mask=None):
268
+ # Encoder
269
+ src = self.embedding(src) * math.sqrt(self.d_model)
270
+ src = src.transpose(0, 1) # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)
271
+ src = self.rotary_positional_encoding(src)
272
+ src = src.transpose(0, 1) # (seq_len, batch_size, d_model) -> (batch_size, seq_len, d_model)
273
+ for layer in self.encoder_layers:
274
+ src = layer(src, src_mask)
275
+
276
+ # Decoder
277
+ tgt = self.embedding(tgt) * math.sqrt(self.d_model)
278
+ tgt = tgt.transpose(0, 1)
279
+ tgt = self.rotary_positional_encoding(tgt)
280
+ tgt = tgt.transpose(0, 1)
281
+ for layer in self.decoder_layers:
282
+ tgt = layer(tgt, tgt_mask, src, src_mask)
283
+ output = self.output_layer(tgt)
284
+ return output
285
+
286
+ def generate(self, src, tokenizer, max_length=20, temperature=1.0):
287
+ """
288
+ Generate sequences using Gumbel-Softmax sampling, with hard argmax token selection at each step.
289
+
290
+ Args:
291
+ src (torch.Tensor): Source input tensor of shape (batch_size, seq_len)
292
+ tokenizer (transformers.PreTrainedTokenizer): Tokenizer to access special tokens
293
+ max_length (int): Maximum length of the generated sequence
294
+ temperature (float): Temperature parameter for Gumbel-Softmax
295
+
296
+ Returns:
297
+ torch.Tensor: Generated sequences of shape (batch_size, max_length)
298
+ torch.Tensor: Entropy values for each time step
299
+ torch.Tensor: Variance values for each time step
300
+ """
301
+ batch_size = src.size(0)
302
+
303
+ # Encode the source
304
+ src_enc = self.embedding(src) * math.sqrt(self.d_model)
305
+ src_enc = src_enc.transpose(0, 1)
306
+ src_enc = self.rotary_positional_encoding(src_enc)
307
+ src_enc = src_enc.transpose(0, 1)
308
+ for layer in self.encoder_layers:
309
+ src_enc = layer(src_enc)
310
+
311
+ # Initialize decoder input with <sos> tokens
312
+ tgt_seq = torch.full((batch_size, 1), tokenizer.bos_token_id, dtype=torch.long, device=src.device)
313
+ entropies = []
314
+ variances = []
315
+
316
+ for _ in range(max_length):
317
+ tgt_emb = self.embedding(tgt_seq) * math.sqrt(self.d_model)
318
+ tgt_emb = tgt_emb.transpose(0, 1)
319
+ tgt_emb = self.rotary_positional_encoding(tgt_emb)
320
+ tgt_emb = tgt_emb.transpose(0, 1)
321
+ tgt_dec = tgt_emb
322
+ for layer in self.decoder_layers:
323
+ tgt_dec = layer(tgt_dec, None, src_enc, None)
324
+ output = self.output_layer(tgt_dec) # (batch_size, seq_len, vocab_size)
325
+ logits = output[:, -1, :] # Get logits for the last time step
326
+
327
+ # Compute token probabilities
328
+ probs = F.softmax(logits / temperature, dim=-1) # (batch_size, vocab_size)
329
+
330
+ # Compute entropy
331
+ entropy = -torch.sum(probs * torch.log(probs + 1e-9), dim=-1) # (batch_size)
332
+ entropies.append(entropy)
333
+
334
+ # Sample token using Gumbel-Softmax
335
+ gumbel_noise = -torch.log(-torch.log(torch.rand_like(probs) + 1e-9) + 1e-9)
336
+ y = (logits + gumbel_noise) / temperature
337
+ y = F.softmax(y, dim=-1) # (batch_size, vocab_size)
338
+
339
+ # Compute variance
340
+ variance = torch.var(y, dim=-1) # (batch_size)
341
+ variances.append(variance)
342
+
343
+ # Get token indices (argmax for hard selection)
344
+ next_tokens = torch.argmax(y, dim=-1, keepdim=True) # (batch_size, 1)
345
+ tgt_seq = torch.cat([tgt_seq, next_tokens], dim=1)
346
+
347
+ # Stack entropies and variances
348
+ entropies = torch.stack(entropies, dim=1) # (batch_size, max_length)
349
+ variances = torch.stack(variances, dim=1) # (batch_size, max_length)
350
+
351
+ return tgt_seq[:, 1:], entropies, variances # Exclude the initial <sos> token
352
+
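+ # Generation sketch (mirrors the call in infer() below; the prompt is illustrative):
+ #   src = tokenizer.encode("example prompt", return_tensors='pt').to(device)
+ #   ids, entropies, variances = model_transformer.generate(src, tokenizer, max_length=20, temperature=1.0)
+ #   text = tokenizer.decode(ids[0], skip_special_tokens=True)
+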
353
+ # Objective Functions
354
+
355
+ class InfoNCE_Loss(nn.Module):
356
+ def __init__(self, temperature=0.07):
357
+ super(InfoNCE_Loss, self).__init__()
358
+ self.temperature = temperature
359
+ self.cross_entropy = nn.CrossEntropyLoss()
360
+
361
+ def forward(self, z_i, z_j):
362
+ """
363
+ Args:
364
+ z_i (torch.Tensor): Flattened representations from view i, shape (2n, embed_dim)
365
+ z_j (torch.Tensor): Flattened representations from view j, shape (2n, embed_dim)
366
+
367
+ Returns:
368
+ torch.Tensor: InfoNCE loss
369
+ """
370
+ n = z_i.size(0)
371
+ z = torch.cat([z_i, z_j], dim=0) # Shape: (2n, embed_dim)
372
+
373
+ z = F.normalize(z, dim=1)
374
+ similarity_matrix = torch.matmul(z, z.T) # Shape: (2n, 2n)
375
+
376
+ # Create a mask to exclude self-similarity
377
+ mask = torch.eye(2 * n, device=z.device, dtype=torch.bool)
378
+ similarity_matrix = similarity_matrix.masked_fill(mask, -1e4) # Use a manageable negative value
379
+
380
+ # Create labels for contrastive learning
381
+ labels = torch.arange(n, device=z.device)
382
+ labels = torch.cat([labels + n, labels], dim=0) # Shape: (2n,)
383
+
384
+ # Apply temperature scaling
385
+ similarity_matrix /= self.temperature
386
+
387
+ # Compute cross-entropy loss
388
+ loss = self.cross_entropy(similarity_matrix, labels)
389
+ return loss
390
+
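+ # Usage sketch: z_j is a second "view" of z_i; here a dropout-perturbed copy, exactly
+ # as in train_epoch_world_model below:
+ #   z_i = state_representation.view(-1, state_dim)
+ #   z_j = F.dropout(z_i, p=0.1, training=True)
+ #   loss = InfoNCE_Loss(temperature=0.07)(z_i, z_j)
+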
391
+ class CovarianceRegularization(nn.Module):
392
+ def __init__(self, lambda_reg=1e-3):
393
+ super(CovarianceRegularization, self).__init__()
394
+ self.lambda_reg = lambda_reg
395
+
396
+ def forward(self, embeddings):
397
+ """
398
+ Args:
399
+ embeddings (torch.Tensor): Embedding tensor, shape (batch_size, embed_dim)
400
+
401
+ Returns:
402
+ torch.Tensor: Covariance regularization loss
403
+ """
404
+ batch_size, embed_dim = embeddings.size()
405
+ mean = embeddings.mean(dim=0)
406
+ embeddings_centered = embeddings - mean
407
+ cov = (embeddings_centered.T @ embeddings_centered) / (batch_size - 1)
408
+ cov_loss = torch.sum(cov ** 2) - torch.sum(torch.diag(cov) ** 2)
409
+ return self.lambda_reg * cov_loss
410
+
411
+ class DynamicsPerformanceLoss(nn.Module):
412
+ def __init__(self, lambda_var=1e-3):
413
+ super(DynamicsPerformanceLoss, self).__init__()
414
+ self.lambda_var = lambda_var
415
+
416
+ def forward(self, true_next_state, predicted_next_state):
417
+ """
418
+ Args:
419
+ true_next_state (torch.Tensor): Ground truth next state, shape (batch_size, state_dim)
420
+ predicted_next_state (torch.Tensor): Predicted next state, shape (batch_size, state_dim)
421
+
422
+ Returns:
423
+ torch.Tensor: Dynamics performance loss
424
+ """
425
+ mse_loss = F.mse_loss(predicted_next_state, true_next_state)
426
+ variance_loss = torch.var(predicted_next_state, dim=0).mean()
427
+ return mse_loss + self.lambda_var * variance_loss
428
+
429
+ class ThoughtConsistencyLoss(nn.Module):
430
+ def __init__(self):
431
+ super(ThoughtConsistencyLoss, self).__init__()
432
+
433
+ def forward(self, true_next_state, perturbed_next_state):
434
+ """
435
+ Args:
436
+ true_next_state (torch.Tensor): Ground truth next state, shape (batch_size, state_dim)
437
+ perturbed_next_state (torch.Tensor): Perturbed next state, shape (batch_size, state_dim)
438
+
439
+ Returns:
440
+ torch.Tensor: Thought-consistency loss
441
+ """
442
+ return F.mse_loss(true_next_state, perturbed_next_state)
443
+
444
+ class PolicyValueJointLoss(nn.Module):
445
+ def __init__(self, lambda_value=0.5):
446
+ super(PolicyValueJointLoss, self).__init__()
447
+ self.lambda_value = lambda_value
448
+ self.cross_entropy = nn.CrossEntropyLoss()
449
+ self.mse_loss = nn.MSELoss()
450
+
451
+ def forward(self, policy_logits, true_policy, value_pred, true_value):
452
+ """
453
+ Args:
454
+ policy_logits (torch.Tensor): Logits from the policy network, shape (batch_size * seq_len, num_actions)
455
+ true_policy (torch.Tensor): Ground truth policy, shape (batch_size * seq_len, num_actions)
456
+ value_pred (torch.Tensor): Predicted values, shape (batch_size * seq_len)
457
+ true_value (torch.Tensor): Ground truth values, shape (batch_size * seq_len)
458
+
459
+ Returns:
460
+ torch.Tensor: Combined policy and value loss
461
+ """
462
+ policy_logits = policy_logits.view(-1, policy_logits.size(-1))
463
+ true_policy = true_policy.view(-1, true_policy.size(-1))
464
+ value_pred = value_pred.view(-1)
465
+ true_value = true_value.view(-1)
466
+
467
+ policy_loss = self.cross_entropy(policy_logits, true_policy.argmax(dim=1))
468
+ value_loss = self.mse_loss(value_pred, true_value)
469
+ return policy_loss + self.lambda_value * value_loss
470
+
471
+ class ActionDiversityReward(nn.Module):
472
+ def __init__(self, lambda_div=1e-3):
473
+ super(ActionDiversityReward, self).__init__()
474
+ self.lambda_div = lambda_div
475
+
476
+ def forward(self, action_embeddings):
477
+ """
478
+ Args:
479
+ action_embeddings (torch.Tensor): Embeddings of actions, shape (batch_size, embed_dim)
480
+
481
+ Returns:
482
+ torch.Tensor: Action diversity loss
483
+ """
484
+ similarity_matrix = F.cosine_similarity(action_embeddings.unsqueeze(1), action_embeddings.unsqueeze(0), dim=2)
485
+ # Zero out self-similarity
486
+ similarity_matrix = similarity_matrix - torch.eye(similarity_matrix.size(0)).to(action_embeddings.device)
487
+ diversity_loss = torch.sum(similarity_matrix ** 2)
488
+ return self.lambda_div * diversity_loss
489
+
490
+ class ExpectedThoughtValueLoss(nn.Module):
491
+ def __init__(self):
492
+ super(ExpectedThoughtValueLoss, self).__init__()
493
+
494
+ def forward(self, mcts_best_values):
495
+ """
496
+ Args:
497
+ mcts_best_values (torch.Tensor): Best values from MCTS, shape (batch_size)
498
+
499
+ Returns:
500
+ torch.Tensor: ETV loss
501
+ """
502
+ return -mcts_best_values.mean()
503
+
504
+ class ExplorationRegularization(nn.Module):
505
+ def __init__(self, lambda_expl=1e-3):
506
+ super(ExplorationRegularization, self).__init__()
507
+ self.lambda_expl = lambda_expl
508
+
509
+ def forward(self, visit_counts):
510
+ """
511
+ Args:
512
+ visit_counts (torch.Tensor): Visit counts for actions, shape (batch_size, num_actions)
513
+
514
+ Returns:
515
+ torch.Tensor: Exploration regularization loss
516
+ """
517
+ reward = torch.sum(1.0 / (visit_counts + 1), dim=-1)
518
+ return self.lambda_expl * reward.mean()
519
+
520
+ class KL_DivergenceLoss(nn.Module):
521
+ def __init__(self):
522
+ super(KL_DivergenceLoss, self).__init__()
523
+
524
+ def forward(self, old_policy, new_policy):
525
+ """
526
+ Args:
527
+ old_policy (torch.Tensor): Old policy probabilities, shape (batch_size, num_actions)
528
+ new_policy (torch.Tensor): New policy probabilities, shape (batch_size, num_actions)
529
+
530
+ Returns:
531
+ torch.Tensor: KL divergence loss
532
+ """
533
+ kl_div = F.kl_div(new_policy.log(), old_policy, reduction='batchmean')
534
+ return kl_div
535
+
536
+ # MuZero Components
537
+
538
+ class ActionEncoder(nn.Module):
539
+ def __init__(self, action_vocab_size, embed_dim):
540
+ super(ActionEncoder, self).__init__()
541
+ self.embedding = nn.Embedding(action_vocab_size, embed_dim)
542
+
543
+ def forward(self, action_indices):
544
+ """
545
+ Args:
546
+ action_indices (torch.Tensor): Tensor of shape (batch_size, seq_len)
547
+
548
+ Returns:
549
+ torch.Tensor: Encoded actions of shape (batch_size, seq_len, embed_dim)
550
+ """
551
+ return self.embedding(action_indices)
552
+
553
+ class RepresentationNetwork(nn.Module):
554
+ def __init__(self, vocab_dim, d_model, state_dim):
555
+ super(RepresentationNetwork, self).__init__()
556
+ self.proj = nn.Linear(vocab_dim, d_model) # Project from vocab_dim to d_model
557
+ self.linear = nn.Linear(d_model, state_dim) # Project from d_model to state_dim
558
+ self.norm = nn.LayerNorm(state_dim)
559
+
560
+ def forward(self, transformer_output):
561
+ """
562
+ Args:
563
+ transformer_output (torch.Tensor): Shape (batch_size, seq_len, vocab_dim)
564
+
565
+ Returns:
566
+ torch.Tensor: Encoded state of shape (batch_size, seq_len, state_dim)
567
+ """
568
+ # First project down from vocab_dim to d_model
569
+ projected_output = self.proj(transformer_output)
570
+ # Then project down from d_model to state_dim
571
+ state = self.linear(projected_output)
572
+ state = self.norm(state)
573
+ return state
574
+
575
+ class DynamicsNetwork(nn.Module):
576
+ def __init__(self, state_dim, action_dim, hidden_dim):
577
+ super(DynamicsNetwork, self).__init__()
578
+ self.rms_norm = nn.LayerNorm(state_dim)
579
+ self.fc1 = nn.Linear(state_dim + action_dim, hidden_dim)
580
+ self.activation = nn.GELU()
581
+ self.fc2 = nn.Linear(hidden_dim, state_dim)
582
+
583
+ def forward(self, state, action):
584
+ """
585
+ Args:
586
+ state (torch.Tensor): Current state, shape (batch_size, seq_len, state_dim)
587
+ action (torch.Tensor): Action embedding, shape (batch_size, seq_len, action_dim)
588
+
589
+ Returns:
590
+ torch.Tensor: Predicted next state, shape (batch_size, seq_len, state_dim)
591
+ """
592
+ norm_state = self.rms_norm(state)
593
+ combined = torch.cat([norm_state, action], dim=-1)
594
+ hidden = self.activation(self.fc1(combined))
595
+ next_state = self.fc2(hidden)
596
+ return next_state
597
+
598
+ class PredictionNetwork(nn.Module):
599
+ def __init__(self, state_dim, action_vocab_size, value_dim):
600
+ super(PredictionNetwork, self).__init__()
601
+ self.state_dim = state_dim
602
+ self.rms_norm = nn.LayerNorm(state_dim)
603
+ self.policy_head = nn.Linear(state_dim, action_vocab_size) # Output size is action_vocab_size
604
+ self.value_head = nn.Linear(state_dim, value_dim)
605
+
606
+ def forward(self, state):
607
+ """
608
+ Args:
609
+ state (torch.Tensor): State representation, shape (batch_size, seq_len, state_dim)
610
+ Returns:
611
+ Tuple[torch.Tensor, torch.Tensor]: Policy logits and value estimates
612
+ """
613
+ norm_state = self.rms_norm(state)
614
+ policy_logits = self.policy_head(norm_state) # Shape: (batch_size, seq_len, action_vocab_size)
615
+ value_estimates = self.value_head(norm_state).squeeze(-1) # Shape: (batch_size, seq_len)
616
+ return policy_logits, value_estimates
617
+
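+ # Wiring sketch: the three MuZero-style networks compose as in train_epoch_world_model:
+ #   state  = representation_network(transformer_output)          # (B, T, state_dim)
+ #   next_s = dynamics_network(state, action_encoder(actions))    # (B, T, state_dim)
+ #   logits, values = prediction_network(next_s)                  # (B, T, A), (B, T)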
618
+
619
+ # Tree of Thought Components
620
+
621
+ class ThoughtNode:
622
+ def __init__(self, name):
623
+ self.name = name
624
+ self.children = []
625
+ self.parent = None
626
+
627
+ def add_child(self, child_node):
628
+ child_node.parent = self
629
+ self.children.append(child_node)
630
+
631
+ # Function to build the Tree of Thought from the detailed problem-solving structure below
632
+ def build_tree_of_thought():
633
+ # Create the root node
634
+ root = ThoughtNode('Problem-Solving Process')
635
+
636
+ # Level 1 nodes
637
+ problem_identification = ThoughtNode('Problem Identification')
638
+ problem_analysis = ThoughtNode('Problem Analysis')
639
+ solution_generation = ThoughtNode('Solution Generation')
640
+ implementation = ThoughtNode('Implementation')
641
+ evaluation_adjustment = ThoughtNode('Evaluation and Adjustment')
642
+
643
+ root.add_child(problem_identification)
644
+ root.add_child(problem_analysis)
645
+ root.add_child(solution_generation)
646
+ root.add_child(implementation)
647
+ root.add_child(evaluation_adjustment)
648
+
649
+ # Problem Identification children
650
+ B1 = ThoughtNode('Define the Problem')
651
+ B2 = ThoughtNode('Identify Stakeholders')
652
+ B3 = ThoughtNode('Determine Constraints')
653
+ B4 = ThoughtNode('Recognize Problem Type')
654
+ B5 = ThoughtNode('Historical Context')
655
+ problem_identification.add_child(B1)
656
+ problem_identification.add_child(B2)
657
+ problem_identification.add_child(B3)
658
+ problem_identification.add_child(B4)
659
+ problem_identification.add_child(B5)
660
+
661
+ # Define the Problem children
662
+ B1a = ThoughtNode('Problem Statement Formulation')
663
+ B1b = ThoughtNode('Scope Definition')
664
+ B1c = ThoughtNode('Objective Setting')
665
+ B1.add_child(B1a)
666
+ B1.add_child(B1b)
667
+ B1.add_child(B1c)
668
+
669
+ # Identify Stakeholders children
670
+ B2a = ThoughtNode('Stakeholder Mapping')
671
+ B2b = ThoughtNode('Interest and Influence Analysis')
672
+ B2c = ThoughtNode('Engagement Strategy')
673
+ B2.add_child(B2a)
674
+ B2.add_child(B2b)
675
+ B2.add_child(B2c)
676
+
677
+ # Determine Constraints children
678
+ B3a = ThoughtNode('Resource Limitations')
679
+ B3b = ThoughtNode('Time Constraints')
680
+ B3c = ThoughtNode('Legal and Regulatory Constraints')
681
+ B3.add_child(B3a)
682
+ B3.add_child(B3b)
683
+ B3.add_child(B3c)
684
+
685
+ # Recognize Problem Type children
686
+ B4a = ThoughtNode('Simple vs Complex')
687
+ B4b = ThoughtNode('Known vs Unknown')
688
+ B4c = ThoughtNode('Tame vs Wicked Problems')
689
+ B4.add_child(B4a)
690
+ B4.add_child(B4b)
691
+ B4.add_child(B4c)
692
+
693
+ # Historical Context children
694
+ B5a = ThoughtNode('Previous Attempts')
695
+ B5b = ThoughtNode('Lessons Learned')
696
+ B5c = ThoughtNode('Environmental Factors')
697
+ B5.add_child(B5a)
698
+ B5.add_child(B5b)
699
+ B5.add_child(B5c)
700
+
701
+ # Problem Analysis children
702
+ C1 = ThoughtNode('Root Cause Analysis')
703
+ C2 = ThoughtNode('System Mapping')
704
+ C3 = ThoughtNode('Data Collection')
705
+ C4 = ThoughtNode('Impact Assessment')
706
+ C5 = ThoughtNode('Theoretical Framework')
707
+ problem_analysis.add_child(C1)
708
+ problem_analysis.add_child(C2)
709
+ problem_analysis.add_child(C3)
710
+ problem_analysis.add_child(C4)
711
+ problem_analysis.add_child(C5)
712
+
713
+ # Root Cause Analysis children
714
+ C1a = ThoughtNode('5 Whys Technique')
715
+ C1b = ThoughtNode('Fishbone Diagram')
716
+ C1c = ThoughtNode('Pareto Analysis')
717
+ C1.add_child(C1a)
718
+ C1.add_child(C1b)
719
+ C1.add_child(C1c)
720
+
721
+ # System Mapping children
722
+ C2a = ThoughtNode('Causal Loop Diagrams')
723
+ C2b = ThoughtNode('Stock and Flow Models')
724
+ C2c = ThoughtNode('Network Analysis')
725
+ C2.add_child(C2a)
726
+ C2.add_child(C2b)
727
+ C2.add_child(C2c)
728
+
729
+ # Data Collection children
730
+ C3a = ThoughtNode('Quantitative Data')
731
+ C3b = ThoughtNode('Qualitative Data')
732
+ C3c = ThoughtNode('Data Validation')
733
+ C3.add_child(C3a)
734
+ C3.add_child(C3b)
735
+ C3.add_child(C3c)
736
+
737
+ # Quantitative Data children
738
+ C3a1 = ThoughtNode('Surveys and Questionnaires')
739
+ C3a2 = ThoughtNode('Experimental Data')
740
+ C3a3 = ThoughtNode('Big Data Analytics')
741
+ C3a.add_child(C3a1)
742
+ C3a.add_child(C3a2)
743
+ C3a.add_child(C3a3)
744
+
745
+ # Qualitative Data children
746
+ C3b1 = ThoughtNode('Interviews')
747
+ C3b2 = ThoughtNode('Focus Groups')
748
+ C3b3 = ThoughtNode('Observational Studies')
749
+ C3b.add_child(C3b1)
750
+ C3b.add_child(C3b2)
751
+ C3b.add_child(C3b3)
752
+
753
+ # Data Validation children
754
+ C3c1 = ThoughtNode('Statistical Validation')
755
+ C3c2 = ThoughtNode('Cross-Validation')
756
+ C3c3 = ThoughtNode('Expert Review')
757
+ C3c.add_child(C3c1)
758
+ C3c.add_child(C3c2)
759
+ C3c.add_child(C3c3)
760
+
761
+ # Impact Assessment children
762
+ C4a = ThoughtNode('Environmental Impact')
763
+ C4b = ThoughtNode('Social Impact')
764
+ C4c = ThoughtNode('Economic Impact')
765
+ C4.add_child(C4a)
766
+ C4.add_child(C4b)
767
+ C4.add_child(C4c)
768
+
769
+ # Theoretical Framework children
770
+ C5a = ThoughtNode('Literature Review')
771
+ C5b = ThoughtNode('Conceptual Modeling')
772
+ C5c = ThoughtNode('Hypothesis Formation')
773
+ C5.add_child(C5a)
774
+ C5.add_child(C5b)
775
+ C5.add_child(C5c)
776
+
777
+ # Solution Generation children
778
+ D1 = ThoughtNode('Creative Problem Solving')
779
+ D2 = ThoughtNode('Analytical Approach')
780
+ D3 = ThoughtNode('Mathematical Computation')
781
+ D4 = ThoughtNode('Decision Making')
782
+ solution_generation.add_child(D1)
783
+ solution_generation.add_child(D2)
784
+ solution_generation.add_child(D3)
785
+ solution_generation.add_child(D4)
786
+
787
+ # Action Planning, Resource Allocation, Change Management children (implementation phase)
788
+ E1 = ThoughtNode('Action Planning')
789
+ E2 = ThoughtNode('Resource Allocation')
790
+ E3 = ThoughtNode('Change Management')
791
+ implementation.add_child(E1)
792
+ implementation.add_child(E2)
793
+ implementation.add_child(E3)
794
+
795
+ # Verification, Performance Metrics, Feedback Loops, Continuous Improvement children (evaluation phase)
796
+ F1 = ThoughtNode('Verification')
797
+ F2 = ThoughtNode('Performance Metrics')
798
+ F3 = ThoughtNode('Feedback Loops')
799
+ F4 = ThoughtNode('Continuous Improvement')
800
+ evaluation_adjustment.add_child(F1)
801
+ evaluation_adjustment.add_child(F2)
802
+ evaluation_adjustment.add_child(F3)
803
+ evaluation_adjustment.add_child(F4)
804
+
805
+ # Cross-Cutting Considerations node
806
+ G = ThoughtNode('Cross-Cutting Considerations')
807
+ root.add_child(G)
808
+
809
+ # Cross-Cutting Considerations children
810
+ G1 = ThoughtNode('Ethical Framework')
811
+ G2 = ThoughtNode('Stakeholder Management')
812
+ G3 = ThoughtNode('Interdisciplinary Connections')
813
+ G4 = ThoughtNode('Technological Integration')
814
+ G5 = ThoughtNode('Emotional Intelligence')
815
+ G6 = ThoughtNode('Collaborative Problem Solving')
816
+ G7 = ThoughtNode('Computational Considerations') # Assuming H was intended as G7
817
+ G8 = ThoughtNode('Order of Operations') # Assuming I was intended as G8
818
+ G9 = ThoughtNode('Critical Thinking') # Assuming J was intended as G9
819
+ G10 = ThoughtNode('Future Perspective') # Assuming K was intended as G10
820
+ G11 = ThoughtNode('Learning and Adaptation') # Assuming L was intended as G11
821
+ G.add_child(G1)
822
+ G.add_child(G2)
823
+ G.add_child(G3)
824
+ G.add_child(G4)
825
+ G.add_child(G5)
826
+ G.add_child(G6)
827
+ G.add_child(G7)
828
+ G.add_child(G8)
829
+ G.add_child(G9)
830
+ G.add_child(G10)
831
+ G.add_child(G11)
832
+
833
+ # Ethical Framework children
834
+ G1a = ThoughtNode('Value-based Decision Making')
835
+ G1b = ThoughtNode('Long-term Consequences')
836
+ G1.add_child(G1a)
837
+ G1.add_child(G1b)
838
+
839
+ # Value-based Decision Making children
840
+ G1a1 = ThoughtNode('Ethical Theories Application')
841
+ G1a2 = ThoughtNode('Moral Dilemma Resolution')
842
+ G1a.add_child(G1a1)
843
+ G1a.add_child(G1a2)
844
+
845
+ # Long-term Consequences children
846
+ G1b1 = ThoughtNode('Sustainability Assessment')
847
+ G1b2 = ThoughtNode('Intergenerational Impact')
848
+ G1b.add_child(G1b1)
849
+ G1b.add_child(G1b2)
850
+
851
+ # Stakeholder Management children
852
+ G2a = ThoughtNode('Direct Stakeholders')
853
+ G2b = ThoughtNode('Indirect Stakeholders')
854
+ G2c = ThoughtNode('Conflicting Interests')
855
+ G2.add_child(G2a)
856
+ G2.add_child(G2b)
857
+ G2.add_child(G2c)
858
+
859
+ # Conflicting Interests children
860
+ G2c1 = ThoughtNode('Negotiation Strategies')
861
+ G2c2 = ThoughtNode('Conflict Resolution Techniques')
862
+ G2c.add_child(G2c1)
863
+ G2c.add_child(G2c2)
864
+
865
+ # Interdisciplinary Connections children
866
+ G3a = ThoughtNode('Related Fields')
867
+ G3b = ThoughtNode('Cross-disciplinary Impact')
868
+ G3.add_child(G3a)
869
+ G3.add_child(G3b)
870
+
871
+ # Related Fields children
872
+ G3a1 = ThoughtNode('Cross-domain Knowledge Transfer')
873
+ G3a2 = ThoughtNode('Interdisciplinary Collaboration')
874
+ G3a.add_child(G3a1)
875
+ G3a.add_child(G3a2)
876
+
877
+ # Cross-disciplinary Impact children
878
+ G3b1 = ThoughtNode('Synergy Identification')
879
+ G3b2 = ThoughtNode('Holistic Impact Assessment')
880
+ G3b.add_child(G3b1)
881
+ G3b.add_child(G3b2)
882
+
883
+ # Technological Integration children
884
+ G4a = ThoughtNode('AI-assisted Problem Solving')
885
+ G4b = ThoughtNode('Data-driven Insights')
886
+ G4c = ThoughtNode('Digital Collaboration Tools')
887
+ G4.add_child(G4a)
888
+ G4.add_child(G4b)
889
+ G4.add_child(G4c)
890
+
891
+ # AI-assisted Problem Solving children
892
+ G4a1 = ThoughtNode('Machine Learning Models')
893
+ G4a2 = ThoughtNode('Natural Language Processing')
894
+ G4a.add_child(G4a1)
895
+ G4a.add_child(G4a2)
896
+
897
+ # Data-driven Insights children
898
+ G4b1 = ThoughtNode('Big Data Analytics')
899
+ G4b2 = ThoughtNode('Predictive Modeling')
900
+ G4b.add_child(G4b1)
901
+ G4b.add_child(G4b2)
902
+
903
+ # Digital Collaboration Tools children
904
+ G4c1 = ThoughtNode('Project Management Platforms')
905
+ G4c2 = ThoughtNode('Virtual Reality Collaboration')
906
+ G4c.add_child(G4c1)
907
+ G4c.add_child(G4c2)
908
+
909
+ # Emotional Intelligence children
910
+ G5a = ThoughtNode('Self-Awareness')
911
+ G5b = ThoughtNode('Empathy')
912
+ G5c = ThoughtNode('Stress Management')
913
+ G5.add_child(G5a)
914
+ G5.add_child(G5b)
915
+ G5.add_child(G5c)
916
+
917
+ # Self-Awareness children
918
+ G5a1 = ThoughtNode('Emotional Recognition')
919
+ G5a2 = ThoughtNode('Personal Bias Identification')
920
+ G5a.add_child(G5a1)
921
+ G5a.add_child(G5a2)
922
+
923
+ # Empathy children
924
+ G5b1 = ThoughtNode('Perspective Taking')
925
+ G5b2 = ThoughtNode('Active Listening')
926
+ G5b.add_child(G5b1)
927
+ G5b.add_child(G5b2)
928
+
929
+ # Stress Management children
930
+ G5c1 = ThoughtNode('Mindfulness Techniques')
931
+ G5c2 = ThoughtNode('Resilience Building')
932
+ G5c.add_child(G5c1)
933
+ G5c.add_child(G5c2)
934
+
935
+ # Collaborative Problem Solving children
936
+ G6a = ThoughtNode('Team Dynamics')
937
+ G6b = ThoughtNode('Communication Strategies')
938
+ G6c = ThoughtNode('Conflict Resolution')
939
+ G6.add_child(G6a)
940
+ G6.add_child(G6b)
941
+ G6.add_child(G6c)
942
+
943
+ # Team Dynamics children
944
+ G6a1 = ThoughtNode('Team Formation Strategies')
945
+ G6a2 = ThoughtNode('Role Assignment')
946
+ G6a.add_child(G6a1)
947
+ G6a.add_child(G6a2)
948
+
949
+ # Communication Strategies children
950
+ G6b1 = ThoughtNode('Clear Messaging')
951
+ G6b2 = ThoughtNode('Feedback Mechanisms')
952
+ G6b.add_child(G6b1)
953
+ G6b.add_child(G6b2)
954
+
955
+ # Conflict Resolution children
956
+ G6c1 = ThoughtNode('Mediation Techniques')
957
+ G6c2 = ThoughtNode('Consensus Building')
958
+ G6c.add_child(G6c1)
959
+ G6c.add_child(G6c2)
960
+
961
+ # Computational Considerations children
962
+ G7a = ThoughtNode('CPU Operations')
963
+ G7b = ThoughtNode('GPU Parallelization')
964
+ G7c = ThoughtNode('Floating-Point Precision')
965
+ G7.add_child(G7a)
966
+ G7.add_child(G7b)
967
+ G7.add_child(G7c)
968
+
969
+ # CPU Operations children
970
+ G7a1 = ThoughtNode('Instruction Set Architecture')
971
+ G7a2 = ThoughtNode('Pipelining and Parallelism')
972
+ G7a.add_child(G7a1)
973
+ G7a.add_child(G7a2)
974
+
975
+ # GPU Parallelization children
976
+ G7b1 = ThoughtNode('CUDA Programming')
977
+ G7b2 = ThoughtNode('OpenCL Framework')
978
+ G7b.add_child(G7b1)
979
+ G7b.add_child(G7b2)
980
+
981
+ # Floating-Point Precision children
982
+ G7c1 = ThoughtNode('IEEE 754 Standard')
983
+ G7c2 = ThoughtNode('Error Propagation Analysis')
984
+ G7c.add_child(G7c1)
985
+ G7c.add_child(G7c2)
986
+
987
+ # Order of Operations children
988
+ G8a = ThoughtNode('Parentheses')
989
+ G8b = ThoughtNode('Exponents')
990
+ G8c = ThoughtNode('Multiplication and Division')
991
+ G8d = ThoughtNode('Addition and Subtraction')
992
+ G8.add_child(G8a)
993
+ G8.add_child(G8b)
994
+ G8.add_child(G8c)
995
+ G8.add_child(G8d)
996
+
997
+ # Critical Thinking children
998
+ G9a = ThoughtNode('Assumptions Questioning')
999
+ G9b = ThoughtNode('Bias Recognition')
1000
+ G9.add_child(G9a)
1001
+ G9.add_child(G9b)
1002
+
1003
+ # Assumptions Questioning children
1004
+ G9a1 = ThoughtNode('Socratic Questioning')
1005
+ G9a2 = ThoughtNode('Devil\'s Advocate Approach')
1006
+ G9a.add_child(G9a1)
1007
+ G9a.add_child(G9a2)
1008
+
1009
+ # Bias Recognition children
1010
+ G9b1 = ThoughtNode('Cognitive Bias Identification')
1011
+ G9b2 = ThoughtNode('Debiasing Techniques')
1012
+ G9b.add_child(G9b1)
1013
+ G9b.add_child(G9b2)
1014
+
1015
+ # Future Perspective children
1016
+ G10a = ThoughtNode('Short-term Projections')
1017
+ G10b = ThoughtNode('Long-term Scenarios')
1018
+ G10c = ThoughtNode('Potential Impacts')
1019
+ G10.add_child(G10a)
1020
+ G10.add_child(G10b)
1021
+ G10.add_child(G10c)
1022
+
1023
+ # Short-term Projections children
1024
+ G10a1 = ThoughtNode('Trend Analysis')
1025
+ G10a2 = ThoughtNode('Scenario Planning')
1026
+ G10a.add_child(G10a1)
1027
+ G10a.add_child(G10a2)
1028
+
1029
+ # Long-term Scenarios children
1030
+ G10b1 = ThoughtNode('Futures Wheel')
1031
+ G10b2 = ThoughtNode('Backcasting')
1032
+ G10b.add_child(G10b1)
1033
+ G10b.add_child(G10b2)
1034
+
1035
+ # Potential Impacts children
1036
+ G10c1 = ThoughtNode('Risk Assessment')
1037
+ G10c2 = ThoughtNode('Opportunity Identification')
1038
+ G10c.add_child(G10c1)
1039
+ G10c.add_child(G10c2)
1040
+
1041
+ # Learning and Adaptation children
1042
+ G11a = ThoughtNode('Reflective Practice')
1043
+ G11b = ThoughtNode('Knowledge Transfer')
1044
+ G11c = ThoughtNode('Adaptive Problem Solving')
1045
+ G11.add_child(G11a)
1046
+ G11.add_child(G11b)
1047
+ G11.add_child(G11c)
1048
+
1049
+ # Reflective Practice children
1050
+ G11a1 = ThoughtNode('After Action Review')
1051
+ G11a2 = ThoughtNode('Learning Journals')
1052
+ G11a.add_child(G11a1)
1053
+ G11a.add_child(G11a2)
1054
+
1055
+ # Knowledge Transfer children
1056
+ G11b1 = ThoughtNode('Best Practice Documentation')
1057
+ G11b2 = ThoughtNode('Mentoring Programs')
1058
+ G11b.add_child(G11b1)
1059
+ G11b.add_child(G11b2)
1060
+
1061
+ # Adaptive Problem Solving children
1062
+ G11c1 = ThoughtNode('Iterative Approaches')
1063
+ G11c2 = ThoughtNode('Flexibility in Methodology')
1064
+ G11c.add_child(G11c1)
1065
+ G11c.add_child(G11c2)
1066
+
1067
+ return root
1068
+
1069
+ def traverse_tree(node, action_list):
1070
+ if node.name not in action_list:
1071
+ action_list.append(node.name)
1072
+ for child in node.children:
1073
+ traverse_tree(child, action_list)
1074
+
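+ # Usage sketch: flatten the tree into the global action vocabulary. The
+ # action_to_index / index_to_action maps referenced by MCTS and State below are
+ # assumed to be built this way elsewhere in this file:
+ #   root_thought_node = build_tree_of_thought()
+ #   action_list = []
+ #   traverse_tree(root_thought_node, action_list)
+ #   action_to_index = {name: idx for idx, name in enumerate(action_list)}
+ #   index_to_action = {idx: name for idx, name in enumerate(action_list)}
+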
1075
+ class MCTSNode:
1076
+ __slots__ = [
1077
+ 'state',
1078
+ 'parent',
1079
+ 'action',
1080
+ 'children',
1081
+ 'visit_count',
1082
+ 'value_sum',
1083
+ 'prior',
1084
+ 'cached_policy',
1085
+ 'cached_value',
1086
+ 'thought_node' # Added to keep track of the current thought node
1087
+ ]
1088
+
1089
+ def __init__(self, state, thought_node, parent=None, action=None):
1090
+ self.state = state
1091
+ self.thought_node = thought_node # Reference to the ThoughtNode
1092
+ self.parent = parent
1093
+ self.action = action
1094
+ self.children = {}
1095
+ self.visit_count = 0
1096
+ self.value_sum = 0.0
1097
+ self.prior = 0.0
1098
+ self.cached_policy = None
1099
+ self.cached_value = None
1100
+
1101
+ def expand(self, priors):
1102
+ """
1103
+ Expand the node by adding all valid child nodes from the thought tree.
1104
+
1105
+ Args:
1106
+ priors (dict): A dictionary mapping action names to prior probabilities.
1107
+ """
1108
+ for child_thought_node in self.thought_node.children:
1109
+ action = child_thought_node.name # Action name
1110
+ if action not in self.children:
1111
+ # Assume batch size of 1 for individual nodes
1112
+ child_state = self.state.apply_action(action)
1113
+ child_node = MCTSNode(
1114
+ state=child_state,
1115
+ thought_node=child_thought_node,
1116
+ parent=self,
1117
+ action=action
1118
+ )
1119
+ child_node.prior = priors.get(action, 1.0 / len(self.thought_node.children)) # Default prior if not provided
1120
+ self.children[action] = child_node
1121
+
1122
+ def is_leaf(self):
1123
+ return len(self.children) == 0
1124
+
1125
+ def ucb_score(self, total_visits, exploration_constant=math.sqrt(2)):
1126
+ if self.visit_count == 0:
1127
+ return float('inf')
1128
+ avg_value = self.value_sum / self.visit_count
1129
+ exploration_term = exploration_constant * self.prior * math.sqrt(total_visits) / (1 + self.visit_count)
1130
+ return avg_value + exploration_term
1131
+
1132
+ class MCTS:
1133
+ def __init__(self, prediction_network, dynamics_network, action_encoder, num_iterations=10, exploration_constant=math.sqrt(2)):
1134
+ self.prediction_network = prediction_network
1135
+ self.dynamics_network = dynamics_network
1136
+ self.action_encoder = action_encoder
1137
+ self.num_iterations = num_iterations
1138
+ self.exploration_constant = exploration_constant
1139
+ self.cache = {}
1140
+
1141
+ def search(self, root_state):
1142
+ """
1143
+ Perform MCTS starting from the root state.
1144
+
1145
+ Args:
1146
+ root_state (State): The root state from which to start the search.
1147
+
1148
+ Returns:
1149
+ str: The best action to take from the root state.
1150
+ """
1151
+ root_node = MCTSNode(state=root_state, thought_node=root_state.thought_node)
1152
+
1153
+ for _ in range(self.num_iterations):
1154
+ node = self.select(root_node)
1155
+ value = self.evaluate(node)
1156
+ self.backpropagate(node, value)
1157
+
1158
+ best_action = self.best_action(root_node)
1159
+ return best_action
1160
+
1161
+ def select(self, node):
1162
+ while not node.is_leaf():
1163
+ total_visits = sum(child.visit_count for child in node.children.values())
1164
+ _, node = max(
1165
+ node.children.items(),
1166
+ key=lambda item: item[1].ucb_score(total_visits, self.exploration_constant)
1167
+ )
1168
+ return node
1169
+
1170
+ def evaluate(self, node):
1171
+ # Use the prediction network to get policy and value estimates
1172
+ state_representation = node.state.representation # Shape: (batch_size=1, seq_len, state_dim)
1173
+ policy_logits, value_estimate = self.prediction_network(state_representation)
1174
+ value_estimate = value_estimate[:, -1].item() # Use the last time step's value; .item() on the full (1, seq_len) tensor would raise
1175
+
1176
+ # Convert policy logits to probabilities
1177
+ policy_probs = F.softmax(policy_logits, dim=-1).squeeze(0) # Shape: (seq_len, action_vocab_size)
1178
+ # For simplicity, use the last time step's policy
1179
+ policy_probs = policy_probs[-1] # Shape: (action_vocab_size,)
1180
+
1181
+ # Map policy probabilities to the actions available from the current thought node
1182
+ priors = {}
1183
+ for child in node.thought_node.children:
1184
+ action_name = child.name
1185
+ action_idx = action_to_index.get(action_name, None)
1186
+ if action_idx is not None and action_idx < policy_probs.size(0):
1187
+ priors[action_name] = policy_probs[action_idx].item()
1188
+ else:
1189
+ priors[action_name] = 1.0 / len(node.thought_node.children) # Uniform prior if not found
1190
+
1191
+ # Expand the node
1192
+ node.expand(priors)
1193
+
1194
+ return value_estimate
1195
+
1196
+ def backpropagate(self, node, value):
1197
+ while node is not None:
1198
+ node.visit_count += 1
1199
+ node.value_sum += value
1200
+ node = node.parent
1201
+
1202
+ def best_action(self, root_node):
1203
+ # Select the child with the highest visit count
1204
+ best_child = max(root_node.children.values(), key=lambda n: n.visit_count)
1205
+ return best_child.action
1206
+
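+ # Search sketch (see infer() below for the full loop): one call returns the single
+ # best next thought from the current State:
+ #   mcts = MCTS(prediction_network, dynamics_network, action_encoder, num_iterations=10)
+ #   best_action = mcts.search(current_state)
+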
1207
+ class State:
1208
+ def __init__(self, representation, dynamics_network, action_encoder, thought_node):
1209
+ """
1210
+ Args:
1211
+ representation (torch.Tensor): Encoded state representation, shape (batch_size, seq_len, state_dim)
1212
+ dynamics_network (nn.Module): The Dynamics Network to predict next states
1213
+ action_encoder (nn.Module): The Action Encoder to encode actions
1214
+ thought_node (ThoughtNode): The current node in the Tree of Thought
1215
+ """
1216
+ self.representation = representation # Shape: (batch_size, seq_len, state_dim)
1217
+ self.dynamics_network = dynamics_network
1218
+ self.action_encoder = action_encoder
1219
+ self.thought_node = thought_node # Current position in the Tree of Thought
1220
+
1221
+ def apply_action(self, action):
1222
+ """
1223
+ Apply an action to the current state to get a new state.
1224
+
1225
+ Args:
1226
+ action (str): The action to apply (the name of the ThoughtNode)
1227
+
1228
+ Returns:
1229
+ State: The new state after applying the action
1230
+ """
1231
+ # Find the corresponding child node in the thought tree
1232
+ next_thought_node = None
1233
+ for child in self.thought_node.children:
1234
+ if child.name == action:
1235
+ next_thought_node = child
1236
+ break
1237
+ if next_thought_node is None:
1238
+ raise ValueError(f"Action '{action}' is not valid from the current thought node.")
1239
+
1240
+ # Encode action
1241
+ action_index = torch.tensor([[action_to_index[action]]], device=self.representation.device)
1242
+ action_embedding = self.action_encoder(action_index)
1243
+
1244
+ # Predict the next state using the Dynamics Network
1245
+ next_state_representation = self.dynamics_network(self.representation, action_embedding)
1246
+
1247
+ return State(
1248
+ representation=next_state_representation,
1249
+ dynamics_network=self.dynamics_network,
1250
+ action_encoder=self.action_encoder,
1251
+ thought_node=next_thought_node
1252
+ )
1253
+
1254
+ class PPOAgent:
1255
+ def __init__(self, policy_network, optimizer, clip_epsilon=0.2, entropy_coef=0.01, value_coef=0.5):
1256
+ self.policy_network = policy_network
1257
+ self.optimizer = optimizer
1258
+ self.clip_epsilon = clip_epsilon
1259
+ self.entropy_coef = entropy_coef
1260
+ self.value_coef = value_coef
1261
+
1262
+ def compute_loss(self, states, old_log_probs, actions, returns, advantages):
1263
+ # Get policy logits and value estimates
1264
+ policy_logits, value_estimates = self.policy_network(states)
1265
+ batch_size, seq_len, num_actions = policy_logits.size()
1266
+
1267
+ # Flatten tensors using reshape
1268
+ policy_logits = policy_logits.reshape(-1, num_actions) # Shape: (batch_size * seq_len, num_actions)
1269
+ value_estimates = value_estimates.view(-1)
1270
+ actions = actions.reshape(-1) # Shape: (batch_size * seq_len)
1271
+ old_log_probs = old_log_probs.reshape(-1) # Shape: (batch_size * seq_len)
1272
+ returns = returns.view(-1)
1273
+ advantages = advantages.reshape(-1) # Shape: (batch_size * seq_len)
1274
+
1275
+ # Ensure value_estimates and returns are the same size
1276
+ if value_estimates.size() != returns.size():
1277
+ print(f"Shape mismatch: value_estimates shape: {value_estimates.size()}, returns shape: {returns.size()}")
1278
+ value_estimates = value_estimates[:returns.size(0)]
1279
+
1280
+ # Compute new log probabilities
1281
+ new_log_probs_all = F.log_softmax(policy_logits, dim=-1) # Shape: (batch_size * seq_len, num_actions)
1282
+ new_log_probs = new_log_probs_all.gather(1, actions.unsqueeze(-1)).squeeze(-1) # Shape: (batch_size * seq_len)
1283
+
1284
+ # Compute ratios
1285
+ ratios = torch.exp(new_log_probs - old_log_probs)
1286
+
1287
+ # PPO surrogate loss
1288
+ surr1 = ratios * advantages
1289
+ surr2 = torch.clamp(ratios, 1 - self.clip_epsilon, 1 + self.clip_epsilon) * advantages
1290
+ policy_loss = -torch.min(surr1, surr2).mean()
1291
+
1292
+ # Value loss
1293
+ value_loss = F.mse_loss(value_estimates, returns)
1294
+
1295
+ # Entropy loss
1296
+ entropy = -(new_log_probs_all.exp() * new_log_probs_all).sum(dim=-1).mean() # Entropy of the full action distribution, not only the chosen actions
1297
+
1298
+ # Total loss
1299
+ total_loss = policy_loss + self.value_coef * value_loss - self.entropy_coef * entropy
1300
+ return total_loss
1301
+
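+ # The clipped surrogate above follows the standard PPO objective,
+ #   L = E[ min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t) ],  r_t = exp(log pi_new - log pi_old),
+ # combined with an MSE value loss and an entropy bonus.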
1302
+
1303
+
1304
+ def infer(query, world_model_components, root_thought_node, tokenizer, max_length=20, inference_mode='world_model'):
1305
+ """
1306
+ Perform inference given a query, utilizing the Tree of Thought and MCTS.
1307
+
1308
+ Args:
1309
+ query (str): The input query or prompt.
1310
+ world_model_components (tuple): Tuple containing the model components.
1311
+ root_thought_node (ThoughtNode): The root node of the Tree of Thought.
1312
+ tokenizer (transformers.PreTrainedTokenizer): The tokenizer used.
1313
+ max_length (int): Maximum length for the generated sequence.
1314
+ inference_mode (str): Inference mode ('world_model', 'without_world_model', 'world_model_tree_of_thought')
1315
+
1316
+ Returns:
1317
+ List[str] or str: The sequence of actions (thoughts) selected or generated text.
1318
+ """
1319
+ representation_network, dynamics_network, prediction_network, action_encoder, ppo_agent, model_transformer = world_model_components
1320
+
1321
+ # Tokenize and encode the query
1322
+ input_ids = tokenizer.encode(query, return_tensors='pt').to(device)
1323
+ attention_mask = (input_ids != tokenizer.pad_token_id).long() # Computed for completeness; the custom Transformer forward does not consume it
1324
+
1325
+ if inference_mode == 'without_world_model':
1326
+ # Directly use the transformer model to generate text
1327
+ with torch.no_grad():
1328
+ generated_ids, entropies, variances = model_transformer.generate(src=input_ids, tokenizer=tokenizer, max_length=max_length, temperature=args.temperature)
1329
+ generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
1330
+ return generated_text
1331
+
1332
+ else:
1333
+ # Use the world model components
1334
+ with torch.no_grad():
1335
+ transformer_output = model_transformer(input_ids, input_ids)
1336
+ # Get the initial state representation
1337
+ initial_representation = representation_network(transformer_output) # Shape: (batch_size=1, seq_len, state_dim)
1338
+ initial_state = State(
1339
+ representation=initial_representation,
1340
+ dynamics_network=dynamics_network,
1341
+ action_encoder=action_encoder,
1342
+ thought_node=root_thought_node
1343
+ )
1344
+
1345
+ if inference_mode == 'world_model_tree_of_thought':
1346
+ # Use MCTS with Tree of Thought
1347
+ mcts = MCTS(prediction_network, dynamics_network, action_encoder, num_iterations=args.mcts_iterations, exploration_constant=args.mcts_exploration_constant)
1348
+ current_state = initial_state
1349
+ thought_sequence = []
1350
+
1351
+ for _ in range(max_length):
1352
+ best_action = mcts.search(current_state)
1353
+ thought_sequence.append(best_action)
1354
+
1355
+ # Apply the best action to get the next state
1356
+ current_state = current_state.apply_action(best_action)
1357
+
1358
+ # Check if we've reached a leaf node (no further actions)
1359
+ if len(current_state.thought_node.children) == 0:
1360
+ break
1361
+
1362
+ return thought_sequence
1363
+ else:
1364
+ # Use the world model without Tree of Thought
1365
+ # For simplicity, we will generate actions based on the prediction network
1366
+ policy_logits, _ = prediction_network(initial_state.representation)
1367
+ policy_probs = F.softmax(policy_logits, dim=-1)
1368
+ # Select actions with highest probabilities
1369
+ top_actions = torch.argmax(policy_probs, dim=-1)
1370
+ generated_actions = [index_to_action[idx.item()] for idx in top_actions[0]]
1371
+ return generated_actions
1372
+
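+ # Inference sketch (hypothetical query; the components tuple is assembled in main()):
+ #   thoughts = infer("How should we allocate limited resources?", world_model_components,
+ #                    root_thought_node, tokenizer, max_length=10,
+ #                    inference_mode='world_model_tree_of_thought')
+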
1373
+ def train_epoch_world_model(world_model_components, train_loader, optimizer, scheduler, scaler, args, model_transformer, state_dim, embed_dim, input_dim):
1374
+ representation_network, dynamics_network, prediction_network, action_encoder, ppo_agent, _ = world_model_components
1375
+ representation_network.train()
1376
+ dynamics_network.train()
1377
+ prediction_network.train()
1378
+ action_encoder.train()
1379
+ ppo_agent.policy_network.train()
1380
+
1381
+ total_loss = 0.0
1382
+ optimizer.zero_grad()
1383
+ print(f"Starting World Model training epoch with {len(train_loader)}batches...")
1384
+
1385
+ for i, batch in enumerate(train_loader):
1386
+ print(f"Processing batch {i+1}/{len(train_loader)}...")
1387
+
1388
+ # Move batches to the device
1389
+ src_batch = batch['input_ids'].to(device)
1390
+ tgt_batch = batch['labels'].to(device)
1391
+
1392
+ with torch.amp.autocast(device_type='cuda'):
1393
+ print("Forward pass through Transformer (frozen)...")
1394
+ with torch.no_grad():
1395
+ transformer_output = model_transformer(src_batch, tgt_batch[:, :-1])
1396
+
1397
+ # World Model - Representation
1398
+ state_representation = representation_network(transformer_output) # On GPU
1399
+
1400
+ # For simplicity, let's assume true actions are provided (e.g., next tokens)
1401
+ true_actions = tgt_batch[:, :-1] # Shape: (batch_size, seq_len)
1402
+ action_sequences = true_actions
1403
+
1404
+ # Get action embeddings
1405
+ action_embeddings = action_encoder(action_sequences) # Shape: (batch_size, seq_len, embed_dim)
1406
+
1407
+ # Apply dynamics network
1408
+ predicted_next_state_batch = dynamics_network(state_representation, action_embeddings) # Shape: (batch_size, seq_len, state_dim)
1409
+
1410
+ # Prediction Network - Policy logits and value
1411
+ policy_logits, value_estimates = prediction_network(predicted_next_state_batch)
1412
+ # value_estimates now has shape (batch_size, seq_len)
1413
+
1414
+ # Define true_policy and true_value as placeholders on the GPU
1415
+ true_policy = F.one_hot(true_actions, num_classes=input_dim).float() # Shape: (batch_size, seq_len, input_dim)
1416
+ true_value = torch.zeros_like(value_estimates).to(device)
1417
+
1418
+ # Compute PPO loss
1419
+ actions_selected = true_actions # Shape: (batch_size, seq_len)
1420
+ old_log_probs = torch.zeros_like(actions_selected, dtype=torch.float32).to(device)
1421
+ returns = torch.zeros_like(actions_selected, dtype=torch.float32).to(device)
1422
+ advantages = torch.zeros_like(actions_selected, dtype=torch.float32).to(device)
1423
+
1424
+ # Compute PPO loss using states
1425
+ ppo_loss = ppo_agent.compute_loss(state_representation, old_log_probs, actions_selected, returns, advantages)
1426
+
1427
+ # Compute InfoNCE Loss
1428
+ z_i = state_representation.view(-1, state_dim) # Shape: (batch_size * seq_len, state_dim)
1429
+ z_j = F.dropout(z_i, p=0.1, training=True)
1430
+ info_nce = InfoNCE_Loss()(z_i, z_j)
1431
+
1432
+ # Compute other losses
1433
+ covariance = CovarianceRegularization()(predicted_next_state_batch.view(-1, predicted_next_state_batch.size(-1)))
1434
+ dynamics_loss = DynamicsPerformanceLoss()(state_representation, predicted_next_state_batch)
1435
+ perturbed_next_state = predicted_next_state_batch + torch.randn_like(predicted_next_state_batch) * 0.01
1436
+ thought_loss = ThoughtConsistencyLoss()(predicted_next_state_batch, perturbed_next_state)
1437
+ pv_loss = PolicyValueJointLoss()(policy_logits, true_policy, value_estimates.squeeze(-1), true_value.squeeze(-1))
1438
+ action_diversity = ActionDiversityReward()(action_embeddings.view(-1, embed_dim))
1439
+ mcts_best_values = torch.zeros(actions_selected.size(0)).to(device)
1440
+ etv = ExpectedThoughtValueLoss()(mcts_best_values)
1441
+ visit_counts = torch.ones(actions_selected.size(0), policy_logits.size(-1)).to(device)
1442
+ exploration = ExplorationRegularization()(visit_counts)
1443
+ old_policy = F.softmax(policy_logits.detach(), dim=-1)
1444
+ new_policy = F.softmax(policy_logits, dim=-1)
1445
+ kl_loss = KL_DivergenceLoss()(old_policy, new_policy)
1446
+
1447
+ # Total Loss
1448
+ loss = (
1449
+ ppo_loss +
1450
+ info_nce +
1451
+ covariance +
1452
+ dynamics_loss +
1453
+ thought_loss +
1454
+ pv_loss +
1455
+ action_diversity +
1456
+ etv +
1457
+ exploration +
1458
+ kl_loss
1459
+ )
1460
+ loss = loss / args.accumulation_steps
1461
+
1462
+ print("Backward pass...")
1463
+ scaler.scale(loss).backward()
1464
+
1465
+ if (i + 1) % args.accumulation_steps == 0 or (i + 1) == len(train_loader):
1466
+ print("Gradient clipping...")
1467
+ scaler.unscale_(optimizer)
1468
+ torch.nn.utils.clip_grad_norm_(
1469
+ [param for group in optimizer.param_groups for param in group['params']],
1470
+ args.max_grad_norm
1471
+ )
1472
+
1473
+ print("Optimizer step...")
1474
+ scaler.step(optimizer)
1475
+ scaler.update()
1476
+
1477
+ print("Zeroing gradients...")
1478
+ optimizer.zero_grad()
1479
+
1480
+ print("Updating learning rate...")
1481
+ scheduler.step()
1482
+
1483
+ total_loss += loss.item() * args.accumulation_steps
1484
+ print(f"Batch {i+1} completed. Current loss: {loss.item():.4f}")
1485
+
1486
+ avg_loss = total_loss / len(train_loader)
1487
+ print(f"World Model training epoch completed. Average loss: {avg_loss:.4f}")
1488
+ return avg_loss
1489
+
1490
+
1491
+ def train_epoch_language_model(model, train_loader, optimizer, scheduler, scaler, args):
1492
+ model.train()
1493
+ total_loss = 0.0
1494
+ optimizer.zero_grad()
1495
+ print(f"Starting Language Model training epoch with {len(train_loader)} batches...")
1496
+
1497
+ for i, batch in enumerate(train_loader):
1498
+ input_ids = batch['input_ids'].to(device)
1499
+ labels = batch['labels'].to(device)
1500
+
1501
+ with autocast():
1502
+ outputs = model(input_ids, input_ids)
1503
+ logits = outputs.view(-1, outputs.size(-1))
1504
+ labels = labels.view(-1)
1505
+ loss = F.cross_entropy(logits, labels, ignore_index=model.embedding.padding_idx)
1506
+ loss = loss / args.accumulation_steps
1507
+
1508
+ scaler.scale(loss).backward()
1509
+
1510
+ if (i + 1) % args.accumulation_steps == 0 or (i + 1) == len(train_loader):
1511
+ scaler.unscale_(optimizer)
1512
+ torch.nn.utils.clip_grad_norm_(
1513
+ [param for group in optimizer.param_groups for param in group['params']],
1514
+ args.max_grad_norm
1515
+ )
1516
+ scaler.step(optimizer)
1517
+ scaler.update()
1518
+ optimizer.zero_grad()
1519
+ scheduler.step()
1520
+
1521
+ total_loss += loss.item() * args.accumulation_steps
1522
+ print(f"Batch {i + 1} completed. Current loss: {loss.item():.4f}")
1523
+
1524
+ avg_loss = total_loss / len(train_loader)
1525
+ print(f"Language Model training epoch completed. Average loss: {avg_loss:.4f}")
1526
+ return avg_loss
1527
+
1528
+
1529
+
1530
+ def main():
1531
+ args = parse_args()
1532
+ print("Arguments parsed successfully.")
1533
+
1534
+ # Create save directory
1535
+ os.makedirs(args.save_dir, exist_ok=True)
1536
+ print(f"Save directory created: {args.save_dir}")
1537
+
1538
+ # Load tokenizer
1539
+ print("Loading tokenizer...")
1540
+ tokenizer = AutoTokenizer.from_pretrained(args.model_name)
1541
+ if tokenizer.pad_token is None:
1542
+ tokenizer.pad_token = tokenizer.eos_token
1543
+ print("Tokenizer loaded successfully.")
1544
+
1545
+ # Define padding_idx and input dimension based on tokenizer
1546
+ padding_idx = tokenizer.pad_token_id
1547
+ input_dim = len(tokenizer)
1548
+
1549
+ # Initialize the Transformer model on GPU
1550
+ print("Initializing Transformer model...")
1551
+ model_transformer = Transformer(
1552
+ input_dim=input_dim,
1553
+ d_model=128,
1554
+ num_heads=4,
1555
+ num_layers=4,
1556
+ d_ff=256,
1557
+ num_experts=2,
1558
+ output_dim=input_dim,
1559
+ dropout=0.1,
1560
+ top_k=2
1561
+ ).to(device)
1562
+ model_transformer.train()
1563
+ print("Transformer model initialized on device.")
1564
+
1565
+ # Define model parameters (adjusted for speed)
1566
+ d_model = 128
1567
+ state_dim = 128
1568
+ action_dim = d_model
1569
+ hidden_dim = 256
1570
+ vocab_dim = input_dim
1571
+ embed_dim = d_model
1572
+
1573
+ # Define World Model components
1574
+ representation_network = RepresentationNetwork(vocab_dim, d_model, state_dim).to(device)
1575
+ dynamics_network = DynamicsNetwork(state_dim, action_dim, hidden_dim).to(device)
1576
+ prediction_network = PredictionNetwork(state_dim, input_dim, 1).to(device)
1577
+ action_encoder = ActionEncoder(input_dim, action_dim).to(device)
1578
+
1579
+ # Initialize PPO Agent
1580
+ ppo_agent = PPOAgent(
1581
+ policy_network=prediction_network,
1582
+ optimizer=optim.AdamW(prediction_network.parameters(), lr=args.learning_rate),
1583
+ clip_epsilon=0.2,
1584
+ entropy_coef=0.01,
1585
+ value_coef=0.5
1586
+ )
1587
+
1588
+ # Bundle World Model components
1589
+ world_model_components = (representation_network, dynamics_network, prediction_network, action_encoder, ppo_agent, model_transformer)
1590
+
1591
+ if args.mode == 'train':
1592
+ print("Loading and preprocessing data...")
1593
+ train_loader, eval_loader = load_data(args, tokenizer)
1594
+ print("Data loaded and preprocessed successfully.")
1595
+
1596
+ # Optimizer and Scheduler
1597
+ optimizer = optim.AdamW(
1598
+ list(representation_network.parameters()) +
1599
+ list(dynamics_network.parameters()) +
1600
+ list(prediction_network.parameters()) +
1601
+ list(action_encoder.parameters()),
1602
+ lr=args.learning_rate, weight_decay=args.weight_decay
1603
+ ) if args.train_mode == 'world_model' else optim.AdamW(model_transformer.parameters(), lr=args.learning_rate)
1604
+ scheduler = CosineAnnealingLR(optimizer, T_max=args.num_epochs)
1605
+ scaler = GradScaler()
1606
+
1607
+ print(f"Starting {args.train_mode} training...")
1608
+
1609
+ for epoch in range(args.num_epochs):
1610
+ if args.train_mode == 'world_model':
1611
+ avg_loss = train_epoch_world_model(
1612
+ world_model_components,
1613
+ train_loader,
1614
+ optimizer,
1615
+ scheduler,
1616
+ scaler,
1617
+ args,
1618
+ model_transformer,
1619
+ state_dim,
1620
+ embed_dim,
1621
+ input_dim
1622
+ )
1623
+ else:
1624
+ avg_loss = train_epoch_language_model(
1625
+ model_transformer,
1626
+ train_loader,
1627
+ optimizer,
1628
+ scheduler,
1629
+ scaler,
1630
+ args
1631
+ )
1632
+
1633
+ print(f"{args.train_mode.capitalize()} training epoch {epoch + 1} completed. Average loss: {avg_loss:.4f}")
1634
+
1635
+ if args.train_mode == 'world_model':
1636
+ save_all_models(model_transformer, representation_network, dynamics_network, prediction_network, action_encoder, args.save_dir, epoch + 1)
1637
+ print(f"Models saved for epoch {epoch + 1}")
1638
+ else:
1639
+ torch.save(model_transformer.state_dict(), os.path.join(args.save_dir, f'language_model_epoch_{epoch + 1}.pt'))
1640
+ print(f"Language model saved for epoch {epoch + 1}")
1641
+
1642
+ print("Training completed.")
1643
+
1644
+ elif args.mode == 'inference':
1645
+ # Build Tree of Thought if needed
1646
+ tree_root = build_tree_of_thought()
1647
+ # Generate action list
1648
+ action_list = []
1649
+ traverse_tree(tree_root, action_list)
1650
+
1651
+ # Create mappings
1652
+ global action_to_index, index_to_action
1653
+ action_to_index = {action: idx for idx, action in enumerate(action_list)}
1654
+ index_to_action = {idx: action for action, idx in action_to_index.items()}
1655
+ action_vocab_size = len(action_list)
1656
+
1657
+ # Update action encoder and prediction network with new vocab size
1658
+ action_encoder = ActionEncoder(action_vocab_size, action_dim).to(device)
1659
+ prediction_network = PredictionNetwork(state_dim, action_vocab_size, 1).to(device)
1660
+
1661
+ # Load the saved models
1662
+ # Assuming the models are saved after training
1663
+ # You need to adjust the paths and epoch numbers as necessary
1664
+ model_transformer.load_state_dict(torch.load(os.path.join(args.save_dir, 'transformer_model_epoch_2.pt')))
1665
+ representation_network.load_state_dict(torch.load(os.path.join(args.save_dir, 'representation_network_epoch_2.pt')))
1666
+ dynamics_network.load_state_dict(torch.load(os.path.join(args.save_dir, 'dynamics_network_epoch_2.pt')))
1667
+ saved_state_dict = torch.load(os.path.join(args.save_dir, 'prediction_network_epoch_2.pt'))
1668
+ prediction_network.policy_head = nn.Linear(prediction_network.state_dim, 50257) # Update to match saved model size
1669
+ prediction_network.load_state_dict(saved_state_dict, strict=False)
1670
+
1671
+ # Resize policy_head back to the ToT action vocabulary size after loading
1672
+ prediction_network.policy_head = nn.Linear(prediction_network.state_dim, action_vocab_size).to(device)
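+ # The checkpointed head was sized for the GPT-2 vocabulary (50257), so we load
+ # with a matching head and then swap in a freshly initialised head sized to the
+ # ToT action vocabulary; the new head is untrained until fine-tuned.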
1673
+
1674
+
1675
+
1676
+ action_encoder.load_state_dict(torch.load(os.path.join(args.save_dir, 'action_encoder_epoch_2.pt')))
1677
+
1678
+ # Prepare the components
1679
+ world_model_components = (representation_network, dynamics_network, prediction_network, action_encoder, ppo_agent, model_transformer)
1680
+
1681
+ # Perform inference
1682
+ if not args.query:
1683
+ args.query = input("Please enter your query: ")
1684
+
1685
+ result = infer(args.query, world_model_components, tree_root, tokenizer, inference_mode=args.inference_mode)
1686
+
1687
+ if args.inference_mode == 'without_world_model':
1688
+ print("Generated Text:")
1689
+ print(result)
1690
+ else:
1691
+ print("Generated Thought Sequence:")
1692
+ for thought in result:
1693
+ print(thought)
1694
+
1695
+ if __name__ == '__main__':
1696
+ main()
main_menu.py ADDED
@@ -0,0 +1,61 @@
1
+ # main_menu.py
2
+
3
+ import argparse
4
+ import sys
5
+ from train_agent import train_agent
6
+ from test_agent import TestAgent, run_test_session
7
+ from lightbulb import main as world_model_main
8
+
9
+ def parse_main_args():
10
+ parser = argparse.ArgumentParser(description="Main Menu for Selecting Tasks")
11
+ parser.add_argument('--task', type=str, choices=['train_llm_world', 'train_agent', 'test_agent'],
12
+ required=True, help='Choose task to execute: train_llm_world, train_agent, test_agent')
13
+ # Optional arguments for more granular control
14
+ parser.add_argument('--model_name', type=str, default='gpt2', help='Pretrained model name for LLM')
15
+ parser.add_argument('--dataset_name', type=str, default='wikitext', help='Dataset name for training')
16
+ parser.add_argument('--dataset_config', type=str, default='wikitext-2-raw-v1', help='Dataset configuration name')
17
+ parser.add_argument('--batch_size', type=int, default=4, help='Batch size for training')
18
+ parser.add_argument('--num_epochs', type=int, default=3, help='Number of epochs for training')
19
+ parser.add_argument('--max_length', type=int, default=128, help='Maximum sequence length for training')
20
+ parser.add_argument('--mode', type=str, choices=['train', 'inference'], default='train', help='Train or inference mode for LLM')
21
+ parser.add_argument('--query', type=str, default='', help='Query for the test_agent')
22
+ return parser.parse_args()
23
+
24
+ def main():
25
+ # Parse arguments for the main function
26
+ args = parse_main_args()
27
+
28
+ # Execute tasks based on user input
29
+ if args.task == 'train_llm_world':
30
+ print("Starting LLM and World Model Training...")
31
+ # Directly call the world model main function
32
+ sys.argv = ['lightbulb.py', '--mode', args.mode, '--model_name', args.model_name,
33
+ '--dataset_name', args.dataset_name, '--dataset_config', args.dataset_config,
34
+ '--batch_size', str(args.batch_size), '--num_epochs', str(args.num_epochs),
35
+ '--max_length', str(args.max_length)]
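+ # (lightbulb's main() re-parses sys.argv via parse_args(), so overwriting it
+ # here forwards these options without changing lightbulb's interface)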
36
+ world_model_main()
37
+
38
+ elif args.task == 'train_agent':
39
+ print("Starting Agent Training...")
40
+ # Call the train_agent function from train_agent.py
41
+ from twisted.internet import reactor, task
42
+ d = task.deferLater(reactor, 0, train_agent)
43
+ d.addErrback(lambda failure: print(f"An error occurred: {failure}"))
44
+ d.addBoth(lambda _: reactor.stop() if reactor.running else None)
45
+ reactor.run()
46
+
47
+ elif args.task == 'test_agent':
48
+ print("Starting Test Agent...")
49
+ from twisted.internet import reactor
+ test_agent = TestAgent()
50
+ if args.query:
51
+ # Directly process a single query
52
+ result = test_agent.process_query(args.query)
53
+ print("\nAgent's response:")
54
+ print(result)
55
+ else:
56
+ # Run the interactive session
57
+ reactor.callWhenRunning(run_test_session)
58
+ reactor.run()
59
+
60
+ if __name__ == "__main__":
61
+ main()
mcts.py ADDED
@@ -0,0 +1,225 @@
1
+ # mcts.py
2
+ import math
3
+ import random
4
+ from nltk.corpus import wordnet
5
+ from scrapy.crawler import CrawlerRunner
6
+ from scrapy.utils.log import configure_logging
7
+ from scrapy.utils.project import get_project_settings
8
+ from twisted.internet import reactor, defer
9
+ from scrapy import signals
10
+ import logging
11
+ from my_search_engine.my_search_engine.spiders.search_spider import SearchSpider
12
+ from sentence_transformers import SentenceTransformer, util
13
+ from ranking import train_ranking_model
14
+ import time
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ class MCTSNode:
19
+ def __init__(self, state, parent=None, action=None):
20
+ self.state = state
21
+ self.parent = parent
22
+ self.action = action
23
+ self.children = []
24
+ self.visits = 0
25
+ self.value = 0
26
+ self.ucb_score = float('inf')
27
+
28
+ def is_leaf(self):
29
+ return len(self.children) == 0
30
+
31
+ def add_child(self, child_state, action=None):
32
+ child_node = MCTSNode(child_state, parent=self, action=action)
33
+ self.children.append(child_node)
34
+ return child_node
35
+
36
+ def update(self, reward):
37
+ self.visits += 1
38
+ self.value += reward
39
+ if self.parent: # Only calculate UCB if not root
40
+ self.ucb_score = self.calculate_ucb()
41
+
42
+ def calculate_ucb(self, exploration_weight=1.41):
43
+ if self.visits == 0 or not self.parent:
44
+ return float('inf')
45
+ exploitation = self.value / self.visits
46
+ exploration = exploration_weight * math.sqrt(math.log(self.parent.visits) / self.visits)
47
+ return exploitation + exploration
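+ # This is the standard UCB1 rule:
+ #   UCB(child) = value/visits + c * sqrt(ln(parent.visits) / visits)
+ # with exploration weight c = 1.41 (~ sqrt(2)); unvisited nodes score +inf so
+ # they are always tried first.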
48
+
49
+ class MCTS:
50
+ def __init__(self, initial_state, num_simulations=20, exploration_weight=1.41):
51
+ self.root = MCTSNode(initial_state)
52
+ self.num_simulations = num_simulations
53
+ self.exploration_weight = exploration_weight
54
+ self.query_model = SentenceTransformer('all-MiniLM-L6-v2')
55
+ self.results = []
56
+ self.crawler_runner = CrawlerRunner(get_project_settings())
57
+ self.initial_state = initial_state
58
+ self.num_iterations = 5
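+ # NOTE: run() loops num_iterations times; num_simulations is stored but not
+ # used by run() in this implementation.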
59
+
60
+ def select(self, node):
61
+ while not node.is_leaf():
62
+ if not node.children:
63
+ return node
64
+ node = max(node.children, key=lambda c: c.calculate_ucb(self.exploration_weight))
65
+ return node
66
+
67
+ def expand(self, node):
68
+ if node.visits == 0:
69
+ return node
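+ # An unvisited leaf is simulated as-is; children are only generated once the
+ # node has been visited at least once (standard MCTS expansion policy).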
70
+ possible_refinements = self.get_possible_refinements(node.state)
71
+ for refinement in possible_refinements:
72
+ node.add_child(refinement)
73
+ return random.choice(node.children) if node.children else node
74
+
75
+ def calculate_combined_reward(self, ranking_score, state):
76
+ state_length_reward = len(state) / 100
77
+ if state:
78
+ query_complexity = len(set(state.split())) / len(state.split())
79
+ else:
80
+ query_complexity = 0
81
+ semantic_similarity = self.calculate_semantic_similarity(state, self.root.state)
82
+
83
+ combined_reward = (
84
+ 0.5 * ranking_score +
85
+ 0.2 * state_length_reward +
86
+ 0.2 * query_complexity +
87
+ 0.1 * semantic_similarity
88
+ )
89
+ return combined_reward
90
+
91
+ def calculate_semantic_similarity(self, query1, query2):
92
+ embedding1 = self.query_model.encode(query1)
93
+ embedding2 = self.query_model.encode(query2)
94
+ return util.pytorch_cos_sim(embedding1, embedding2).item()
95
+
96
+ def backpropagate(self, node, reward):
97
+ while node is not None:
98
+ node.update(reward)
99
+ node = node.parent
100
+
101
+ def best_action(self):
102
+ if not self.root.children:
103
+ return self.root
104
+
105
+ def score(node):
106
+ if node.visits == 0:
107
+ return float('-inf')
108
+ return node.value / node.visits
109
+
110
+ return max(self.root.children, key=score)
111
+
112
+ def refine_query(self, query):
113
+ words = query.split()
114
+ refined_query = []
115
+
116
+ for word in words:
117
+ if word.lower() not in {"how", "to", "get", "an", "the", "and", "or", "of", "build"}:
118
+ synonyms = wordnet.synsets(word)
119
+ if synonyms:
120
+ synonym_words = [lemma.name() for lemma in synonyms[0].lemmas()
121
+ if len(lemma.name().split()) == 1 and word != lemma.name()]
122
+ if synonym_words:
123
+ refined_query.append(random.choice(synonym_words))
124
+ else:
125
+ refined_query.append(word)
126
+ else:
127
+ refined_query.append(word)
128
+ else:
129
+ refined_query.append(word)
130
+
131
+ possible_intent_keywords = ['guide', 'tutorial', 'LLM', 'language model', 'NLP', 'GPT']
132
+ refined_query.append(random.choice(possible_intent_keywords))
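+ # Example: for "how to build an LLM", stopwords and "build" are kept verbatim,
+ # other words may be swapped for a single-token WordNet synonym, and a random
+ # intent keyword (e.g. "tutorial") is appended.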
133
+
134
+ return ' '.join(refined_query)
135
+
136
+ def get_related_queries(self, query):
137
+ query_embedding = self.query_model.encode(query)
138
+ refined_query_variations = [query]
139
+ words_to_avoid = {'how', 'to', 'get'}
140
+ words = query.split()
141
+
142
+ for word in words:
143
+ if word.lower() not in words_to_avoid:
144
+ synonyms = wordnet.synsets(word)
145
+ if synonyms:
146
+ synonym_words = [lemma.name() for lemma in synonyms[0].lemmas() if lemma.name() != word]
147
+ if synonym_words:
148
+ refined_query = query.replace(word, random.choice(synonym_words))
149
+ refined_query_variations.append(refined_query)
150
+
151
+ refined_query_variations = list(set(refined_query_variations))
152
+ refined_query_embeddings = [self.query_model.encode(variation) for variation in refined_query_variations]
153
+ similarity_scores = util.pytorch_cos_sim(query_embedding, refined_query_embeddings).tolist()[0]
154
+
155
+ similarity_threshold = 0.8
156
+ filtered_queries = [variation for idx, variation in enumerate(refined_query_variations)
157
+ if similarity_scores[idx] > similarity_threshold]
158
+
159
+ return filtered_queries[:2] if filtered_queries else [query]
160
+
161
+ def get_possible_refinements(self, query):
162
+ refined_queries = self.get_related_queries(query)
163
+ return refined_queries + [self.refine_query(query)]
164
+
165
+ @defer.inlineCallbacks
166
+ def web_search(self, query, search_sites=None):
167
+ if not query.strip():
168
+ logger.error("Cannot perform web search with an empty query.")
169
+ defer.returnValue([])
170
+
171
+ logger.info(f"Starting web search for query: {query}")
172
+ configure_logging(install_root_handler=False)
173
+ logging.basicConfig(level=logging.INFO)
174
+
175
+ results = []
176
+
177
+ def crawler_results(item, response, spider):
178
+ logger.info(f"Received result: {item['title']}")
179
+ results.append(item)
180
+
181
+ try:
182
+ crawler = self.crawler_runner.create_crawler(SearchSpider)
183
+ crawler.signals.connect(crawler_results, signal=signals.item_scraped)
184
+
185
+ # Start crawling, passing query and search_sites to the spider
186
+ yield self.crawler_runner.crawl(crawler, query=query, search_sites=search_sites)
187
+ except Exception as e:
188
+ logger.error(f"Error during web search: {str(e)}")
189
+ defer.returnValue([])
190
+
191
+ logger.info(f"Web search completed. Found {len(results)} results.")
192
+ defer.returnValue(results)
193
+
194
+ @defer.inlineCallbacks
195
+ def run(self):
196
+ logger.info(f"Starting MCTS run with {self.num_iterations} iterations")
197
+ for i in range(self.num_iterations):
198
+ logger.debug(f"Iteration {i+1}/{self.num_iterations}")
199
+ leaf = self.select(self.root)
200
+ child = self.expand(leaf)
201
+ reward = yield self.simulate(child)
202
+ self.backpropagate(child, reward)
203
+
204
+ best_child = self.best_action()
205
+ logger.info(f"MCTS run completed. Best action: {best_child.state}")
206
+ defer.returnValue(best_child.state if best_child != self.root else self.root.state)
207
+
208
+ @defer.inlineCallbacks
209
+ def simulate(self, node):
210
+ query_results = yield self.web_search(node.state)
211
+ ranked_results = train_ranking_model(node.state, query_results)
212
+
213
+ if ranked_results:
214
+ top_score = ranked_results[0]['predicted_score']
215
+ else:
216
+ top_score = 0
217
+
218
+ reward = self.calculate_combined_reward(top_score, node.state)
219
+ defer.returnValue(reward)
220
+
221
+
222
+
223
+
224
+
225
+
my_search_engine/my_search_engine/__init__.py ADDED
File without changes
my_search_engine/my_search_engine/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (165 Bytes).
 
my_search_engine/my_search_engine/__pycache__/items.cpython-312.pyc ADDED
Binary file (799 Bytes).
 
my_search_engine/my_search_engine/items.py ADDED
@@ -0,0 +1,10 @@
1
+ import scrapy
2
+
3
+ class MySearchEngineItem(scrapy.Item):
4
+ title = scrapy.Field()
5
+ link = scrapy.Field()
6
+ content = scrapy.Field()
7
+ score = scrapy.Field() # Will be set later during ranking (MCTS or NLP)
8
+ meta = scrapy.Field()
9
+ predicted_score = scrapy.Field()
10
+ summary = scrapy.Field()
my_search_engine/my_search_engine/middlewares.py ADDED
@@ -0,0 +1,56 @@
1
+ # middlewares.py
2
+
3
+ import random
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ class RotateUserAgentMiddleware:
9
+ """Middleware for rotating user agents to avoid detection."""
10
+
11
+ USER_AGENTS = [
12
+ # Chrome User Agents
13
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
14
+ " Chrome/93.0.4577.63 Safari/537.36",
15
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)"
16
+ " Chrome/93.0.4577.63 Safari/537.36",
17
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)"
18
+ " Chrome/93.0.4577.63 Safari/537.36",
19
+ # Firefox User Agents
20
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
21
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:92.0) Gecko/20100101 Firefox/92.0",
22
+ "Mozilla/5.0 (X11; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0",
23
+ # Safari User Agents
24
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
25
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
26
+ # Edge User Agents
27
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.38",
28
+ # Opera User Agents
29
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 OPR/78.0.4093.184",
30
+ # Mobile User Agents
31
+ "Mozilla/5.0 (Linux; Android 11; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.62 Mobile Safari/537.36",
32
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/93.0.4577.62 Mobile/15E148 Safari/604.1",
33
+ # Add more user agents from different browsers and devices
34
+ ]
35
+
36
+ def process_request(self, request, spider):
37
+ """Assign a random user agent to each request."""
38
+ user_agent = random.choice(self.USER_AGENTS)
39
+ request.headers['User-Agent'] = user_agent
40
+ logger.debug(f"Using User-Agent: {user_agent}")
41
+
42
+ # Optional: Proxy Middleware
43
+ class ProxyMiddleware:
44
+ """Middleware for rotating proxies."""
45
+
46
+ PROXIES = [
47
+ # Add proxy URLs if using proxies
48
+ # 'http://proxy1.example.com:8000',
49
+ # 'http://proxy2.example.com:8031',
50
+ ]
51
+
52
+ def process_request(self, request, spider):
53
+ if self.PROXIES:
54
+ proxy = random.choice(self.PROXIES)
55
+ request.meta['proxy'] = proxy
56
+ logger.debug(f"Using Proxy: {proxy}")
my_search_engine/my_search_engine/pipelines.py ADDED
@@ -0,0 +1,53 @@
1
+ # pipelines.py
2
+
3
+ import json
4
+
5
+ class SaveToJSONPipeline:
6
+ """Pipeline that saves scraped data to a JSON file."""
7
+
8
+ def open_spider(self, spider):
9
+ """Open the file when the spider starts."""
10
+ self.file = open('scraped_results.json', 'w', encoding='utf-8')
11
+
12
+ def close_spider(self, spider):
13
+ """Close the file when the spider finishes."""
14
+ self.file.close()
15
+
16
+ def process_item(self, item, spider):
17
+ """Write each scraped item to the JSON file."""
18
+ line = json.dumps(dict(item), ensure_ascii=False) + "\n"
19
+ self.file.write(line)
20
+ return item
21
+
22
+
23
+ class ContentCleanupPipeline:
24
+ """Pipeline to clean up content by removing unnecessary whitespace."""
25
+
26
+ def process_item(self, item, spider):
27
+ """Clean up content field."""
28
+ item['content'] = ' '.join(item['content'].split()) # Clean up content by removing extra spaces
29
+ return item
30
+
31
+
32
+ class DisplayResultsPipeline:
33
+ """Pipeline that formats and prints the search results in a Google-like format."""
34
+
35
+ def open_spider(self, spider):
36
+ """Initialize an empty results list when the spider starts."""
37
+ self.results = []
38
+
39
+ def process_item(self, item, spider):
40
+ """Store the item in the results list."""
41
+ self.results.append({
42
+ 'title': item['title'],
43
+ 'summary': item.get('summary', item['content']),  # prefer the spider-generated summary
44
+ 'link': item['link']
45
+ })
46
+ return item
47
+
48
+ def close_spider(self, spider):
49
+ """Print out the formatted results when the spider finishes."""
50
+ print("\nTop 10 Related Links for the Search Query:")
51
+ for i, result in enumerate(self.results[:10], start=1):
52
+ print(f"{i}. {result['title']}\n {result['summary'][:200]}...\n {result['link']}\n")
53
+
my_search_engine/my_search_engine/settings.py ADDED
@@ -0,0 +1,49 @@
1
+ # settings.py
2
+
3
+ # Scrapy configurations
4
+ BOT_NAME = 'my_search_engine'
5
+ SPIDER_MODULES = ['my_search_engine.spiders']
6
+ NEWSPIDER_MODULE = 'my_search_engine.spiders'
7
+
8
+ # Obey robots.txt rules
9
+ ROBOTSTXT_OBEY = True
10
+
11
+ # Configure maximum concurrent requests performed by Scrapy
12
+ CONCURRENT_REQUESTS = 16
13
+ CONCURRENT_REQUESTS_PER_DOMAIN = 1
14
+
15
+ # Configure a delay for requests to the same website
16
+ DOWNLOAD_DELAY = 2 # Fixed delay of 2 seconds
17
+
18
+ # Disable cookies (enabled by default)
19
+ COOKIES_ENABLED = False
20
+
21
+ # Enable AutoThrottle
22
+ AUTOTHROTTLE_ENABLED = True
23
+ AUTOTHROTTLE_START_DELAY = 1 # Initial download delay
24
+ AUTOTHROTTLE_MAX_DELAY = 10 # Maximum download delay in case of high latencies
25
+ AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 # Average number of requests Scrapy should be sending in parallel
26
+
27
+ # User Agent (default, only used if RotateUserAgentMiddleware fails)
28
+ USER_AGENT = "Mozilla/5.0 (compatible; MySearchEngine/1.0)"
29
+
30
+ # Downloader middlewares
31
+ DOWNLOADER_MIDDLEWARES = {
32
+ 'my_search_engine.middlewares.RotateUserAgentMiddleware': 543,
33
+ # Uncomment the following line if using the ProxyMiddleware
34
+ # 'my_search_engine.middlewares.ProxyMiddleware': 544,
35
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, # Disable default user agent middleware
36
+ }
37
+
38
+ # Item pipelines
39
+ ITEM_PIPELINES = {
40
+ 'my_search_engine.pipelines.SaveToJSONPipeline': 300,
41
+ 'my_search_engine.pipelines.ContentCleanupPipeline': 400,
42
+ 'my_search_engine.pipelines.DisplayResultsPipeline': 200,
43
+ }
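+ # Pipelines run in ascending priority order: DisplayResultsPipeline (200) sees
+ # items first, then SaveToJSONPipeline (300), then ContentCleanupPipeline (400).
+ # Note this means the JSON file stores content *before* cleanup; move the
+ # cleanup priority below 300 if cleaned content should be persisted.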
44
+
45
+ # Enable logging
46
+ LOG_ENABLED = True
47
+ LOG_LEVEL = 'INFO' # Set to 'DEBUG' to see all logs, including middleware logs
48
+
49
+ # Additional settings can be added below as needed
my_search_engine/my_search_engine/spiders/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # This package will contain the spiders of your Scrapy project
2
+ #
3
+ # Please refer to the documentation for information on how to create and manage
4
+ # your spiders.
my_search_engine/my_search_engine/spiders/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (173 Bytes).
 
my_search_engine/my_search_engine/spiders/__pycache__/search_spider.cpython-312.pyc ADDED
Binary file (11.2 kB).
 
my_search_engine/my_search_engine/spiders/search_spider.py ADDED
@@ -0,0 +1,176 @@
1
+ # search_spider.py
2
+ import scrapy
3
+ from bs4 import BeautifulSoup
4
+ from my_search_engine.my_search_engine.items import MySearchEngineItem
5
+ import random
6
+ from urllib.parse import urlparse, urljoin
7
+ import traceback
8
+ import re
9
+ from twisted.internet.error import TCPTimedOutError, ConnectionRefusedError, TimeoutError
10
+ from scrapy.exceptions import CloseSpider
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class SearchSpider(scrapy.Spider):
16
+ name = "search_spider"
17
+ allowed_domains = [] # To be set dynamically from search_sites
18
+
19
+ def __init__(self, query=None, search_sites=None, max_depth=2, max_links_per_page=3, *args, **kwargs):
20
+ super(SearchSpider, self).__init__(*args, **kwargs)
21
+ self.query = query
22
+ if not self.query:
23
+ raise CloseSpider("No search query provided")
24
+ self.max_depth = max_depth
25
+ self.max_links_per_page = max_links_per_page
26
+ if search_sites is None:
27
+ self.search_sites = [
28
+ f"https://en.wikibooks.org/w/index.php?search={self.query}",
29
+ f"https://en.wikiversity.org/w/index.php?search={self.query}",
30
+ f"https://commons.wikimedia.org/w/index.php?search={self.query}",
31
+ f"https://stackexchange.com/search?q={self.query}",
32
+ f"https://arxiv.org/search/?query={self.query}&searchtype=all",
33
+ f"https://www.ncbi.nlm.nih.gov/pmc/?term={self.query}",
34
+ f"https://www.gutenberg.org/ebooks/search/?query={self.query}",
35
+ f"https://openlibrary.org/search?q={self.query}",
36
+ f"https://doaj.org/search/articles?ref=homepage&q={self.query}",
37
+ f"https://www.ted.com/search?q={self.query}",
38
+ f"https://en.citizendium.org/wiki?search={self.query}",
39
+ f"https://www.jstor.org/action/doBasicSearch?Query={self.query}",
40
+ f"https://archive.org/search.php?query={self.query}",
41
+ f"https://search.scielo.org/?q={self.query}",
42
+ f"https://paperswithcode.com/search?q={self.query}",
43
+ f"https://www.reddit.com/search/?q={self.query}",
44
+ f"https://huggingface.co/models?search={self.query}",
45
+ f"https://huggingface.co/datasets?search={self.query}",
46
+ f"https://machinelearningmastery.com/?s={self.query}",
47
+ f"https://www.kaggle.com/search?q={self.query}",
48
+ f"https://towardsdatascience.com/search?q={self.query}",
49
+ f"https://github.com/search?q={self.query}",
50
+ f"https://stackoverflow.com/search?q={self.query}",
51
+ f"https://www.youtube.com/results?search_query={self.query}",
52
+ f"https://www.slideshare.net/search/slideshow?searchfrom=header&q={self.query}"
53
+ ]
54
+
55
+ else:
56
+ self.search_sites = search_sites
57
+
58
+
59
+ def start_requests(self):
60
+ if not self.query:
61
+ logger.error("No search query provided in start_requests")
62
+ return
63
+
64
+ self.allowed_domains = list(set([urlparse(url).netloc for url in self.search_sites]))
65
+ logger.info(f"Starting requests for query: {self.query}")
66
+
67
+ for url in self.search_sites:
68
+ yield scrapy.Request(
69
+ url,
70
+ callback=self.parse,
71
+ meta={
72
+ 'dont_retry': True,
73
+ 'handle_httpstatus_list': [302, 403, 404, 420, 429, 500, 503],
74
+ 'depth': 1 # Start at depth 1
75
+ },
76
+ errback=self.errback_httpbin
77
+ )
78
+
79
+ def parse(self, response):
80
+ depth = response.meta.get('depth', 1)
81
+ if depth > self.max_depth:
82
+ logger.debug(f"Reached max depth at {response.url}")
83
+ return
84
+
85
+ logger.info(f"Parsing response from {response.url} at depth {depth}")
86
+
87
+ try:
88
+ soup = BeautifulSoup(response.text, 'html.parser')
89
+
90
+ # Check for irrelevant or blocked content
91
+ if any(term in soup.text.lower() for term in ['captcha', 'verification', 'no items found', 'no results', 'access denied']):
92
+ logger.warning(f"Irrelevant page detected: {response.url}")
93
+ return
94
+
95
+ title = soup.find('title').get_text().strip() if soup.find('title') else 'No title'
96
+ meta_description = soup.find('meta', {'name': 'description'})
97
+ meta_description = meta_description['content'].strip() if meta_description else 'No description'
98
+
99
+ content = self.extract_main_content(soup)
100
+ summary = self.generate_summary(content, 200)
101
+ total_links = len(soup.find_all('a', href=True))
102
+ content_length = len(content.split())
103
+
104
+ if content_length < 100:
105
+ logger.info(f"Content too short ({content_length} words) for {response.url}")
106
+ return
107
+
108
+ item = MySearchEngineItem()
109
+ item['title'] = title
110
+ item['link'] = response.url
111
+ item['content'] = content
112
+ item['summary'] = summary
113
+ item['meta'] = {
114
+ 'description': meta_description,
115
+ 'total_links': total_links,
116
+ 'content_length': content_length,
117
+ 'domain': urlparse(response.url).netloc,
118
+ }
119
+ yield item
120
+
121
+ # Limit the number of links per page
122
+ links = soup.find_all('a', href=True)
123
+ random.shuffle(links)
124
+ links = links[:self.max_links_per_page] # Limit the number of links
125
+
126
+ for link in links:
127
+ href = link.get('href')
128
+ full_url = urljoin(response.url, href)
129
+ if self.is_valid_link(full_url):
130
+ logger.debug(f"Following link: {full_url}")
131
+ yield scrapy.Request(
132
+ url=full_url,
133
+ callback=self.parse,
134
+ meta={'depth': depth + 1},
135
+ errback=self.errback_httpbin
136
+ )
137
+ except Exception as e:
138
+ logger.error(f"Error parsing {response.url}: {str(e)}")
139
+ logger.error(traceback.format_exc())
140
+
141
+ def extract_main_content(self, soup):
142
+ for element in soup(['script', 'style', 'nav', 'header', 'footer']):
143
+ element.decompose()
144
+
145
+ main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')
146
+
147
+ if main_content:
148
+ return ' '.join(main_content.stripped_strings)
149
+
150
+ paragraphs = soup.find_all('p')
151
+ return ' '.join([p.get_text().strip() for p in paragraphs])
152
+
153
+ def generate_summary(self, content, max_length=200):
154
+ sentences = re.split(r'(?<=[.!?])\s+', content)
155
+ summary = ""
156
+ for sentence in sentences:
157
+ if len(summary) + len(sentence) <= max_length:
158
+ summary += sentence + " "
159
+ else:
160
+ break
161
+ return summary.strip()
162
+
163
+ def is_valid_link(self, url):
164
+ parsed_url = urlparse(url)
165
+ return any(domain in parsed_url.netloc for domain in self.allowed_domains)
166
+
167
+ def errback_httpbin(self, failure):
168
+ logger.error(f"Error on {failure.request.url}: {str(failure.value)}")
169
+ logger.error(failure.getTraceback())  # format_exc() has no active exception in an errback
170
+
171
+ if failure.check(ConnectionRefusedError):
172
+ logger.warning(f"Connection refused: {failure.request.url}")
173
+ elif failure.check(TimeoutError, TCPTimedOutError):
174
+ logger.warning(f"Timeout: {failure.request.url}")
175
+ else:
176
+ logger.error(f"Failed to process: {failure.request.url}")
my_search_engine/scrapy.cfg ADDED
@@ -0,0 +1,11 @@
1
+ # Automatically created by: scrapy startproject
2
+ #
3
+ # For more information about the [deploy] section see:
4
+ # https://scrapyd.readthedocs.io/en/latest/deploy.html
5
+
6
+ [settings]
7
+ default = my_search_engine.settings
8
+
9
+ [deploy]
10
+ #url = http://localhost:6800/
11
+ project = my_search_engine
ranking.py ADDED
@@ -0,0 +1,239 @@
1
+ # ranking.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import pandas as pd
6
+ from sentence_transformers import SentenceTransformer, util
7
+ import numpy as np
8
+ from sklearn.preprocessing import MinMaxScaler
9
+ from collections import Counter
10
+ import re
11
+ import string
13
+ from sklearn.feature_extraction.text import TfidfVectorizer
14
+ from nltk.corpus import stopwords
15
+ from nltk.stem import WordNetLemmatizer
16
+ from nltk.tokenize import word_tokenize
17
+ import spacy
18
+
19
+ def truncate_text(text, max_length=1024):
20
+ tokens = text.split()
21
+ if len(tokens) > max_length:
22
+ return ' '.join(tokens[:max_length])
23
+ return text
24
+
25
+ class RankingNN(nn.Module):
26
+ def __init__(self, input_size=7):
27
+ super(RankingNN, self).__init__()
28
+ self.fc1 = nn.Linear(input_size, 64)
29
+ self.fc2 = nn.Linear(64, 32)
30
+ self.fc3 = nn.Linear(32, 16)
31
+ self.fc4 = nn.Linear(16, 1)
32
+ self.dropout = nn.Dropout(0.2)
33
+
34
+ def forward(self, x):
35
+ x = torch.relu(self.fc1(x))
36
+ x = self.dropout(x)
37
+ x = torch.relu(self.fc2(x))
38
+ x = self.dropout(x)
39
+ x = torch.relu(self.fc3(x))
40
+ x = self.fc4(x)
41
+ return x
42
+
43
+ transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
44
+ ranking_model = RankingNN()
45
+ optimizer = optim.Adam(ranking_model.parameters(), lr=0.001, weight_decay=1e-5)
46
+ criterion = nn.MSELoss()
47
+ scaler = MinMaxScaler()
48
+
49
+ # Download necessary resources
50
+ import nltk
51
+ nltk.download('punkt')
52
+ nltk.download('stopwords')
53
+ nltk.download('wordnet')
54
+
55
+ # Initialize resources
56
+ stop_words = set(stopwords.words('english'))
57
+ lemmatizer = WordNetLemmatizer()
58
+ nlp = spacy.load("en_core_web_sm") # Small model to keep compute low
59
+
60
+ def preprocess_text(text):
61
+ """
62
+ Preprocess the input text by lowercasing, removing punctuation, and filtering out stopwords.
63
+ Lemmatization is applied as well.
64
+ """
65
+ # Lowercase the text
66
+ text = text.lower()
67
+
68
+ # Remove punctuation using regex
69
+ text = re.sub('[' + re.escape(string.punctuation) + ']', ' ', text)
70
+
71
+ # Tokenize the text into words
72
+ words = word_tokenize(text)
73
+
74
+ # Lemmatize, filter out stopwords and non-alphabetic words
75
+ processed_words = [lemmatizer.lemmatize(word) for word in words if word.isalpha() and word not in stop_words]
76
+
77
+ return processed_words
78
+
79
+ def extract_named_entities(text):
80
+ """
81
+ Extract named entities (e.g., people, organizations, locations) from the text.
82
+ """
83
+ doc = nlp(text)
84
+ named_entities = [ent.text for ent in doc.ents if ent.label_ in {"PERSON", "ORG", "GPE", "LOC"}]
85
+ return named_entities
86
+
87
+ def extract_keywords_tfidf(corpus, text, n=5):
88
+ """
89
+ Extract keywords from the text using TF-IDF, combined with Named Entity Recognition and lemmatization.
90
+ """
91
+ # Preprocess the text and the entire corpus
92
+ preprocessed_texts = [' '.join(preprocess_text(doc)) for doc in corpus]
93
+ preprocessed_text = ' '.join(preprocess_text(text))
94
+
95
+ # Named entities extraction
96
+ named_entities = extract_named_entities(text)
97
+
98
+ # Use TF-IDF vectorizer to find the most important words
99
+ vectorizer = TfidfVectorizer(max_features=1000) # Keep it light, max 1000 features
100
+ X = vectorizer.fit_transform(preprocessed_texts)
101
+
102
+ # Get the feature names (i.e., the words)
103
+ feature_names = vectorizer.get_feature_names_out()
104
+
105
+ # Transform the current text into TF-IDF scores
106
+ response = vectorizer.transform([preprocessed_text])
107
+ tfidf_scores = zip(feature_names, response.toarray()[0])
108
+
109
+ # Sort by TF-IDF score
110
+ sorted_tfidf = sorted(tfidf_scores, key=lambda x: x[1], reverse=True)
111
+
112
+ # Combine top TF-IDF words with named entities for more richness
113
+ keywords = [word for word, score in sorted_tfidf[:n]]
114
+ combined_keywords = keywords + named_entities
115
+
116
+ return combined_keywords[:n]
117
+
118
+ def extract_keywords(text, corpus, n=5):
119
+ """
120
+ Wrapper function that combines preprocessing, TF-IDF, and Named Entity Recognition to extract top N keywords.
121
+ """
122
+ if not text.strip():
123
+ return []
124
+
125
+ # Extract keywords using the TF-IDF based approach
126
+ keywords = extract_keywords_tfidf(corpus, text, n)
127
+
128
+ # If no meaningful keywords are found, fallback to keyword frequency
129
+ if not keywords:
130
+ return extract_fallback_keywords(text, n)
131
+
132
+ return keywords
133
+
134
+ def extract_fallback_keywords(text, n=5):
135
+ """
136
+ Fallback method to extract keywords based on word frequency in case TF-IDF or NER fails.
137
+ """
138
+ words = preprocess_text(text)
139
+ word_freq = Counter(words)
140
+ return [word for word, _ in word_freq.most_common(n)]
141
+
142
+ def calculate_keyword_overlap(query_keywords, result_keywords):
143
+ if len(query_keywords) == 0:
144
+ return 0 # No keywords in query, so overlap is 0
145
+ return len(set(query_keywords) & set(result_keywords)) / len(query_keywords)
146
+
147
+ def train_ranking_model(query, results, corpus=None, epochs=1):
148
+ query = truncate_text(query)
149
+ if not results:
150
+ print("No results available. Skipping training.")
151
+ return []
152
+
153
+ if corpus is None:
154
+ # If no corpus is provided, use results as a fallback
155
+ corpus = [truncate_text(result['content']) for result in results if 'content' in result]
156
+
157
+ query_embedding = transformer_model.encode(query)
158
+ query_keywords = extract_keywords(query, corpus)
159
+
160
+ training_data = []
161
+ target_scores = []
162
+
163
+ for result in results:
164
+ # Truncate content
165
+ content = truncate_text(result['content'])
166
+ content_embedding = transformer_model.encode(content)
167
+
168
+ # Handle missing 'title' and 'meta' fields with default values, and truncate
169
+ title = truncate_text(result.get('title', ''))
170
+ title_embedding = transformer_model.encode(title)
171
+
172
+ meta_description = truncate_text(result.get('meta', {}).get('description', ''))
173
+ meta_description_embedding = transformer_model.encode(meta_description)
174
+
175
+ content_similarity = util.pytorch_cos_sim(query_embedding, content_embedding).item()
176
+ title_similarity = util.pytorch_cos_sim(query_embedding, title_embedding).item()
177
+ meta_description_similarity = util.pytorch_cos_sim(query_embedding, meta_description_embedding).item()
178
+
179
+ # Handle missing metadata by providing default values
180
+ content_length = result.get('meta', {}).get('content_length', 0)
181
+ total_links = result.get('meta', {}).get('total_links', 0)
182
+
183
+ result_keywords = extract_keywords(content, corpus)
184
+ keyword_overlap = calculate_keyword_overlap(query_keywords, result_keywords)
185
+ domain_authority = get_domain_authority(result.get('link', ''))
186
+
187
+ features = [
188
+ content_similarity, title_similarity, meta_description_similarity,
189
+ content_length, total_links, keyword_overlap, domain_authority
190
+ ]
191
+
192
+ training_data.append(features)
193
+
194
+ target_score = (0.4 * content_similarity + 0.3 * title_similarity +
195
+ 0.2 * meta_description_similarity + 0.1 * keyword_overlap)
196
+ target_scores.append(target_score)
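+ # The network is trained to imitate this hand-weighted relevance heuristic
+ # (0.4 content / 0.3 title / 0.2 meta / 0.1 keyword overlap); with labelled
+ # click or relevance data, target_score could be replaced by real labels.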
197
+
198
+ # Normalize features
199
+ training_data = scaler.fit_transform(training_data)
200
+ training_data_tensor = torch.tensor(training_data, dtype=torch.float32)
201
+ target_scores_tensor = torch.tensor(target_scores, dtype=torch.float32).unsqueeze(1)
202
+
203
+ # Training loop
204
+ for epoch in range(epochs):
205
+ optimizer.zero_grad()
206
+ predicted_scores = ranking_model(training_data_tensor)
207
+ loss = criterion(predicted_scores, target_scores_tensor)
208
+ loss.backward()
209
+ optimizer.step()
210
+
211
+ if (epoch + 1) % 5 == 0:
212
+ print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
213
+
214
+ # Predict final scores and rank results
215
+ with torch.no_grad():
216
+ final_scores = ranking_model(training_data_tensor).squeeze().tolist()
217
+
218
+ # Ensure final_scores is always a list
219
+ if isinstance(final_scores, float):
220
+ final_scores = [final_scores]
221
+
222
+ for result, score in zip(results, final_scores):
223
+ result['predicted_score'] = score
224
+
225
+ ranked_results = sorted(results, key=lambda x: x['predicted_score'], reverse=True)
226
+ return ranked_results
227
+
228
+ def get_domain_authority(url):
229
+ # Placeholder function - replace with actual domain authority data if available
230
+ high_authority_domains = ['arxiv.org', 'ncbi.nlm.nih.gov', 'nature.com', 'science.org']
231
+ medium_authority_domains = ['wikipedia.org', 'stackexchange.com', 'github.com']
232
+
233
+ for domain in high_authority_domains:
234
+ if domain in url:
235
+ return 1.0
236
+ for domain in medium_authority_domains:
237
+ if domain in url:
238
+ return 0.7
239
+ return 0.5
test_agent.py ADDED
@@ -0,0 +1,148 @@
1
+ # test_agent.py
2
+
3
+ import logging
4
+ from twisted.internet import reactor, defer, threads
5
+ from agent import AutonomousWebAgent
6
+ from ToTSearch import ToTSearch
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
10
+
11
+ # Initialize the logger
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Suppress detailed logs for some libraries (like Scrapy or Transformers)
15
+ logging.getLogger('scrapy').setLevel(logging.ERROR)
16
+ logging.getLogger('transformers').setLevel(logging.ERROR)
17
+ logging.getLogger('twisted').setLevel(logging.ERROR)
18
+
19
+ import warnings
20
+ warnings.filterwarnings("ignore", category=FutureWarning)
21
+
22
+
23
+ class TestAgent:
24
+ def __init__(self):
25
+ # Initialize the AutonomousWebAgent
26
+ state_size = 7 # word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count
27
+ action_size = 3 # 0: Click Link, 1: Summarize, 2: RAG Generate
28
+ num_options = 3 # 0: Search, 1: Summarize, 2: RAG Generate
29
+
30
+ self.agent = AutonomousWebAgent(
31
+ state_size=state_size,
32
+ action_size=action_size,
33
+ num_options=num_options,
34
+ hidden_size=64,
35
+ learning_rate=0.001,
36
+ gamma=0.99,
37
+ epsilon=1.0,
38
+ epsilon_decay=0.995,
39
+ epsilon_min=0.01,
40
+ knowledge_base_path='knowledge_base.json'
41
+ )
42
+
43
+ # Initialize ToTSearch with the agent
44
+ self.tot_search = ToTSearch(self.agent)
45
+
46
+ # Few-shot examples for Tree of Thoughts
47
+ self.few_shot_examples = [
48
+ {
49
+ "query": "What are the effects of climate change on biodiversity?",
50
+ "thoughts": [
51
+ "Loss of habitats due to rising sea levels and changing temperatures",
52
+ "Disruption of ecosystems and food chains",
53
+ "Increased extinction rates for vulnerable species"
54
+ ],
55
+ "answer": "Climate change significantly impacts biodiversity through habitat loss, ecosystem disruption, and increased extinction rates. Rising temperatures and sea levels alter habitats, forcing species to adapt or migrate. This disrupts established ecosystems and food chains. Species unable to adapt quickly face a higher risk of extinction, particularly those with specialized habitats or limited ranges."
56
+ },
57
+ {
58
+ "query": "How can we promote sustainable energy adoption?",
59
+ "thoughts": [
60
+ "Government policies and incentives",
61
+ "Public awareness and education campaigns",
62
+ "Technological advancements and cost reduction"
63
+ ],
64
+ "answer": "Promoting sustainable energy adoption requires a multi-faceted approach. Government policies and incentives can encourage both businesses and individuals to switch to renewable sources. Public awareness and education campaigns help people understand the importance and benefits of sustainable energy. Continued technological advancements and cost reductions make sustainable energy more accessible and economically viable for widespread adoption."
65
+ }
66
+ ]
67
+
68
+ @defer.inlineCallbacks
69
+ def process_query(self, query, is_few_shot=False):
70
+ logger.info(f"Processing query: {query}")
71
+ try:
72
+ if is_few_shot:
73
+ few_shot_prompt = self.create_few_shot_prompt(query)
74
+ enhanced_query = f"{few_shot_prompt}\n\nQuery: {query}"
75
+ logger.debug(f"Enhanced query for few-shot learning: {enhanced_query[:100]}...")
76
+ final_answer = yield self.tot_search.search(enhanced_query)
77
+ else:
78
+ final_answer = yield self.tot_search.search(query)
79
+
80
+ logger.info(f"Final answer for '{query}':")
81
+ logger.info(final_answer)
82
+
83
+ yield self.agent.add_document_to_kb(title=f"ToT Search Result: {query}", content=final_answer)
84
+
85
+ yield self.agent.replay_worker(batch_size=32)
86
+ yield self.agent.replay_manager(batch_size=32)
87
+
88
+ return final_answer
89
+ except Exception as e:
90
+ logger.error(f"Error processing query '{query}': {str(e)}", exc_info=True)
91
+ return f"An error occurred: {str(e)}"
92
+
93
+ def create_few_shot_prompt(self, query):
94
+ prompt = "Here are some examples of how to approach queries using a Tree of Thoughts:\n\n"
95
+ for example in self.few_shot_examples:
96
+ prompt += f"Query: {example['query']}\n"
97
+ prompt += "Thoughts:\n"
98
+ for thought in example['thoughts']:
99
+ prompt += f"- {thought}\n"
100
+ prompt += f"Answer: {example['answer']}\n\n"
101
+ prompt += f"Now, let's approach the following query in a similar manner:\n\nQuery: {query}\n"
102
+ return prompt
103
+
104
+ def save_models(self):
105
+ self.agent.save_worker_model("worker_model_final.pth")
106
+ self.agent.save_manager_model("manager_model_final.pth")
107
+ logger.info("Agent models saved.")
108
+
109
+
110
+ def get_user_input():
111
+ return input("Enter your query (or 'quit' to exit): ")
112
+
113
+
114
+ @defer.inlineCallbacks
115
+ def run_test_session():
116
+ test_agent = TestAgent()
117
+
118
+ logger.info("Starting few-shot learning phase...")
119
+ for example in test_agent.few_shot_examples:
120
+ logger.info(f"Processing few-shot example: {example['query']}")
121
+ try:
122
+ yield test_agent.process_query(example['query'], is_few_shot=True)
123
+ except Exception as e:
124
+ logger.error(f"Error in few-shot learning: {str(e)}", exc_info=True)
125
+
126
+ logger.info("Few-shot learning phase completed. Starting interactive session.")
127
+
128
+ while True:
129
+ query = yield threads.deferToThread(get_user_input)
130
+
131
+ if query.lower() == 'quit':
132
+ break
133
+
134
+ try:
135
+ answer = yield test_agent.process_query(query)
136
+ print("\nAgent's response:")
137
+ print(answer)
138
+ print("\n" + "-"*50 + "\n")
139
+ except Exception as e:
140
+ logger.error(f"Error in interactive session: {str(e)}", exc_info=True)
141
+
142
+ test_agent.save_models()
143
+ reactor.stop()
144
+
145
+
146
+ if __name__ == "__main__":
147
+ reactor.callWhenRunning(run_test_session)
148
+ reactor.run()
train_agent.py ADDED
@@ -0,0 +1,116 @@
1
+ # train_agent.py
2
+
3
+ from twisted.internet import reactor, defer, task
4
+ from agent import AutonomousWebAgent
5
+ import random
6
+ import logging
7
+ import sys
8
+ import time
9
+ import codecs
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO,
13
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
14
+ handlers=[
15
+ logging.FileHandler("agent_training.log", encoding='utf-8'),
16
+ logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer))
17
+ ])
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # List of diverse queries
22
+ QUERIES = [
23
+ "machine learning", "climate change", "renewable energy", "artificial intelligence",
24
+ "quantum computing", "blockchain technology", "gene editing", "virtual reality",
25
+ "space exploration", "cybersecurity", "autonomous vehicles", "Internet of Things",
26
+ "3D printing", "nanotechnology", "bioinformatics", "augmented reality", "robotics",
27
+ "data science", "neural networks", "cloud computing", "edge computing", "5G technology",
28
+ "cryptocurrency", "natural language processing", "computer vision"
29
+ ]
30
+
31
+ @defer.inlineCallbacks
32
+ def train_agent():
33
+ # Updated state_size to 7 to match the feature extraction in AutonomousWebAgent
34
+ state_size = 7 # word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count
35
+ action_size = 3 # 0: Click Link, 1: Summarize, 2: RAG Generate
36
+ num_options = 3 # 0: Search, 1: Summarize, 2: RAG Generate
37
+
38
+ # Initialize the AutonomousWebAgent with the required arguments
39
+ agent = AutonomousWebAgent(
40
+ state_size=state_size,
41
+ action_size=action_size,
42
+ num_options=num_options, # Added parameter for HRL
43
+ hidden_size=64,
44
+ learning_rate=0.001,
45
+ gamma=0.99,
46
+ epsilon=1.0,
47
+ epsilon_decay=0.995,
48
+ epsilon_min=0.01,
49
+ knowledge_base_path='knowledge_base.json'
50
+ )
51
+ logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")
52
+
53
+ num_episodes = 10 # Adjust as needed
54
+ total_training_reward = 0
55
+ start_time = time.time()
56
+
57
+ for episode in range(num_episodes):
58
+ query = random.choice(QUERIES)
59
+ logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
60
+ episode_start_time = time.time()
61
+
62
+ try:
63
+ # Initiate the search process
64
+ search_deferred = agent.search(query)
65
+ search_deferred.addTimeout(300, reactor) # 5-minute timeout
66
+ total_reward = yield search_deferred
67
+ total_training_reward += total_reward
68
+ episode_duration = time.time() - episode_start_time
69
+ logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
70
+ except defer.TimeoutError:
71
+ logger.error(f"Episode {episode + 1} timed out")
72
+ total_reward = -1 # Assign a negative reward for timeout
73
+ total_training_reward += total_reward
74
+ except Exception as e:
75
+ logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
76
+ total_reward = -1 # Assign a negative reward for errors
77
+ total_training_reward += total_reward
78
+
79
+ # Update target models periodically
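+ # (with num_episodes = 10 this fires exactly once, on the final episode;
+ # shrink the interval for more frequent target syncs)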
80
+ if (episode + 1) % 10 == 0:
81
+ logger.info(f"Updating target models at episode {episode + 1}")
82
+ agent.update_worker_target_model()
83
+ agent.update_manager_target_model()
84
+ agent.manager.update_target_model()
85
+
86
+ # Log overall progress
87
+ progress = (episode + 1) / num_episodes
88
+ elapsed_time = time.time() - start_time
89
+ estimated_total_time = elapsed_time / progress if progress > 0 else 0
90
+ remaining_time = estimated_total_time - elapsed_time
91
+ logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")
92
+
93
+ total_training_time = time.time() - start_time
94
+ average_reward = total_training_reward / num_episodes
95
+ logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
96
+ logger.info(f"Total training time: {total_training_time:.2f} seconds")
97
+ logger.info("Saving models.")
98
+
99
+ # Save both Worker and Manager models
100
+ agent.save_worker_model("worker_model.pth")
101
+ agent.save_manager_model("manager_model.pth")
102
+ agent.save("web_agent_model.pth") # Assuming this saves additional components if needed
103
+
104
+ if reactor.running:
105
+ logger.info("Stopping reactor")
106
+ reactor.stop()
107
+
108
+ def main():
109
+ logger.info("Starting agent training")
110
+ d = task.deferLater(reactor, 0, train_agent)
111
+ d.addErrback(lambda failure: logger.error(f"An error occurred: {failure}", exc_info=True))
112
+ d.addBoth(lambda _: reactor.stop() if reactor.running else None)
113
+ reactor.run()
114
+
115
+ if __name__ == "__main__":
116
+ main()