!pip install datasets
!pip install datasets sentence_transformers

"""Dataset address: [gopalkalpande/bbc-news-summary](https://huggingface.co/datasets/gopalkalpande/bbc-news-summary)"""

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Load the "train" split of the BBC news summary dataset.
dataset = load_dataset("gopalkalpande/bbc-news-summary")['train']

# Drop rows whose summary is missing or empty *in the dataset itself*, so that
# row i of `passage_embeddings` always corresponds to dataset["Summaries"][i].
# Filtering only the list handed to encode() would shift the embedding indices
# relative to the dataset as soon as a single row is skipped.
dataset = dataset.filter(lambda row: bool(row["Summaries"]))

# Initialize model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every remaining summary; the result is a single tensor with one
# embedding per dataset row, in dataset order.
passage_embeddings = model.encode(list(dataset["Summaries"]), show_progress_bar=True, convert_to_tensor=True)

# Function to find relevant news articles based on a prompt
def find_relevant_news(prompt, top_k=3):
    """
    Finds and returns the top_k relevant news summaries based on the given prompt.

    The prompt is embedded with the module-level ``model`` and compared by
    cosine similarity against the module-level ``passage_embeddings``.
    NOTE(review): this assumes row i of ``passage_embeddings`` was computed
    from ``dataset["Summaries"][i]`` -- confirm where the embeddings are built.

    Parameters:
    - prompt (str): The subject or query to search for relevant news.
    - top_k (int): Number of top relevant articles to return. Values larger
      than the number of encoded summaries are clamped to that number.

    Returns:
    - List of up to top_k summaries, most similar first. Each entry is the
      first 200 characters of the summary followed by ".".

    Raises:
    - ValueError: If top_k is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # topk() raises when k exceeds the number of candidates, so clamp it.
    k = min(top_k, len(passage_embeddings))
    if k == 0:
        return []

    # Encode the prompt into an embedding
    prompt_embedding = model.encode(prompt, convert_to_tensor=True)

    # Calculate cosine similarities between the prompt and all news summaries
    similarities = util.cos_sim(prompt_embedding, passage_embeddings)

    # cos_sim returns a (1, n) matrix for a single prompt. Select row 0
    # explicitly instead of squeeze(): squeeze() would collapse the result to
    # a 0-d tensor when k == 1, which cannot be iterated.
    top_indices = similarities[0].topk(k=k).indices.tolist()

    # Read the column once rather than once per result, and index it with
    # plain Python ints.
    summaries = dataset["Summaries"]
    return [summaries[index][:200] + "." for index in top_indices]

# Example usage: in a notebook the returned list of summaries is displayed as
# the cell output.
find_relevant_news("latest football match of championship")

