import gradio as gr
import torch
from transformers import pipeline

# 1. Load a model capable of following instructions.
# TinyLlama is small (1.1B parameters) and chat-tuned.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

print("Loading model... this may take a minute.")

# We use the 'pipeline' abstraction, which makes text generation much easier.
# device_map="auto" places the model on a GPU if one is available, otherwise on the CPU.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float32,
    device_map="auto",
)


def study_bot(user_input, history):
    # 2. Structure the prompt correctly for a chat model.
    # A "system" prompt tells the model exactly how to behave.
    system_prompt = "You are a helpful study assistant. Create detailed, personalized day-wise study plans for exams based on the user's request."

    # TinyLlama's chat format ends each turn with the </s> token:
    # <|system|>...</s> <|user|>...</s> <|assistant|>...
    formatted_prompt = f"<|system|>\n{system_prompt}</s>\n"

    # Add the conversation history so the model remembers previous messages.
    # (history arrives as (user, bot) pairs in Gradio's default tuple format)
    for user_msg, bot_msg in history:
        formatted_prompt += f"<|user|>\n{user_msg}</s>\n<|assistant|>\n{bot_msg}</s>\n"

    # Add the current question.
    formatted_prompt += f"<|user|>\n{user_input}</s>\n<|assistant|>\n"

    # 3. Generate the response.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # The pipeline returns the whole prompt plus the answer; keep only the new text.
    generated_text = outputs[0]["generated_text"]
    response = generated_text[len(formatted_prompt):]
    return response


# 4. Gradio ChatInterface (a simpler way to handle history).
iface = gr.ChatInterface(
    fn=study_bot,
    title="Study Plan Assistant 📚",
    description="Ask for a study plan (e.g., 'I have a Biology exam in 3 days, help me plan').",
)

if __name__ == "__main__":
    iface.launch()
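

# Optional: instead of hand-writing the <|system|>/<|user|> markers in step 2, the
# same prompt string can be built with the tokenizer's chat template. The helper
# below is only a sketch and is not called by the app; it assumes the pipeline's
# tokenizer ships a chat template (TinyLlama-1.1B-Chat-v1.0 does).
def build_prompt_with_template(user_input, history, system_prompt):
    # Collect the conversation as role/content messages.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": user_input})
    # tokenize=False returns the formatted string; add_generation_prompt=True
    # appends the trailing <|assistant|> marker so the model knows to answer.
    return pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )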