import gradio as gr
import torch
from transformers import pipeline

# 1. Load a model capable of following instructions.
# TinyLlama is small (1.1B parameters) and chat-tuned.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

print("Loading model... this may take a minute.")

# We use the 'pipeline' abstraction, which makes text generation much easier.
# device_map="auto" places the model on a GPU if one is available, otherwise on the CPU.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float32,
    device_map="auto",
)


def study_bot(user_input, history):
    # 2. Structure the prompt correctly for a chat model.
    # A "system" prompt tells the model exactly how to behave.
    system_prompt = "You are a helpful study assistant. Create detailed, personalized day-wise study plans for exams based on the user's request."

    # TinyLlama's chat format ends each turn with the </s> token:
    # <|system|>...</s> <|user|>...</s> <|assistant|>...
    formatted_prompt = f"<|system|>\n{system_prompt}</s>\n"

    # Add the conversation history so the model remembers previous messages.
    # (history arrives as (user, bot) pairs in Gradio's default tuple format)
    for user_msg, bot_msg in history:
        formatted_prompt += f"<|user|>\n{user_msg}</s>\n<|assistant|>\n{bot_msg}</s>\n"

    # Add the current question.
    formatted_prompt += f"<|user|>\n{user_input}</s>\n<|assistant|>\n"

    # 3. Generate the response.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # The pipeline returns the whole prompt plus the answer; keep only the new text.
    generated_text = outputs[0]["generated_text"]
    response = generated_text[len(formatted_prompt):]
    return response


# 4. Gradio ChatInterface (a simpler way to handle history).
iface = gr.ChatInterface(
    fn=study_bot,
    title="Study Plan Assistant 📚",
    description="Ask for a study plan (e.g., 'I have a Biology exam in 3 days, help me plan').",
)

if __name__ == "__main__":
    iface.launch()
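

# Optional: instead of hand-writing the <|system|>/<|user|> markers in step 2, the
# same prompt string can be built with the tokenizer's chat template. The helper
# below is only a sketch and is not called by the app; it assumes the pipeline's
# tokenizer ships a chat template (TinyLlama-1.1B-Chat-v1.0 does).
def build_prompt_with_template(user_input, history, system_prompt):
    # Collect the conversation as role/content messages.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": user_input})
    # tokenize=False returns the formatted string; add_generation_prompt=True
    # appends the trailing <|assistant|> marker so the model knows to answer.
    return pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )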