Prashasst committed
Commit 771604e · verified · 1 Parent(s): 775a63b

Create app.py

Files changed (1)
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
# app.py for your Gradio Space

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
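
# Note: these imports imply the Space also needs a requirements.txt next to
# app.py. A minimal sketch (the exact packages and pins are assumptions, not
# part of this commit): torch, transformers, accelerate, bitsandbytes,
# peft, gradio.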

# --- 1. Configuration ---
# This is the ID of your LoRA adapter repository on the Hub
hub_adapter_id = "Prashasst/Sushruta-P3.8Q"  # Make sure this is correct!

# Define the quantization configuration for efficient inference
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
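
# Back-of-envelope memory estimate (assuming the ~3.8B-parameter Phi-3-mini
# base implied by the model name): 3.8e9 params * 4 bits ≈ 1.9 GB of weights
# in NF4, versus ~7.6 GB in fp16, plus activations and quantization overhead.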

# Your system prompt from training
SYSTEM_PROMPT = """You are Sushruta-P3.8Q, a specialized medical AI assistant from Prashasst's AI Labs. You were created and fine-tuned by Prashasst Dongre to serve as a reliable and accessible educational tool for the public. Your primary purpose is to help any user understand complex medical topics by providing clear, logical, step-by-step analyses in a <think>...</think><solution>...</solution> format."""

# --- 2. Load the Model and Tokenizer ---
print("Loading base model and tokenizer...")
# This single command downloads the base Phi-3 model, quantizes it, and
# applies your LoRA adapters from the Hub automatically (loading an adapter
# repository directly like this requires the peft package to be installed).
model = AutoModelForCausalLM.from_pretrained(
    hub_adapter_id,
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(hub_adapter_id)
print("Model loaded successfully!")
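
# Equivalent explicit two-step load (a sketch, shown for clarity; the base
# model id below is an assumption inferred from the comment above, not
# confirmed by this commit):
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained(
#       "microsoft/Phi-3-mini-4k-instruct",  # assumed base model
#       quantization_config=quantization_config, device_map="auto")
#   model = PeftModel.from_pretrained(base, hub_adapter_id)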

# --- 3. Define the Prediction Function ---
def generate_response(user_question):
    """
    Takes a user's question, formats the prompt, runs it through the
    model, and returns the clean response.
    """
    # Format the prompt using the official Phi-3 chat template
    prompt = f"<|system|>\n{SYSTEM_PROMPT}<|end|>\n<|user|>\n{user_question}<|end|>\n<|assistant|>\n"

    # Tokenize the input (keeping the attention mask so generate() can use
    # it) and move it to whichever device the model was placed on
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate the response
    outputs = model.generate(**inputs, max_new_tokens=1024)

    # Return only the newly generated tokens. Note: skip_special_tokens=True
    # strips markers like <|assistant|> from the decoded text, so splitting
    # the full decode on that marker would raise an IndexError; slicing off
    # the prompt tokens avoids the problem.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
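
# An alternative to hand-writing the template string is the tokenizer's
# built-in chat template (a sketch; assumes the tokenizer ships one, as
# Phi-3 tokenizers normally do):
#   messages = [{"role": "system", "content": SYSTEM_PROMPT},
#               {"role": "user", "content": user_question}]
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True)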

# --- 4. Create the Gradio Interface ---
with gr.Blocks(theme='soft') as demo:
    gr.Markdown("# Sushruta-P3.8Q: Your Medical AI Assistant")
    gr.Markdown("Created by Prashasst Dongre.")

    with gr.Row():
        question_box = gr.Textbox(label="Enter your medical question here")
        submit_button = gr.Button("Ask Sushruta")

    answer_box = gr.Markdown(label="Sushruta's Analysis:")

    submit_button.click(generate_response, inputs=question_box, outputs=answer_box)

# --- 5. Make the Interface Publicly Accessible ---
# This serves the Gradio app as a web page and an API. On a Hugging Face
# Space the app is public by default and share=True is ignored (it only
# creates a temporary public link when running locally), so a plain
# launch() is enough here.
demo.queue().launch()
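
# --- Usage sketch: calling the Space programmatically ---
# Once the Space is live, the endpoint can be called with gradio_client.
# The Space id and api_name below are illustrative assumptions, not
# confirmed by this commit.
#   from gradio_client import Client
#   client = Client("Prashasst/Sushruta-P3.8Q-demo")  # hypothetical Space id
#   answer = client.predict(
#       "What are the common causes of iron-deficiency anemia?",
#       api_name="/generate_response",  # assumed name of the click handler
#   )
#   print(answer)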