Arjon07CSE committed
Commit 33c09e3 · verified · 1 Parent(s): 8367aa6

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +135 -177
src/streamlit_app.py CHANGED
@@ -13,36 +13,34 @@ st.set_page_config(
     layout="wide"
 )
 
-# --- KEYWORD DATABASE (To make the AI Smarter) ---
-# This dictionary helps the AI explicitly understand symbols associated with parties.
 POLITICAL_CONTEXT = {
     "BNP": {
-        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy",
-        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ"
     },
     "Awami League": {
-        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat",
-        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত"
     },
     "Jamaat-e-Islami": {
-        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul",
-        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ"
     },
     "General/Interim Govt": {
-        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ",
-        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা"
     }
 }
 
 # --- MODEL LOADER ---
 @st.cache_resource
 def load_model():
     model_id = "hishab/titulm-llama-3.2-3b-v2.0"
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        # Load in 4-bit or float16 depending on available hardware
-        # For Hugging Face Spaces (CPU), we use float32 or float16.
-        # For GPU, float16 is best.
         dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
         model = AutoModelForCausalLM.from_pretrained(
@@ -55,228 +53,188 @@ def load_model():
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            max_new_tokens=150, # Keep it short for JSON
             do_sample=True,
-            temperature=0.2, # Lower temperature = More strict/logical
             top_p=0.9
         )
         return pipe
     except Exception as e:
         return None
 
-# Load Model
 with st.sidebar:
-    st.image("https://cdn-icons-png.flaticon.com/512/6656/6656046.png", width=50)
-    st.title("AI Settings")
     if torch.cuda.is_available():
-        st.success("🚀 GPU Detected! Inference will be fast.")
     else:
-        st.warning("⚠️ Running on CPU. Inference might be slow.")
 
-    with st.spinner("Waking up the Neural Network..."):
         llm = load_model()
 
     if not llm:
-        st.error("Model failed to load.")
         st.stop()
 
 # --- HELPER FUNCTIONS ---
 def clean_json_output(text):
     """Robustly extract JSON from the LLM's chatter."""
-    # Look for the last occurrence of { and the matching }
     try:
-        # Regex to find JSON block
         matches = re.findall(r'\{.*?\}', text, re.DOTALL)
         if matches:
-            # Get the last match as it's usually the actual answer after the reasoning
             return json.loads(matches[-1])
-        else:
-            return None
     except:
         return None
 
-# --- PROMPT GENERATORS ---
-
-def generate_news_prompt(news_text, target):
-    return [
-        {"role": "system", "content": f"""You are a Political Analyst for Bangladesh.
-        Task: Analyze if the news is FAVOURABLE or UNFAVORABLE for: {target}.
-
-        DEFINITIONS:
-        - FAVOURABLE: Positive news, legal wins, return to power, praise.
-        - UNFAVORABLE: Negative news, arrest, criticism, loss.
-        - NEUTRAL: Factual news with no clear bias.
-
-        Response Format: JSON only -> {{"label": "FAVOURABLE"|"UNFAVORABLE"|"NEUTRAL", "reasoning": "Bangla sentence"}}
-        """},
-        {"role": "user", "content": f"News: {news_text}"}
-    ]
-
 def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
     return [
         {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
         Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
 
-        RULES:
-        1. If comment mentions {party} symbols ({keywords}) or praises {target} -> POSITIVE.
-        2. If comment supports {party}'s rivals ({rival_keywords}) or attacks {target} -> NEGATIVE.
-        3. If comment is sarcastic (mocking praise) -> NEGATIVE.
 
         Examples:
-        - Comment: "Zindabad!" (Context: {party}) -> POSITIVE
-        - Comment: "Chor!" (Context: {party}) -> NEGATIVE
 
-        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short Bangla explanation"}}
         """},
         {"role": "user", "content": f"Comment: {comment_text}"}
     ]
134
  # --- MAIN UI ---
135
-
136
  st.title("🇧🇩 Smart Political Sentiment Analyzer")
137
- st.markdown("Context-Aware Analysis for Bangladesh Politics")
138
 
139
- # Tabs for the two sections
140
- tab_news, tab_comments = st.tabs(["📰 Political News Analysis", "📣 Public Sentiment (Comments)"])
 
 
 
 
 
141
 
142
- # =======================
143
- # SECTION 1: NEWS
144
- # =======================
145
- with tab_news:
146
- st.header("Is this news Good or Bad for the Candidate?")
147
-
148
- col1, col2 = st.columns(2)
149
- with col1:
150
- target_name_news = st.text_input("Candidate Name (Who is this about?)", "তারেক রহমান")
151
- with col2:
152
- news_input_method = st.radio("Input Method", ["Paste Text", "Upload CSV"])
153
-
154
- if news_input_method == "Paste Text":
155
- news_text = st.text_area("Paste News Headline:", height=100)
156
- if st.button("Analyze News Impact", type="primary"):
157
- if news_text:
158
- with st.spinner("Analyzing impact..."):
159
- prompt = generate_news_prompt(news_text, target_name_news)
160
- res = llm(prompt)
161
- output_text = res[0]['generated_text'][-1]['content']
162
- data = clean_json_output(output_text)
163
-
164
- if data:
165
- st.subheader(f"Result: {data.get('label', 'ERROR')}")
166
- st.write(f"**Reasoning:** {data.get('reasoning', '')}")
167
- else:
168
- st.error("Could not parse AI response.")
169
- st.code(output_text)
170
 
171
- elif news_input_method == "Upload CSV":
172
- uploaded_news = st.file_uploader("Upload News CSV", type=["csv"])
173
- if uploaded_news:
174
- df_news = pd.read_csv(uploaded_news)
175
- text_col = st.selectbox("Select Headline Column", df_news.columns)
176
-
177
- if st.button("Analyze Batch News"):
178
- results = []
179
- prog_bar = st.progress(0)
180
-
181
- for i, row in df_news.iterrows():
182
- prompt = generate_news_prompt(str(row[text_col]), target_name_news)
183
- res = llm(prompt)
184
- data = clean_json_output(res[0]['generated_text'][-1]['content'])
185
-
186
- results.append({
187
- "News": row[text_col],
188
- "Impact": data['label'] if data else "ERROR",
189
- "Reasoning": data['reasoning'] if data else ""
190
- })
191
- prog_bar.progress((i+1)/len(df_news))
192
-
193
- res_df = pd.DataFrame(results)
194
- st.dataframe(res_df)
195
-
196
- # Chart
197
- fig = px.pie(res_df, names="Impact", title=f"Media Sentiment for {target_name_news}")
198
- st.plotly_chart(fig)
199
 
200
- # =======================
201
- # SECTION 2: COMMENTS
202
- # =======================
203
- with tab_comments:
204
- st.header("Context-Aware Comment Labeling")
205
- st.info("The AI uses the 'Target Party' to understand slogans like 'Dhaner Sheesh' or 'Nouka'.")
206
-
207
- # 1. ESTABLISH CONTEXT
208
- c1, c2 = st.columns(2)
209
- with c1:
210
- target_entity_cmt = st.text_input("Target Person (e.g., Khaleda Zia)", "Khaleda Zia")
211
- with c2:
212
- party_context = st.selectbox("Political Affiliation (Defines Symbols)", list(POLITICAL_CONTEXT.keys()))
213
-
214
- # Get keywords based on selection
215
- selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
216
- selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
217
-
218
- st.caption(f"**AI Context Memory:** Positive Keywords = [{selected_keywords}] | Negative Keywords = [{selected_rivals}]")
219
-
220
- # 2. INPUT
221
- uploaded_comments = st.file_uploader("Upload Comments CSV", type=["csv"], key="cmt_up")
222
-
223
- if uploaded_comments:
224
- df_cmt = pd.read_csv(uploaded_comments)
225
- st.write("Preview:", df_cmt.head(3))
226
- comment_col = st.selectbox("Which column contains the comments?", df_cmt.columns)
227
 
228
- if st.button("Start Intelligent Labeling", type="primary"):
229
- final_data = []
230
- bar = st.progress(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-            total = len(df_cmt)
-            for idx, row in df_cmt.iterrows():
-                txt = str(row[comment_col])
 
-                # Skip empty or very short comments
-                if len(txt) < 3:
                     continue
-
-                prompt = generate_comment_prompt(txt, target_entity_cmt, party_context, selected_keywords, selected_rivals)
 
                 try:
                     out = llm(prompt)
-                    raw_str = out[0]['generated_text'][-1]['content']
-                    json_dat = clean_json_output(raw_str)
-
-                    label = json_dat.get("label", "NEUTRAL") if json_dat else "ERROR"
-                    reason = json_dat.get("reasoning", "Parse Fail") if json_dat else raw_str
 
                 except Exception as e:
                     label = "ERROR"
                     reason = str(e)
 
-                final_data.append({
-                    "Original Comment": txt,
                     "Sentiment": label,
-                    "Why?": reason
                 })
-                bar.progress((idx+1)/total)
 
-            # RESULTS
-            res_df_cmt = pd.DataFrame(final_data)
-            st.success("Analysis Complete!")
 
-            # Visualization
-            row1, row2 = st.columns([2, 1])
-            with row1:
-                st.dataframe(res_df_cmt)
-            with row2:
-                # Custom colors for politics
-                color_map = {
-                    "POSITIVE": "#00CC96",  # Green
-                    "NEGATIVE": "#EF553B",  # Red
-                    "NEUTRAL": "#636EFA",   # Blue
-                    "ERROR": "#000000"
-                }
-                fig = px.pie(res_df_cmt, names="Sentiment", title="Public Sentiment", color="Sentiment", color_discrete_map=color_map)
-                st.plotly_chart(fig)
 
-            # Download
-            csv_dl = res_df_cmt.to_csv(index=False).encode('utf-8')
-            st.download_button("Download Labeled Data", csv_dl, "analyzed_comments.csv", "text/csv")

src/streamlit_app.py (updated version):
     layout="wide"
 )
 
+# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
 POLITICAL_CONTEXT = {
     "BNP": {
+        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
+        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson"
     },
     "Awami League": {
+        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
+        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist"
     },
     "Jamaat-e-Islami": {
+        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
+        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi"
     },
     "General/Interim Govt": {
+        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
+        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability"
     }
 }
 
 # --- MODEL LOADER ---
 @st.cache_resource
 def load_model():
+    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
     model_id = "hishab/titulm-llama-3.2-3b-v2.0"
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
+        # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
         dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
         model = AutoModelForCausalLM.from_pretrained(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
+            max_new_tokens=150,
             do_sample=True,
+            temperature=0.2,  # Low temp = Logic focused
             top_p=0.9
         )
         return pipe
     except Exception as e:
         return None
 
+# Sidebar Status
 with st.sidebar:
+    st.title("⚙️ System Status")
     if torch.cuda.is_available():
+        st.success("🟢 GPU Active (Fast Mode)")
     else:
+        st.warning("🟠 CPU Mode (Standard Speed)")
 
+    with st.spinner("Initializing AI Engine..."):
         llm = load_model()
 
     if not llm:
+        st.error("Model Failed to Load. Check HuggingFace Logs.")
         st.stop()
+    else:
+        st.success("✅ AI Brain Ready")
 
 # --- HELPER FUNCTIONS ---
 def clean_json_output(text):
     """Robustly extract JSON from the LLM's chatter."""
     try:
+        # Find the last JSON-like structure
         matches = re.findall(r'\{.*?\}', text, re.DOTALL)
         if matches:
             return json.loads(matches[-1])
+        return None
     except:
         return None
 
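Note: a self-contained sketch of the extraction idea in clean_json_output above, i.e. grab every brace-delimited block and parse the last one. The helper name extract_last_json and the sample string are illustrative only, and the non-greedy pattern only copes with flat, un-nested JSON objects:

import json
import re

def extract_last_json(text):
    # Same idea as clean_json_output: the model's final JSON answer usually comes last.
    matches = re.findall(r'\{.*?\}', text, re.DOTALL)
    if not matches:
        return None
    try:
        return json.loads(matches[-1])
    except json.JSONDecodeError:
        return None

chatter = 'Sure, here is my analysis. {"label": "NEGATIVE", "reasoning": "আক্রমণাত্মক মন্তব্য"}'
print(extract_last_json(chatter))  # {'label': 'NEGATIVE', 'reasoning': 'আক্রমণাত্মক মন্তব্য'}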
 def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
     return [
         {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
         Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
 
+        CRITICAL RULES:
+        1. Support for {party} or '{keywords}' = POSITIVE.
+        2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
+        3. Support for RIVAL parties = NEGATIVE.
+        4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
 
         Examples:
+        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
+        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
+        - Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
 
+        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
         """},
         {"role": "user", "content": f"Comment: {comment_text}"}
     ]
 
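Note: for orientation, this is the two-message shape generate_comment_prompt returns and the indexing the app later applies to the pipeline output. The mock below stands in for the real transformers pipeline, which for chat-style input is expected to return the conversation with the assistant reply appended under 'generated_text'; the reply text here is invented:

def build_messages(comment, target, party):
    # Same shape as generate_comment_prompt: system instructions plus the user comment.
    return [
        {"role": "system", "content": f"Analyze the sentiment of the comment TOWARDS {target} ({party})."},
        {"role": "user", "content": f"Comment: {comment}"},
    ]

def mock_llm(messages):
    # Stand-in for llm(prompt): echoes the conversation with an assistant turn appended.
    reply = {"role": "assistant", "content": '{"label": "POSITIVE", "reasoning": "সমর্থনমূলক"}'}
    return [{"generated_text": messages + [reply]}]

out = mock_llm(build_messages("Zindabad!", "BNP", "BNP"))
raw = out[0]["generated_text"][-1]["content"]  # same indexing the app uses on the real pipeline
print(raw)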
 # --- MAIN UI ---
 st.title("🇧🇩 Smart Political Sentiment Analyzer")
+st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")
 
+# 1. SETUP CONTEXT
+st.subheader("1. Analysis Configuration")
+col1, col2 = st.columns(2)
+with col1:
+    target_entity = st.text_input("Target Candidate/Party Name", "BNP")
+with col2:
+    party_context = st.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT.keys()))
 
+selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
+selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
 
+st.info(f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]")
+
+# 2. UPLOAD DATA
+st.subheader("2. Upload Data")
+uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])
 
+if uploaded_file:
+    try:
+        df = pd.read_csv(uploaded_file)
+        st.success(f"Loaded {len(df)} comments successfully!")
+
+        # Data Cleanup & Preview
+        st.dataframe(df.head(3))
 
+        # Column Auto-Detection
+        cols = df.columns.tolist()
+        comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
+        date_col = next((c for c in cols if 'date' in c.lower()), None)
+
+        col_sel1, col_sel2 = st.columns(2)
+        with col_sel1:
+            comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
+        with col_sel2:
+            if date_col:
+                date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
+            else:
+                st.write("No Date column detected.")
+
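Note: the column auto-detection above relies on next() with a default, so it degrades gracefully when nothing matches. A small standalone check with made-up column names:

cols = ["Post URL", "Comment Text", "Published Date"]
comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
date_col = next((c for c in cols if 'date' in c.lower()), None)
print(comment_col, date_col)  # Comment Text Published Date

# With no match the defaults kick in: first column for comments, None for the date.
cols = ["text", "likes"]
print(next((c for c in cols if 'comment' in c.lower()), cols[0]))  # text
print(next((c for c in cols if 'date' in c.lower()), None))        # None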
+        # 3. RUN ANALYSIS
+        if st.button("🚀 Start AI Analysis", type="primary"):
+            results = []
+            progress_bar = st.progress(0)
+            status_text = st.empty()
 
+            total = len(df)
+
+            for i, row in df.iterrows():
+                text = str(row[comment_col])
 
+                # Basic filtering
+                if len(text) < 2 or text.lower() == "nan":
                     continue
 
+                # Construct Prompt
+                prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)
+
+                # Run Inference
                 try:
                     out = llm(prompt)
+                    raw_res = out[0]['generated_text'][-1]['content']
+                    data = clean_json_output(raw_res)
 
+                    label = data.get("label", "NEUTRAL") if data else "ERROR"
+                    reason = data.get("reasoning", "Parse Error") if data else raw_res
                 except Exception as e:
                     label = "ERROR"
                     reason = str(e)
 
+                # Store Result
+                results.append({
+                    "Date": row[date_col] if date_col else None,
+                    "Comment": text,
                     "Sentiment": label,
+                    "Reasoning": reason
                 })
+
+                # Update UI
+                progress_bar.progress((i + 1) / total)
+                status_text.text(f"Processing {i+1}/{total}: {label}")
 
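Note: the progress update above assumes i runs 0..total-1, which holds for the default RangeIndex that pd.read_csv produces, and st.progress expects a value between 0.0 and 1.0. A trivial check of the fraction arithmetic, with an invented total:

total = 4
for i in range(total):
    print(f"{(i + 1) / total:.2f}")  # 0.25, 0.50, 0.75, 1.00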
+            # 4. VISUALIZATION
+            res_df = pd.DataFrame(results)
+            st.divider()
+            st.header("📊 Analysis Results")
 
+            # Layout: Pie Chart + Time Series
+            row1_1, row1_2 = st.columns([1, 2])
 
+            with row1_1:
+                color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}
+                fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
+                st.plotly_chart(fig_pie, use_container_width=True)
+
+                # Sentiment Score Calculation
+                pos_count = len(res_df[res_df['Sentiment']=='POSITIVE'])
+                neg_count = len(res_df[res_df['Sentiment']=='NEGATIVE'])
+                total_valid = pos_count + neg_count + 1  # avoid div/0
+                favourability = (pos_count / total_valid) * 100
+                st.metric("Favourability Score", f"{favourability:.1f}%")
+
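Note: the favourability metric above is simply the positive share of polarised (positive plus negative) comments, with +1 in the denominator as a division-by-zero guard, so it can never quite reach 100%. A worked example with invented counts:

pos_count = 60
neg_count = 30
total_valid = pos_count + neg_count + 1   # 91; the +1 avoids 0/0 when nothing is polarised
favourability = (pos_count / total_valid) * 100
print(f"{favourability:.1f}%")            # 65.9%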
+            with row1_2:
+                if date_col:
+                    try:
+                        # Convert Date and Aggregate
+                        res_df['Date'] = pd.to_datetime(res_df['Date'], errors='coerce')
+                        time_df = res_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')
+
+                        fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
+                                           title="Sentiment Trends Over Time",
+                                           color_discrete_map=color_map, markers=True)
+                        st.plotly_chart(fig_line, use_container_width=True)
+                    except Exception as e:
+                        st.warning("Could not create timeline chart (Date format issue).")
+
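Note: to make the daily aggregation above concrete, here is the same groupby on a tiny hand-made frame; the dates and labels are invented:

import pandas as pd

res_df = pd.DataFrame({
    "Date": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02"]),
    "Sentiment": ["POSITIVE", "NEGATIVE", "POSITIVE"],
})

# One row per (day, sentiment) with a Count column, which is what px.line plots above.
time_df = (
    res_df.groupby([pd.Grouper(key="Date", freq="D"), "Sentiment"])
    .size()
    .reset_index(name="Count")
)
print(time_df)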
+            # Data Table & Download
+            st.dataframe(res_df)
+            csv = res_df.to_csv(index=False).encode('utf-8')
+            st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")
+
+    except Exception as e:
+        st.error(f"Error reading CSV: {e}")