Arjon07CSE committed
Commit 33c09e3 · verified · 1 Parent(s): 8367aa6

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +135 -177
src/streamlit_app.py CHANGED
@@ -13,36 +13,34 @@ st.set_page_config(
     layout="wide"
 )
 
-# --- KEYWORD DATABASE (To make the AI Smarter) ---
-# This dictionary helps the AI explicitly understand symbols associated with parties.
 POLITICAL_CONTEXT = {
     "BNP": {
-        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy",
-        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ"
     },
     "Awami League": {
-        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat",
-        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত"
     },
     "Jamaat-e-Islami": {
-        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul",
-        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ"
     },
     "General/Interim Govt": {
-        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ",
-        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা"
     }
 }
 
 # --- MODEL LOADER ---
 @st.cache_resource
 def load_model():
     model_id = "hishab/titulm-llama-3.2-3b-v2.0"
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        # Load in 4-bit or float16 depending on available hardware
-        # For Hugging Face Spaces (CPU), we use float32 or float16.
-        # For GPU, float16 is best.
         dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
         model = AutoModelForCausalLM.from_pretrained(
@@ -55,228 +53,188 @@ def load_model():
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            max_new_tokens=150, # Keep it short for JSON
             do_sample=True,
-            temperature=0.2, # Lower temperature = More strict/logical
             top_p=0.9
         )
         return pipe
     except Exception as e:
         return None
 
-# Load Model
 with st.sidebar:
-    st.image("https://cdn-icons-png.flaticon.com/512/6656/6656046.png", width=50)
-    st.title("AI Settings")
     if torch.cuda.is_available():
-        st.success("🚀 GPU Detected! Inference will be fast.")
     else:
-        st.warning("⚠️ Running on CPU. Inference might be slow.")
 
-    with st.spinner("Waking up the Neural Network..."):
         llm = load_model()
 
     if not llm:
-        st.error("Model failed to load.")
         st.stop()
 
 # --- HELPER FUNCTIONS ---
 def clean_json_output(text):
     """Robustly extract JSON from the LLM's chatter."""
-    # Look for the last occurrence of { and the matching }
     try:
-        # Regex to find JSON block
         matches = re.findall(r'\{.*?\}', text, re.DOTALL)
         if matches:
-            # Get the last match as it's usually the actual answer after the reasoning
             return json.loads(matches[-1])
-        else:
-            return None
     except:
         return None
 
-# --- PROMPT GENERATORS ---
-
-def generate_news_prompt(news_text, target):
-    return [
-        {"role": "system", "content": f"""You are a Political Analyst for Bangladesh.
-        Task: Analyze if the news is FAVOURABLE or UNFAVORABLE for: {target}.
-
-        DEFINITIONS:
-        - FAVOURABLE: Positive news, legal wins, return to power, praise.
-        - UNFAVORABLE: Negative news, arrest, criticism, loss.
-        - NEUTRAL: Factual news with no clear bias.
-
-        Response Format: JSON only -> {{"label": "FAVOURABLE"|"UNFAVORABLE"|"NEUTRAL", "reasoning": "Bangla sentence"}}
-        """},
-        {"role": "user", "content": f"News: {news_text}"}
-    ]
-
 def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
     return [
         {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
         Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
 
-        RULES:
-        1. If comment mentions {party} symbols ({keywords}) or praises {target} -> POSITIVE.
-        2. If comment supports {party}'s rivals ({rival_keywords}) or attacks {target} -> NEGATIVE.
-        3. If comment is sarcastic (mocking praise) -> NEGATIVE.
 
         Examples:
-        - Comment: "Zindabad!" (Context: {party}) -> POSITIVE
-        - Comment: "Chor!" (Context: {party}) -> NEGATIVE
 
-        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short Bangla explanation"}}
         """},
         {"role": "user", "content": f"Comment: {comment_text}"}
     ]
134
  # --- MAIN UI ---
135
-
136
  st.title("🇧🇩 Smart Political Sentiment Analyzer")
137
- st.markdown("Context-Aware Analysis for Bangladesh Politics")
138
 
139
- # Tabs for the two sections
140
- tab_news, tab_comments = st.tabs(["📰 Political News Analysis", "📣 Public Sentiment (Comments)"])
 
 
 
 
 
141
 
142
- # =======================
143
- # SECTION 1: NEWS
144
- # =======================
145
- with tab_news:
146
- st.header("Is this news Good or Bad for the Candidate?")
147
-
148
- col1, col2 = st.columns(2)
149
- with col1:
150
- target_name_news = st.text_input("Candidate Name (Who is this about?)", "তারেক রহমান")
151
- with col2:
152
- news_input_method = st.radio("Input Method", ["Paste Text", "Upload CSV"])
153
-
154
- if news_input_method == "Paste Text":
155
- news_text = st.text_area("Paste News Headline:", height=100)
156
- if st.button("Analyze News Impact", type="primary"):
157
- if news_text:
158
- with st.spinner("Analyzing impact..."):
159
- prompt = generate_news_prompt(news_text, target_name_news)
160
- res = llm(prompt)
161
- output_text = res[0]['generated_text'][-1]['content']
162
- data = clean_json_output(output_text)
163
-
164
- if data:
165
- st.subheader(f"Result: {data.get('label', 'ERROR')}")
166
- st.write(f"**Reasoning:** {data.get('reasoning', '')}")
167
- else:
168
- st.error("Could not parse AI response.")
169
- st.code(output_text)
170
 
171
- elif news_input_method == "Upload CSV":
172
- uploaded_news = st.file_uploader("Upload News CSV", type=["csv"])
173
- if uploaded_news:
174
- df_news = pd.read_csv(uploaded_news)
175
- text_col = st.selectbox("Select Headline Column", df_news.columns)
176
-
177
- if st.button("Analyze Batch News"):
178
- results = []
179
- prog_bar = st.progress(0)
180
-
181
- for i, row in df_news.iterrows():
182
- prompt = generate_news_prompt(str(row[text_col]), target_name_news)
183
- res = llm(prompt)
184
- data = clean_json_output(res[0]['generated_text'][-1]['content'])
185
-
186
- results.append({
187
- "News": row[text_col],
188
- "Impact": data['label'] if data else "ERROR",
189
- "Reasoning": data['reasoning'] if data else ""
190
- })
191
- prog_bar.progress((i+1)/len(df_news))
192
-
193
- res_df = pd.DataFrame(results)
194
- st.dataframe(res_df)
195
-
196
- # Chart
197
- fig = px.pie(res_df, names="Impact", title=f"Media Sentiment for {target_name_news}")
198
- st.plotly_chart(fig)
199
 
200
- # =======================
201
- # SECTION 2: COMMENTS
202
- # =======================
203
- with tab_comments:
204
- st.header("Context-Aware Comment Labeling")
205
- st.info("The AI uses the 'Target Party' to understand slogans like 'Dhaner Sheesh' or 'Nouka'.")
206
-
207
- # 1. ESTABLISH CONTEXT
208
- c1, c2 = st.columns(2)
209
- with c1:
210
- target_entity_cmt = st.text_input("Target Person (e.g., Khaleda Zia)", "Khaleda Zia")
211
- with c2:
212
- party_context = st.selectbox("Political Affiliation (Defines Symbols)", list(POLITICAL_CONTEXT.keys()))
213
-
214
- # Get keywords based on selection
215
- selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
216
- selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
217
-
218
- st.caption(f"**AI Context Memory:** Positive Keywords = [{selected_keywords}] | Negative Keywords = [{selected_rivals}]")
219
-
220
- # 2. INPUT
221
- uploaded_comments = st.file_uploader("Upload Comments CSV", type=["csv"], key="cmt_up")
222
-
223
- if uploaded_comments:
224
- df_cmt = pd.read_csv(uploaded_comments)
225
- st.write("Preview:", df_cmt.head(3))
226
- comment_col = st.selectbox("Which column contains the comments?", df_cmt.columns)
227
 
228
- if st.button("Start Intelligent Labeling", type="primary"):
229
- final_data = []
230
- bar = st.progress(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-            total = len(df_cmt)
-            for idx, row in df_cmt.iterrows():
-                txt = str(row[comment_col])
 
-                # Skip empty or very short comments
-                if len(txt) < 3:
                     continue
-
-                prompt = generate_comment_prompt(txt, target_entity_cmt, party_context, selected_keywords, selected_rivals)
 
                 try:
                     out = llm(prompt)
-                    raw_str = out[0]['generated_text'][-1]['content']
-                    json_dat = clean_json_output(raw_str)
-
-                    label = json_dat.get("label", "NEUTRAL") if json_dat else "ERROR"
-                    reason = json_dat.get("reasoning", "Parse Fail") if json_dat else raw_str
 
                 except Exception as e:
                     label = "ERROR"
                     reason = str(e)
 
-                final_data.append({
-                    "Original Comment": txt,
                     "Sentiment": label,
-                    "Why?": reason
                 })
-                bar.progress((idx+1)/total)
 
-            # RESULTS
-            res_df_cmt = pd.DataFrame(final_data)
-            st.success("Analysis Complete!")
 
-            # Visualization
-            row1, row2 = st.columns([2, 1])
-            with row1:
-                st.dataframe(res_df_cmt)
-            with row2:
-                # Custom colors for politics
-                color_map = {
-                    "POSITIVE": "#00CC96",  # Green
-                    "NEGATIVE": "#EF553B",  # Red
-                    "NEUTRAL": "#636EFA",   # Blue
-                    "ERROR": "#000000"
-                }
-                fig = px.pie(res_df_cmt, names="Sentiment", title="Public Sentiment", color="Sentiment", color_discrete_map=color_map)
-                st.plotly_chart(fig)
 
-            # Download
-            csv_dl = res_df_cmt.to_csv(index=False).encode('utf-8')
-            st.download_button("Download Labeled Data", csv_dl, "analyzed_comments.csv", "text/csv")

src/streamlit_app.py (updated version):
     layout="wide"
 )
 
+# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
 POLITICAL_CONTEXT = {
     "BNP": {
+        "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
+        "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson"
     },
     "Awami League": {
+        "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
+        "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist"
     },
     "Jamaat-e-Islami": {
+        "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
+        "rival_keywords": "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi"
     },
     "General/Interim Govt": {
+        "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
+        "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability"
     }
 }
 
 # --- MODEL LOADER ---
 @st.cache_resource
 def load_model():
+    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
     model_id = "hishab/titulm-llama-3.2-3b-v2.0"
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
+        # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
         dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
         model = AutoModelForCausalLM.from_pretrained(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
+            max_new_tokens=150,
             do_sample=True,
+            temperature=0.2,  # Low temp = Logic focused
             top_p=0.9
         )
         return pipe
     except Exception as e:
         return None
 
+# Sidebar Status
 with st.sidebar:
+    st.title("⚙️ System Status")
     if torch.cuda.is_available():
+        st.success("🟢 GPU Active (Fast Mode)")
     else:
+        st.warning("🟠 CPU Mode (Standard Speed)")
 
+    with st.spinner("Initializing AI Engine..."):
         llm = load_model()
 
     if not llm:
+        st.error("Model Failed to Load. Check HuggingFace Logs.")
         st.stop()
+    else:
+        st.success("✅ AI Brain Ready")
 
 # --- HELPER FUNCTIONS ---
 def clean_json_output(text):
     """Robustly extract JSON from the LLM's chatter."""
     try:
+        # Find the last JSON-like structure
         matches = re.findall(r'\{.*?\}', text, re.DOTALL)
         if matches:
             return json.loads(matches[-1])
+        return None
     except:
         return None
 
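Note: a self-contained sketch of the extraction idea in clean_json_output above, i.e. grab every brace-delimited block and parse the last one. The helper name extract_last_json and the sample string are illustrative only, and the non-greedy pattern only copes with flat, un-nested JSON objects:

import json
import re

def extract_last_json(text):
    # Same idea as clean_json_output: the model's final JSON answer usually comes last.
    matches = re.findall(r'\{.*?\}', text, re.DOTALL)
    if not matches:
        return None
    try:
        return json.loads(matches[-1])
    except json.JSONDecodeError:
        return None

chatter = 'Sure, here is my analysis. {"label": "NEGATIVE", "reasoning": "আক্রমণাত্মক মন্তব্য"}'
print(extract_last_json(chatter))  # {'label': 'NEGATIVE', 'reasoning': 'আক্রমণাত্মক মন্তব্য'}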
 def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
     return [
         {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
         Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).
 
+        CRITICAL RULES:
+        1. Support for {party} or '{keywords}' = POSITIVE.
+        2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
+        3. Support for RIVAL parties = NEGATIVE.
+        4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.
 
         Examples:
+        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
+        - Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
+        - Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE
 
+        Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
         """},
         {"role": "user", "content": f"Comment: {comment_text}"}
     ]
 
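Note: for orientation, this is the two-message shape generate_comment_prompt returns and the indexing the app later applies to the pipeline output. The mock below stands in for the real transformers pipeline, which for chat-style input is expected to return the conversation with the assistant reply appended under 'generated_text'; the reply text here is invented:

def build_messages(comment, target, party):
    # Same shape as generate_comment_prompt: system instructions plus the user comment.
    return [
        {"role": "system", "content": f"Analyze the sentiment of the comment TOWARDS {target} ({party})."},
        {"role": "user", "content": f"Comment: {comment}"},
    ]

def mock_llm(messages):
    # Stand-in for llm(prompt): echoes the conversation with an assistant turn appended.
    reply = {"role": "assistant", "content": '{"label": "POSITIVE", "reasoning": "সমর্থনমূলক"}'}
    return [{"generated_text": messages + [reply]}]

out = mock_llm(build_messages("Zindabad!", "BNP", "BNP"))
raw = out[0]["generated_text"][-1]["content"]  # same indexing the app uses on the real pipeline
print(raw)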
 # --- MAIN UI ---
 st.title("🇧🇩 Smart Political Sentiment Analyzer")
+st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")
 
+# 1. SETUP CONTEXT
+st.subheader("1. Analysis Configuration")
+col1, col2 = st.columns(2)
+with col1:
+    target_entity = st.text_input("Target Candidate/Party Name", "BNP")
+with col2:
+    party_context = st.selectbox("Political Affiliation (Logic Mapping)", list(POLITICAL_CONTEXT.keys()))
 
+selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
+selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]
 
+st.info(f"**AI Logic:** Detecting Support for *{target_entity}* using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]")
+
+# 2. UPLOAD DATA
+st.subheader("2. Upload Data")
+uploaded_file = st.file_uploader("Upload CSV File (Must have 'Comment' column)", type=["csv"])
 
+if uploaded_file:
+    try:
+        df = pd.read_csv(uploaded_file)
+        st.success(f"Loaded {len(df)} comments successfully!")
+
+        # Data Cleanup & Preview
+        st.dataframe(df.head(3))
 
+        # Column Auto-Detection
+        cols = df.columns.tolist()
+        comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
+        date_col = next((c for c in cols if 'date' in c.lower()), None)
+
+        col_sel1, col_sel2 = st.columns(2)
+        with col_sel1:
+            comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
+        with col_sel2:
+            if date_col:
+                date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
+            else:
+                st.write("No Date column detected.")
+
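Note: the column auto-detection above relies on next() with a default, so it degrades gracefully when nothing matches. A small standalone check with made-up column names:

cols = ["Post URL", "Comment Text", "Published Date"]
comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
date_col = next((c for c in cols if 'date' in c.lower()), None)
print(comment_col, date_col)  # Comment Text Published Date

# With no match the defaults kick in: first column for comments, None for the date.
cols = ["text", "likes"]
print(next((c for c in cols if 'comment' in c.lower()), cols[0]))  # text
print(next((c for c in cols if 'date' in c.lower()), None))        # None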
+        # 3. RUN ANALYSIS
+        if st.button("🚀 Start AI Analysis", type="primary"):
+            results = []
+            progress_bar = st.progress(0)
+            status_text = st.empty()
 
+            total = len(df)
+
+            for i, row in df.iterrows():
+                text = str(row[comment_col])
 
+                # Basic filtering
+                if len(text) < 2 or text.lower() == "nan":
                     continue
 
+                # Construct Prompt
+                prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)
+
+                # Run Inference
                 try:
                     out = llm(prompt)
+                    raw_res = out[0]['generated_text'][-1]['content']
+                    data = clean_json_output(raw_res)
 
+                    label = data.get("label", "NEUTRAL") if data else "ERROR"
+                    reason = data.get("reasoning", "Parse Error") if data else raw_res
                 except Exception as e:
                     label = "ERROR"
                     reason = str(e)
 
+                # Store Result
+                results.append({
+                    "Date": row[date_col] if date_col else None,
+                    "Comment": text,
                     "Sentiment": label,
+                    "Reasoning": reason
                 })
+
+                # Update UI
+                progress_bar.progress((i + 1) / total)
+                status_text.text(f"Processing {i+1}/{total}: {label}")
 
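Note: the progress update above assumes i runs 0..total-1, which holds for the default RangeIndex that pd.read_csv produces, and st.progress expects a value between 0.0 and 1.0. A trivial check of the fraction arithmetic, with an invented total:

total = 4
for i in range(total):
    print(f"{(i + 1) / total:.2f}")  # 0.25, 0.50, 0.75, 1.00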
+            # 4. VISUALIZATION
+            res_df = pd.DataFrame(results)
+            st.divider()
+            st.header("📊 Analysis Results")
 
+            # Layout: Pie Chart + Time Series
+            row1_1, row1_2 = st.columns([1, 2])
 
+            with row1_1:
+                color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}
+                fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
+                st.plotly_chart(fig_pie, use_container_width=True)
+
+                # Sentiment Score Calculation
+                pos_count = len(res_df[res_df['Sentiment']=='POSITIVE'])
+                neg_count = len(res_df[res_df['Sentiment']=='NEGATIVE'])
+                total_valid = pos_count + neg_count + 1  # avoid div/0
+                favourability = (pos_count / total_valid) * 100
+                st.metric("Favourability Score", f"{favourability:.1f}%")
+
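Note: the favourability metric above is simply the positive share of polarised (positive plus negative) comments, with +1 in the denominator as a division-by-zero guard, so it can never quite reach 100%. A worked example with invented counts:

pos_count = 60
neg_count = 30
total_valid = pos_count + neg_count + 1   # 91; the +1 avoids 0/0 when nothing is polarised
favourability = (pos_count / total_valid) * 100
print(f"{favourability:.1f}%")            # 65.9%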
+            with row1_2:
+                if date_col:
+                    try:
+                        # Convert Date and Aggregate
+                        res_df['Date'] = pd.to_datetime(res_df['Date'], errors='coerce')
+                        time_df = res_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')
+
+                        fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
+                                           title="Sentiment Trends Over Time",
+                                           color_discrete_map=color_map, markers=True)
+                        st.plotly_chart(fig_line, use_container_width=True)
+                    except Exception as e:
+                        st.warning("Could not create timeline chart (Date format issue).")
+
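Note: to make the daily aggregation above concrete, here is the same groupby on a tiny hand-made frame; the dates and labels are invented:

import pandas as pd

res_df = pd.DataFrame({
    "Date": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02"]),
    "Sentiment": ["POSITIVE", "NEGATIVE", "POSITIVE"],
})

# One row per (day, sentiment) with a Count column, which is what px.line plots above.
time_df = (
    res_df.groupby([pd.Grouper(key="Date", freq="D"), "Sentiment"])
    .size()
    .reset_index(name="Count")
)
print(time_df)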
+            # Data Table & Download
+            st.dataframe(res_df)
+            csv = res_df.to_csv(index=False).encode('utf-8')
+            st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")
+
+    except Exception as e:
+        st.error(f"Error reading CSV: {e}")