Spaces:

BSJ2004
/

news

Sleeping

App Files Files Community

BSJ2004 committed on Mar 23, 2025

Commit

f09b42b

verified ·

1 Parent(s): f3217f4

Create app_spaces.py

Browse files

Files changed (1) hide show

app_spaces.py +524 -0

app_spaces.py ADDED Viewed

	@@ -0,0 +1,524 @@

+# Standard library
+import base64
+import html
+import json
+import os
+import subprocess
+import sys
+import threading
+import time
+from io import BytesIO
+from typing import Dict, Any, List
+# Third-party
+import matplotlib.pyplot as plt
+import pandas as pd
+import requests
+import seaborn as sns
+import streamlit as st
+from PIL import Image, ImageEnhance
+# Configure page settings for Hugging Face Spaces
+st.set_page_config(
+    page_title="News Summarization & TTS",
+    page_icon="📰",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Start the API in the background
+def start_api():
+    process = subprocess.Popen(["python", "api.py"])
+    print(f"Started API server with PID {process.pid}")
+    return process
+# Check if the API is already running, if not start it
+@st.cache_resource
+def ensure_api_running():
+    try:
+        # Try to connect to the API
+        response = requests.get("http://localhost:8000/docs", timeout=2)
+        if response.status_code == 200:
+            st.sidebar.success("✅ API server is running")
+            print("API already running")
+            return True
+    except Exception as e:
+        print(f"API not running: {str(e)}")
+        pass
+    # API not running, start it
+    print("Starting API server...")
+    st.sidebar.info("Starting API server...")
+    # Start API in a separate thread
+    api_process = start_api()
+    # Wait for API to start
+    api_started = False
+    retries = 0
+    max_retries = 15
+    while not api_started and retries < max_retries:
+        try:
+            time.sleep(2)
+            response = requests.get("http://localhost:8000/docs", timeout=2)
+            if response.status_code == 200:
+                api_started = True
+                st.sidebar.success("✅ API server is running")
+                print("API server started successfully")
+                return True
+        except:
+            retries += 1
+            print(f"Waiting for API to start... (attempt {retries}/{max_retries})")
+    if not api_started:
+        st.sidebar.error("❌ Failed to start API server")
+        print("Failed to start API server")
+        return False
+# API Base URL for Spaces deployment.
+# Used as the prefix for the analysis and audio endpoints called further down.
+API_BASE_URL = "http://localhost:8000"
+# Function to create plot for sentiment distribution
+def plot_sentiment_distribution(sentiment_data):
+    # Extract and combine sentiment categories
+    categories = []
+    counts = []
+    # Process all sentiment categories
+    for category, count in sentiment_data.items():
+        if count > 0:  # Only include non-zero categories
+            categories.append(category)
+            counts.append(count)
+    # Create a DataFrame for easier plotting
+    df = pd.DataFrame({
+        'Sentiment': categories,
+        'Count': counts
+    })
+    # Set up colors based on sentiment
+    colors = []
+    for sentiment in df['Sentiment']:
+        if sentiment == 'Positive' or sentiment == 'Slightly Positive':
+            colors.append('#10B981')  # Green
+        elif sentiment == 'Negative' or sentiment == 'Slightly Negative':
+            colors.append('#EF4444')  # Red
+        else:
+            colors.append('#6B7280')  # Gray
+    # Create matplotlib figure
+    fig, ax = plt.subplots(figsize=(6, 4))
+    bars = ax.bar(df['Sentiment'], df['Count'], color=colors)
+    # Add count labels on top of bars
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height + 0.1,
+                str(int(height)), ha='center', va='bottom')
+    # Add labels and title
+    ax.set_xlabel('Sentiment')
+    ax.set_ylabel('Number of Articles')
+    ax.set_title('Sentiment Distribution')
+    # Improve aesthetics
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+    return fig
+# Function to create word cloud
+def display_word_cloud(topics):
+    from wordcloud import WordCloud
+    # Convert topics to text with frequency
+    text = " ".join(topics)
+    # Generate word cloud
+    wordcloud = WordCloud(
+        width=400,
+        height=200,
+        background_color='white',
+        colormap='viridis',
+        max_words=100,
+        contour_width=1
+    ).generate(text)
+    # Display word cloud
+    fig, ax = plt.subplots(figsize=(10, 5))
+    ax.imshow(wordcloud, interpolation='bilinear')
+    ax.axis('off')
+    return fig
+# Function to generate the example output format
+def generate_example_output(company_name: str) -> str:
+    """
+    Generate output in the example format for the given company.
+    Returns the formatted JSON as a string.
+    """
+    try:
+        # Make API request to get the analysis data
+        url = f"{API_BASE_URL}/api/complete_analysis"
+        response = requests.post(url, json={"company_name": company_name})
+        response.raise_for_status()
+        data = response.json()
+        # Format the data to match the example output format exactly
+        formatted_output = {
+            "Company": data["Company"],
+            "Articles": data["Articles"],
+            "Comparative Sentiment Score": {
+                "Sentiment Distribution": data["Comparative Sentiment Score"]["Sentiment Distribution"],
+                "Coverage Differences": data["Comparative Sentiment Score"]["Coverage Differences"],
+                "Topic Overlap": data["Comparative Sentiment Score"]["Topic Overlap"]
+            },
+            "Final Sentiment Analysis": data["Final Sentiment Analysis"],
+            "Audio": "[Play Hindi Speech]" if data.get("Audio") else "No audio available"
+        }
+        # Convert to JSON string with proper formatting
+        return json.dumps(formatted_output, indent=2)
+    except Exception as e:
+        return json.dumps({
+            "error": str(e),
+            "message": "Failed to generate example output"
+        }, indent=2)
+# Custom CSS for better styling
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.2rem;
+        font-weight: 600;
+        color: #1E3A8A;
+        margin-bottom: 1rem;
+    }
+    .sub-header {
+        font-size: 1.5rem;
+        font-weight: 500;
+        color: #3B82F6;
+        margin-top: 1.5rem;
+        margin-bottom: 0.5rem;
+    }
+    .info-text {
+        color: #6B7280;
+        font-style: italic;
+    }
+    .section-divider {
+        margin-top: 2rem;
+        margin-bottom: 2rem;
+        border-bottom: 1px solid #E5E7EB;
+    }
+    .stButton>button {
+        background-color: #2563EB;
+        color: white;
+        border-radius: 0.375rem;
+        padding: 0.5rem 1rem;
+        font-weight: 500;
+    }
+    .stButton>button:hover {
+        background-color: #1D4ED8;
+    }
+    .article-card {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        border: 1px solid #E5E7EB;
+        margin-bottom: 1rem;
+    }
+    .sentiment-positive {
+        color: #10B981;
+        font-weight: 500;
+    }
+    .sentiment-negative {
+        color: #EF4444;
+        font-weight: 500;
+    }
+    .sentiment-neutral {
+        color: #6B7280;
+        font-weight: 500;
+    }
+    .topic-tag {
+        background-color: #E5E7EB;
+        color: #374151;
+        border-radius: 9999px;
+        padding: 0.25rem 0.75rem;
+        margin-right: 0.5rem;
+        margin-bottom: 0.5rem;
+        display: inline-block;
+        font-size: 0.875rem;
+    }
+    .audio-container {
+        margin-top: 1rem;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        background-color: #F3F4F6;
+    }
+    .stAlert {
+        border-radius: 0.5rem;
+    }
+</style>
+""", unsafe_allow_html=True)
+# App header: page title (styled with the main-header class) and a one-paragraph description
+st.markdown("<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>", unsafe_allow_html=True)
+st.markdown("This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis, and generates a text-to-speech output in Hindi. Enter a company name to get started.", unsafe_allow_html=True)
+# Start the API server when the app loads.
+# api_running holds the value returned by ensure_api_running() and gates the Analyze button below.
+api_running = ensure_api_running()
+# Sidebar: company to analyze (pre-filled with "Tesla")
+st.sidebar.markdown("## Input Settings")
+company_name = st.sidebar.text_input("Company Name", value="Tesla")
+# Audio playback settings
+# NOTE(review): audio_speed is not read anywhere else in the visible code - confirm whether it should be sent to the API.
+st.sidebar.markdown("## Audio Settings")
+audio_speed = st.sidebar.select_slider("TTS Speech Speed:", options=["Slow", "Normal", "Fast"], value="Normal")
+st.sidebar.markdown("---")
+# Add option to see JSON in example format (read by the results section)
+st.sidebar.markdown("## Developer Options")
+show_json = st.sidebar.checkbox("Show JSON output in example format")
+st.sidebar.markdown("---")
+# About section
+st.sidebar.markdown("## About")
+st.sidebar.info("This application was developed for news analysis and translation. It uses web scraping, NLP, and TTS technologies to provide insights about companies.")
+# Analyze button; rendered disabled while the API is not running
+analyze_button = st.sidebar.button("Analyze Company News", disabled=not api_running)
+# Main content
+if analyze_button and company_name and api_running:
+    with st.spinner(f"Analyzing news for {company_name}. This may take a moment..."):
+        try:
+            # Call the API to get results (with longer timeout for Spaces)
+            response = requests.post(f"{API_BASE_URL}/api/complete_analysis",
+                                    json={"company_name": company_name},
+                                    timeout=180)  # 3 minutes timeout
+            response.raise_for_status()  # Raise exception for HTTP errors
+            # Parse JSON response
+            response = response.json()
+            # Display results
+            st.markdown(f"<h2 class='sub-header'>Analysis Results for {response['Company']}</h2>", unsafe_allow_html=True)
+            # Display sentiment overview
+            st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)
+            # Get sentiment counts
+            sentiment_data = response["Comparative Sentiment Score"]["Sentiment Distribution"]
+            # Create columns for visualization
+            col1, col2 = st.columns([3, 2])
+            with col1:
+                # Extract total counts
+                positive_count = sentiment_data.get("Positive", 0) + sentiment_data.get("Slightly Positive", 0)
+                negative_count = sentiment_data.get("Negative", 0) + sentiment_data.get("Slightly Negative", 0)
+                neutral_count = sentiment_data.get("Neutral", 0)
+                total_count = positive_count + negative_count + neutral_count
+                # Show summary text
+                sentiment_text = f"The company has "
+                if positive_count > negative_count and positive_count > neutral_count:
+                    sentiment_text += f"mostly positive coverage ({positive_count}/{total_count} positive, {negative_count}/{total_count} negative, {neutral_count}/{total_count} neutral)."
+                elif negative_count > positive_count and negative_count > neutral_count:
+                    sentiment_text += f"mostly negative coverage ({positive_count}/{total_count} positive, {negative_count}/{total_count} negative, {neutral_count}/{total_count} neutral)."
+                else:
+                    sentiment_text += f"balanced coverage ({positive_count}/{total_count} positive, {negative_count}/{total_count} negative, {neutral_count}/{total_count} neutral)."
+                st.write(sentiment_text)
+                # Plot sentiment distribution
+                try:
+                    fig = plot_sentiment_distribution(sentiment_data)
+                    st.pyplot(fig)
+                except Exception as e:
+                    st.warning(f"Could not create sentiment chart: {str(e)}")
+            with col2:
+                # Summary of key points
+                st.markdown("<h4>Key Insights</h4>", unsafe_allow_html=True)
+                # Show final sentiment analysis
+                st.write(response["Final Sentiment Analysis"])
+                # Show common topics
+                common_topics = response["Comparative Sentiment Score"]["Topic Overlap"].get("Common Topics", [])
+                if common_topics:
+                    st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
+                    for topic in common_topics:
+                        st.markdown(f"<span class='topic-tag'>{topic}</span>", unsafe_allow_html=True)
+            # Display Hindi TTS audio
+            if "Audio" in response and response["Audio"]:
+                st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)
+                audio_message = response["Audio"]
+                if audio_message == "Failed to generate audio":
+                    st.warning("Hindi audio could not be generated. However, you can still read the Hindi text below.")
+                else:
+                    try:
+                        # Check if the response contains the actual audio file path
+                        audio_file_path = response.get("_audio_file_path")
+                        if audio_file_path:
+                            # Extract the filename
+                            audio_filename = os.path.basename(audio_file_path)
+                            audio_url = f"{API_BASE_URL}/api/audio/{audio_filename}"
+                        else:
+                            # If no path is provided, just display a message
+                            st.info("Audio is available but the path was not provided.")
+                            audio_url = None
+                        if audio_url:
+                            # Attempt to download the audio file
+                            audio_response = requests.get(audio_url)
+                            if audio_response.status_code == 200:
+                                # Save temporarily
+                                temp_audio_path = f"temp_audio_{os.path.basename(audio_url)}"
+                                with open(temp_audio_path, "wb") as f:
+                                    f.write(audio_response.content)
+                                # Play from local file
+                                st.markdown("<div class='audio-container'>", unsafe_allow_html=True)
+                                st.audio(temp_audio_path, format="audio/mp3")
+                                # Display audio download link
+                                st.markdown(f"<a href='{audio_url}' download='hindi_summary.mp3'>Download Hindi Audio</a>", unsafe_allow_html=True)
+                                # Clean up temp file (optional)
+                                # os.remove(temp_audio_path)  # Uncomment to delete after use
+                            else:
+                                st.warning(f"Unable to load audio file (HTTP {audio_response.status_code}). You can still read the Hindi text below.")
+                        else:
+                            st.info("Hindi audio summary would be available here.")
+                    except Exception as e:
+                        st.warning(f"Error playing audio: {str(e)}. You can still read the Hindi text below.")
+                # Display the Hindi text with better formatting
+                with st.expander("Show Hindi Text"):
+                    hindi_text = response.get("Hindi Summary", "Hindi text not available.")
+                    # Format the text for better readability
+                    paragraphs = hindi_text.split("। ")
+                    for paragraph in paragraphs:
+                        if paragraph.strip():
+                            # Add a period if it doesn't end with one
+                            if not paragraph.strip().endswith("।"):
+                                paragraph += "।"
+                            st.markdown(f"<p style='font-size: 16px; margin-bottom: 10px;'>{paragraph}</p>", unsafe_allow_html=True)
+                st.markdown("</div>", unsafe_allow_html=True)
+                st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
+            # Display news articles
+            st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)
+            # Show each article in a card
+            articles = response["Articles"]
+            for i, article in enumerate(articles):
+                with st.container():
+                    st.markdown(f"<div class='article-card'>", unsafe_allow_html=True)
+                    # Article title and sentiment
+                    sentiment_class = "sentiment-neutral"
+                    if article["Sentiment"] == "Positive" or article["Sentiment"] == "Slightly Positive":
+                        sentiment_class = "sentiment-positive"
+                    elif article["Sentiment"] == "Negative" or article["Sentiment"] == "Slightly Negative":
+                        sentiment_class = "sentiment-negative"
+                    st.markdown(f"<h4>{i+1}. {article['Title']}</h4>", unsafe_allow_html=True)
+                    st.markdown(f"<span class='{sentiment_class}'>{article['Sentiment']}</span>", unsafe_allow_html=True)
+                    # Article summary
+                    st.write(article["Summary"])
+                    # Article topics
+                    for topic in article["Topics"]:
+                        st.markdown(f"<span class='topic-tag'>{topic}</span>", unsafe_allow_html=True)
+                    st.markdown("</div>", unsafe_allow_html=True)
+            # Display comparative analysis
+            st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)
+            # Common topics
+            st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
+            common_topics = response["Comparative Sentiment Score"]["Topic Overlap"].get("Common Topics", [])
+            if common_topics:
+                for topic in common_topics:
+                    st.markdown(f"<span class='topic-tag'>{topic}</span>", unsafe_allow_html=True)
+            else:
+                st.write("No common topics found across articles.")
+            # Coverage comparison
+            st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
+            comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])
+            if comparisons:
+                # Show first comparison inline
+                first_comparison = comparisons[0]
+                st.write(first_comparison.get("Comparison", ""))
+                st.markdown(f"<p class='info-text'>{first_comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
+            else:
+                st.write("No comparative insights available.")
+            # Display full comparison in expander
+            with st.expander("View All Comparisons"):
+                comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])
+                for i, comparison in enumerate(comparisons):
+                    st.markdown(f"<p><strong>{i+1}.</strong> {comparison.get('Comparison', '')}</p>", unsafe_allow_html=True)
+                    st.markdown(f"<p class='info-text'>{comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
+                    st.markdown("<hr>", unsafe_allow_html=True)
+            # Show JSON in example format if requested
+            if show_json:
+                st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
+                st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)
+                # Get the formatted JSON
+                json_output = generate_example_output(company_name)
+                # Display the JSON in a code block
+                st.code(json_output, language="json")
+        except requests.exceptions.HTTPError as http_err:
+            if http_err.response.status_code == 404:
+                st.error(f"No news articles found for {company_name}. Please try another company name.")
+            elif http_err.response.status_code == 500:
+                error_detail = "Unknown server error"
+                try:
+                    error_data = http_err.response.json()
+                    if "detail" in error_data:
+                        error_detail = error_data["detail"]
+                except:
+                    pass
+                st.error(f"Server error: {error_detail}")
+            else:
+                st.error(f"HTTP error occurred: {http_err}")
+        except requests.exceptions.ConnectionError:
+            st.error("Failed to connect to the server. Please make sure the API is running.")
+        except requests.exceptions.Timeout:
+            st.error("Request timed out. The analysis might be taking too long to complete.")
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+elif analyze_button and not api_running:
+    st.error("Cannot perform analysis because the API server is not running. Please check the logs.")
+else:
+    # Display placeholder
+    st.info("Enter a company name and click 'Analyze Company News' to get started.")
+    # Example of what the application does
+    with st.expander("See Example Analysis"):
+        st.write("""
+        This application will provide:
+        1. Sentiment analysis of news articles about the company
+        2. Key topics mentioned in the articles
+        3. Comparative analysis of different articles
+        4. Hindi audio summary of the findings
+        Companies you can try: Apple, Microsoft, Google, Amazon, Tesla, etc.
+        """)