BSJ2004 committed on
Commit
f09b42b
·
verified ·
1 Parent(s): f3217f4

Create app_spaces.py

Browse files
Files changed (1) hide show
  1. app_spaces.py +524 -0
app_spaces.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import html
import json
import os
import subprocess
import sys
import threading
import time
from io import BytesIO
from typing import Any, Dict, List

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from PIL import Image, ImageEnhance
15
+
16
# Page-level Streamlit settings for the Hugging Face Spaces deployment.
_PAGE_SETTINGS = {
    "page_title": "News Summarization & TTS",
    "page_icon": "📰",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
23
+
24
+ # Start the API in the background
25
def start_api():
    """Launch ``api.py`` as a background child process.

    The server is started with the interpreter that is running this app
    (``sys.executable``) instead of whatever ``python`` resolves to on PATH,
    so the child uses the same environment and installed packages.

    Returns:
        subprocess.Popen: Handle of the spawned API server process.
    """
    # sys.executable can be empty in unusual embeddings; keep the old
    # behaviour as a fallback in that case.
    interpreter = sys.executable or "python"
    process = subprocess.Popen([interpreter, "api.py"])
    print(f"Started API server with PID {process.pid}")
    return process
29
+
30
+ # Check if the API is already running, if not start it
31
@st.cache_resource
def ensure_api_running():
    """Ensure the backend API is reachable, launching it when it is not.

    Probes ``{API_BASE_URL}/docs``. If that does not answer with HTTP 200,
    the API is started through ``start_api()`` and polled every 2 seconds,
    at most 15 times. Progress is reported in the Streamlit sidebar and on
    stdout.

    The attempt counter advances on every poll (not only when the request
    raises), so a server that keeps answering with a non-200 status can no
    longer keep the loop running forever.

    NOTE(review): the result is cached by ``st.cache_resource``, so a
    ``False`` result presumably sticks until the cache is cleared - confirm
    that this is intended.

    Returns:
        bool: True if the API answered with HTTP 200, False otherwise.
    """
    # API_BASE_URL is a module-level constant defined further down the file;
    # it exists by the time this function is first called.
    docs_url = f"{API_BASE_URL}/docs"
    max_retries = 15

    def _probe():
        """Run one health check and return ``(is_up, reason)``."""
        try:
            status = requests.get(docs_url, timeout=2).status_code
        except requests.exceptions.RequestException as exc:
            return False, str(exc)
        return status == 200, f"HTTP {status}"

    is_up, reason = _probe()
    if is_up:
        st.sidebar.success("✅ API server is running")
        print("API already running")
        return True
    print(f"API not running: {reason}")

    # API not running, start it as a child process.
    print("Starting API server...")
    st.sidebar.info("Starting API server...")
    api_process = start_api()

    for attempt in range(1, max_retries + 1):
        time.sleep(2)  # give the server time to boot before probing
        is_up, reason = _probe()
        if is_up:
            st.sidebar.success("✅ API server is running")
            print("API server started successfully")
            return True
        print(f"Waiting for API to start... (attempt {attempt}/{max_retries})")

    # Surface the most useful diagnostic we have before giving up.
    exit_code = api_process.poll()
    if exit_code is not None:
        print(f"API server process exited with code {exit_code}")
    st.sidebar.error("❌ Failed to start API server")
    print("Failed to start API server")
    return False
73
+
74
+ # API Base URL for Spaces deployment
75
+ API_BASE_URL = "http://localhost:8000"
76
+
77
+ # Function to create plot for sentiment distribution
78
def plot_sentiment_distribution(sentiment_data):
    """Draw a bar chart of article counts per sentiment label.

    Args:
        sentiment_data: Mapping of sentiment label to article count.
            Labels whose count is not greater than zero are left out.

    Returns:
        The matplotlib figure holding the chart.
    """
    # Keep only the labels that actually occur.
    kept = [(label, n) for label, n in sentiment_data.items() if n > 0]
    frame = pd.DataFrame({
        'Sentiment': [label for label, _ in kept],
        'Count': [n for _, n in kept],
    })

    # Positive shades are green, negative shades red, everything else gray.
    green, red, gray = '#10B981', '#EF4444', '#6B7280'
    palette = {
        'Positive': green,
        'Slightly Positive': green,
        'Negative': red,
        'Slightly Negative': red,
    }
    bar_colors = [palette.get(label, gray) for label in frame['Sentiment']]

    fig, ax = plt.subplots(figsize=(6, 4))
    rects = ax.bar(frame['Sentiment'], frame['Count'], color=bar_colors)

    # Print each bar's count just above it.
    for rect in rects:
        top = rect.get_height()
        center_x = rect.get_x() + rect.get_width() / 2.
        ax.text(center_x, top + 0.1, str(int(top)), ha='center', va='bottom')

    ax.set_title('Sentiment Distribution')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Number of Articles')

    # Tilt the labels so the longer names do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()

    return fig
125
+
126
+ # Function to create word cloud
127
def display_word_cloud(topics):
    """Render the given topics as a word cloud.

    Args:
        topics: Iterable of topic strings; they are joined with single
            spaces and handed to ``WordCloud.generate``.

    Returns:
        The matplotlib figure showing the word cloud with axes hidden.
    """
    # Imported lazily so the app still loads when wordcloud is not installed.
    from wordcloud import WordCloud

    cloud_options = dict(
        width=400,
        height=200,
        background_color='white',
        colormap='viridis',
        max_words=100,
        contour_width=1,
    )
    joined_topics = " ".join(topics)
    cloud = WordCloud(**cloud_options).generate(joined_topics)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis('off')

    return fig
149
+
150
+ # Function to generate the example output format
151
def generate_example_output(company_name: str) -> str:
    """Return the analysis for *company_name* as JSON in the example format.

    Posts to ``{API_BASE_URL}/api/complete_analysis`` and copies the fields
    of the example format out of the response. The audio payload is replaced
    by a short placeholder string.

    Args:
        company_name: Company to analyse.

    Returns:
        A JSON string indented by two spaces. If anything fails (request
        error, HTTP error status, missing key), the JSON instead holds an
        ``error`` and a ``message`` field; this function does not raise.
    """
    try:
        # Same 3-minute limit as the main analysis request; without a
        # timeout a stalled API would block this call indefinitely.
        response = requests.post(
            f"{API_BASE_URL}/api/complete_analysis",
            json={"company_name": company_name},
            timeout=180,
        )
        response.raise_for_status()
        data = response.json()

        comparative = data["Comparative Sentiment Score"]

        # Format the data to match the example output format exactly.
        formatted_output = {
            "Company": data["Company"],
            "Articles": data["Articles"],
            "Comparative Sentiment Score": {
                "Sentiment Distribution": comparative["Sentiment Distribution"],
                "Coverage Differences": comparative["Coverage Differences"],
                "Topic Overlap": comparative["Topic Overlap"],
            },
            "Final Sentiment Analysis": data["Final Sentiment Analysis"],
            "Audio": "[Play Hindi Speech]" if data.get("Audio") else "No audio available",
        }

        return json.dumps(formatted_output, indent=2)

    except Exception as e:
        # Deliberately broad: the caller shows the returned string as-is,
        # so every failure is reported as JSON instead of raising.
        return json.dumps({
            "error": str(e),
            "message": "Failed to generate example output"
        }, indent=2)
184
+
185
# Stylesheet for the classes used by the HTML snippets this app renders
# (headers, article cards, sentiment labels, topic pills, audio box).
_CUSTOM_CSS = """
<style>
.main-header {
    font-size: 2.2rem;
    font-weight: 600;
    color: #1E3A8A;
    margin-bottom: 1rem;
}
.sub-header {
    font-size: 1.5rem;
    font-weight: 500;
    color: #3B82F6;
    margin-top: 1.5rem;
    margin-bottom: 0.5rem;
}
.info-text {
    color: #6B7280;
    font-style: italic;
}
.section-divider {
    margin-top: 2rem;
    margin-bottom: 2rem;
    border-bottom: 1px solid #E5E7EB;
}
.stButton>button {
    background-color: #2563EB;
    color: white;
    border-radius: 0.375rem;
    padding: 0.5rem 1rem;
    font-weight: 500;
}
.stButton>button:hover {
    background-color: #1D4ED8;
}
.article-card {
    padding: 1rem;
    border-radius: 0.5rem;
    border: 1px solid #E5E7EB;
    margin-bottom: 1rem;
}
.sentiment-positive {
    color: #10B981;
    font-weight: 500;
}
.sentiment-negative {
    color: #EF4444;
    font-weight: 500;
}
.sentiment-neutral {
    color: #6B7280;
    font-weight: 500;
}
.topic-tag {
    background-color: #E5E7EB;
    color: #374151;
    border-radius: 9999px;
    padding: 0.25rem 0.75rem;
    margin-right: 0.5rem;
    margin-bottom: 0.5rem;
    display: inline-block;
    font-size: 0.875rem;
}
.audio-container {
    margin-top: 1rem;
    padding: 1rem;
    border-radius: 0.5rem;
    background-color: #F3F4F6;
}
.stAlert {
    border-radius: 0.5rem;
}
</style>
"""

# Inject the stylesheet once per script run.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
259
+
260
# ---------------------------------------------------------------------------
# Main page: header, sidebar inputs, analysis request and result rendering.
#
# Text returned by the API originates from scraped news, so it is escaped
# with html.escape before being placed inside markup that is rendered with
# unsafe_allow_html=True.
# ---------------------------------------------------------------------------


def _esc(value):
    """Return *value* as text that is safe to embed in HTML markup."""
    return html.escape(str(value))


def _sentiment_css_class(sentiment):
    """Map a sentiment label to the CSS class used to colour it."""
    if sentiment in ("Positive", "Slightly Positive"):
        return "sentiment-positive"
    if sentiment in ("Negative", "Slightly Negative"):
        return "sentiment-negative"
    return "sentiment-neutral"


def _render_topic_tags(topics):
    """Render every topic as a pill styled by the ``topic-tag`` class."""
    for topic in topics:
        st.markdown(f"<span class='topic-tag'>{_esc(topic)}</span>", unsafe_allow_html=True)


def _render_sentiment_overview(result):
    """Show the sentiment summary sentence, the chart and the key insights."""
    st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)

    comparative = result["Comparative Sentiment Score"]
    sentiment_data = comparative["Sentiment Distribution"]

    chart_col, insights_col = st.columns([3, 2])

    with chart_col:
        # "Slightly ..." labels are folded into their main category.
        positive_count = sentiment_data.get("Positive", 0) + sentiment_data.get("Slightly Positive", 0)
        negative_count = sentiment_data.get("Negative", 0) + sentiment_data.get("Slightly Negative", 0)
        neutral_count = sentiment_data.get("Neutral", 0)
        total_count = positive_count + negative_count + neutral_count

        # A category only "wins" when it strictly beats both others.
        if positive_count > negative_count and positive_count > neutral_count:
            tone = "mostly positive"
        elif negative_count > positive_count and negative_count > neutral_count:
            tone = "mostly negative"
        else:
            tone = "balanced"

        st.write(
            f"The company has {tone} coverage ({positive_count}/{total_count} positive, "
            f"{negative_count}/{total_count} negative, {neutral_count}/{total_count} neutral)."
        )

        try:
            fig = plot_sentiment_distribution(sentiment_data)
            st.pyplot(fig)
            # Figures created through pyplot stay registered until closed;
            # close it so repeated analyses do not pile up figures.
            plt.close(fig)
        except Exception as e:
            # The chart is optional; keep rendering the rest of the page.
            st.warning(f"Could not create sentiment chart: {str(e)}")

    with insights_col:
        st.markdown("<h4>Key Insights</h4>", unsafe_allow_html=True)
        st.write(result["Final Sentiment Analysis"])

        common_topics = comparative["Topic Overlap"].get("Common Topics", [])
        if common_topics:
            st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
            _render_topic_tags(common_topics)


def _render_audio_player(result):
    """Fetch the generated audio from the API and show a player and a download link."""
    audio_file_path = result.get("_audio_file_path")

    if not audio_file_path:
        # The API reported audio but did not say where the file is.
        st.info("Audio is available but the path was not provided.")
        st.info("Hindi audio summary would be available here.")
        return

    audio_filename = os.path.basename(audio_file_path)
    audio_url = f"{API_BASE_URL}/api/audio/{audio_filename}"

    audio_response = requests.get(audio_url, timeout=30)
    if audio_response.status_code != 200:
        st.warning(f"Unable to load audio file (HTTP {audio_response.status_code}). You can still read the Hindi text below.")
        return

    audio_bytes = audio_response.content

    # NOTE(review): each st.markdown call appears to render as its own
    # element, so this <div> probably does not wrap the player - confirm.
    # The opening and closing tags are at least always emitted as a pair.
    st.markdown("<div class='audio-container'>", unsafe_allow_html=True)

    # Play straight from memory; no temporary file is left on disk.
    st.audio(audio_bytes, format="audio/mp3")

    # audio_url points at the API on the server's own localhost, which a
    # visitor's browser cannot reach, so the file is embedded in the link.
    encoded_audio = base64.b64encode(audio_bytes).decode("ascii")
    st.markdown(
        f"<a href='data:audio/mpeg;base64,{encoded_audio}' download='hindi_summary.mp3'>Download Hindi Audio</a>",
        unsafe_allow_html=True,
    )
    st.markdown("</div>", unsafe_allow_html=True)


def _render_hindi_section(result):
    """Show the Hindi audio summary (when available) and the Hindi text."""
    st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)

    if result["Audio"] == "Failed to generate audio":
        st.warning("Hindi audio could not be generated. However, you can still read the Hindi text below.")
    else:
        try:
            _render_audio_player(result)
        except Exception as e:
            # Audio is a nice-to-have; the text below must still be shown.
            st.warning(f"Error playing audio: {str(e)}. You can still read the Hindi text below.")

    with st.expander("Show Hindi Text"):
        hindi_text = result.get("Hindi Summary", "Hindi text not available.")

        # Split on the danda followed by a space so each sentence becomes
        # its own paragraph.
        for paragraph in hindi_text.split("। "):
            if not paragraph.strip():
                continue
            # Splitting removed the danda; put it back where it is missing.
            if not paragraph.strip().endswith("।"):
                paragraph += "।"
            st.markdown(
                f"<p style='font-size: 16px; margin-bottom: 10px;'>{_esc(paragraph)}</p>",
                unsafe_allow_html=True,
            )


def _render_articles(articles):
    """Show every article with its title, sentiment, summary and topics."""
    st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)

    for position, article in enumerate(articles, start=1):
        with st.container():
            st.markdown("<div class='article-card'>", unsafe_allow_html=True)

            sentiment = article["Sentiment"]
            st.markdown(f"<h4>{position}. {_esc(article['Title'])}</h4>", unsafe_allow_html=True)
            st.markdown(
                f"<span class='{_sentiment_css_class(sentiment)}'>{_esc(sentiment)}</span>",
                unsafe_allow_html=True,
            )

            st.write(article["Summary"])
            _render_topic_tags(article["Topics"])

            st.markdown("</div>", unsafe_allow_html=True)


def _render_comparative_analysis(comparative):
    """Show common topics and the coverage comparisons between articles."""
    st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)

    st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
    common_topics = comparative["Topic Overlap"].get("Common Topics", [])
    if common_topics:
        _render_topic_tags(common_topics)
    else:
        st.write("No common topics found across articles.")

    st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
    comparisons = comparative.get("Coverage Differences", [])
    if comparisons:
        # Only the first comparison is shown inline; the rest are in the expander.
        first_comparison = comparisons[0]
        st.write(first_comparison.get("Comparison", ""))
        st.markdown(f"<p class='info-text'>{_esc(first_comparison.get('Impact', ''))}</p>", unsafe_allow_html=True)
    else:
        st.write("No comparative insights available.")

    with st.expander("View All Comparisons"):
        for position, comparison in enumerate(comparisons, start=1):
            st.markdown(
                f"<p><strong>{position}.</strong> {_esc(comparison.get('Comparison', ''))}</p>",
                unsafe_allow_html=True,
            )
            st.markdown(f"<p class='info-text'>{_esc(comparison.get('Impact', ''))}</p>", unsafe_allow_html=True)
            st.markdown("<hr>", unsafe_allow_html=True)


def _show_http_error(http_err, company_name):
    """Translate an HTTP error from the analysis request into a user message."""
    status_code = http_err.response.status_code
    if status_code == 404:
        st.error(f"No news articles found for {company_name}. Please try another company name.")
    elif status_code == 500:
        error_detail = "Unknown server error"
        try:
            error_data = http_err.response.json()
        except ValueError:
            # The error body was not JSON; keep the generic message.
            error_data = None
        if isinstance(error_data, dict) and "detail" in error_data:
            error_detail = error_data["detail"]
        st.error(f"Server error: {error_detail}")
    else:
        st.error(f"HTTP error occurred: {http_err}")


# App header
st.markdown("<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>", unsafe_allow_html=True)
st.markdown("This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis, and generates a text-to-speech output in Hindi. Enter a company name to get started.", unsafe_allow_html=True)

# Start the API server when the app loads
api_running = ensure_api_running()

# Sidebar
st.sidebar.markdown("## Input Settings")
company_name = st.sidebar.text_input("Company Name", value="Tesla")

# Audio playback settings
# NOTE(review): audio_speed is not read anywhere in the visible code.
st.sidebar.markdown("## Audio Settings")
audio_speed = st.sidebar.select_slider("TTS Speech Speed:", options=["Slow", "Normal", "Fast"], value="Normal")
st.sidebar.markdown("---")

# Add option to see JSON in example format
st.sidebar.markdown("## Developer Options")
show_json = st.sidebar.checkbox("Show JSON output in example format")
st.sidebar.markdown("---")

# About section
st.sidebar.markdown("## About")
st.sidebar.info("This application was developed for news analysis and translation. It uses web scraping, NLP, and TTS technologies to provide insights about companies.")

# Analyze button (disabled while the API is unavailable)
analyze_button = st.sidebar.button("Analyze Company News", disabled=not api_running)

# Main content
if analyze_button and company_name and api_running:
    with st.spinner(f"Analyzing news for {company_name}. This may take a moment..."):
        try:
            # Call the API to get results (with longer timeout for Spaces)
            response = requests.post(
                f"{API_BASE_URL}/api/complete_analysis",
                json={"company_name": company_name},
                timeout=180,  # 3 minutes timeout
            )
            response.raise_for_status()  # Raise exception for HTTP errors

            # Keep the parsed payload under its own name instead of
            # rebinding `response`.
            result = response.json()

            st.markdown(
                f"<h2 class='sub-header'>Analysis Results for {_esc(result['Company'])}</h2>",
                unsafe_allow_html=True,
            )

            _render_sentiment_overview(result)

            if result.get("Audio"):
                _render_hindi_section(result)

            st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)

            _render_articles(result["Articles"])
            _render_comparative_analysis(result["Comparative Sentiment Score"])

            # Show JSON in example format if requested
            if show_json:
                st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
                st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)
                st.code(generate_example_output(company_name), language="json")

        except requests.exceptions.HTTPError as http_err:
            _show_http_error(http_err, company_name)
        except requests.exceptions.ConnectionError:
            st.error("Failed to connect to the server. Please make sure the API is running.")
        except requests.exceptions.Timeout:
            st.error("Request timed out. The analysis might be taking too long to complete.")
        except Exception as e:
            # Last-resort boundary so a malformed payload shows a message
            # instead of a stack trace.
            st.error(f"An error occurred: {str(e)}")
elif analyze_button and not api_running:
    st.error("Cannot perform analysis because the API server is not running. Please check the logs.")
else:
    # Display placeholder
    st.info("Enter a company name and click 'Analyze Company News' to get started.")

    # Example of what the application does
    with st.expander("See Example Analysis"):
        st.write("""
        This application will provide:

        1. Sentiment analysis of news articles about the company
        2. Key topics mentioned in the articles
        3. Comparative analysis of different articles
        4. Hindi audio summary of the findings

        Companies you can try: Apple, Microsoft, Google, Amazon, Tesla, etc.
        """)