"""
BabelDocs x Agentic AI MCP - Gradio Application

PDF Translation with Google Drive Integration.
Accepts public GDrive links.

For Anthropic Hackathon - Track 1: Building MCP

Usage:
    python app.py
"""

import base64
import os
import re
import tempfile
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import unquote

import gradio as gr
import httpx
from dotenv import load_dotenv

load_dotenv()

# Modal endpoint configuration
# Set BABELDOCS_MODAL_URL as HuggingFace Space secret for production
MODAL_BASE_URL = os.getenv("BABELDOCS_MODAL_URL")
if not MODAL_BASE_URL:
    raise ValueError("BABELDOCS_MODAL_URL environment variable required. Set it as a HuggingFace Space secret.")

MODAL_TRANSLATE_URL = f"{MODAL_BASE_URL}-babeldocstranslator-api.modal.run"
MODAL_HEALTH_URL = f"{MODAL_BASE_URL}-babeldocstranslator-health.modal.run"

# Max pages limit (test phase); also interpolated into the UI help text below.
MAX_PAGES = 20

# Supported languages (ISO 639-1 code -> display name)
LANGUAGES = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "ru": "Russian",
    "ar": "Arabic",
}

# Sample files for testing: (dropdown label, public Google Drive URL)
SAMPLE_FILES = [
    ("French Contract (10 pages)", "https://drive.google.com/file/d/1S9cWP7QkiqltlYJt8o1FpQRLHElZLyYx/view?usp=sharing"),
    ("Sample Document 2", "https://drive.google.com/file/d/1IaTjmfNRhDsCsTQIo6To1HYVZ1IkQHw3/view?usp=sharing"),
    ("Sample Document 3", "https://drive.google.com/file/d/1oexokd-auHnGQGvQEu-0NAB8TFI8mnye/view?usp=sharing"),
]

# Patterns are tried in order; the first match wins.
# The "id=" pattern is anchored to the start of the string or a query
# separator so that parameters merely ending in "id" (e.g. "ouid=")
# are not mistaken for the file ID.
_GDRIVE_ID_PATTERNS = (
    re.compile(r"/file/d/([a-zA-Z0-9_-]+)"),
    re.compile(r"(?:^|[?&])id=([a-zA-Z0-9_-]+)"),
    re.compile(r"/d/([a-zA-Z0-9_-]+)"),
)

# RFC 6266 extended parameter, e.g.  filename*=UTF-8''na%C3%AFve.pdf
_FILENAME_EXTENDED_RE = re.compile(r"filename\*\s*=\s*utf-8''([^;\s]+)", re.IGNORECASE)
# Plain parameter, quoted or bare, e.g.  filename="report.pdf"
_FILENAME_PLAIN_RE = re.compile(r'filename="?([^";\n]+)"?')

# Upper bound on the sanitized stem so the final file name stays well below
# common file-system name limits once the suffix is appended.
_MAX_STEM_LENGTH = 100


def log_message(logs: list, message: str) -> list:
    """Append a timestamped message to ``logs`` and return the same list.

    Args:
        logs: Accumulated log lines; mutated in place.
        message: Text to append, prefixed with the current ``HH:MM:SS`` time.

    Returns:
        The ``logs`` list that was passed in.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    logs.append(f"[{timestamp}] {message}")
    return logs


def extract_gdrive_file_id(url: str) -> str | None:
    """Extract the file ID from a Google Drive URL.

    Recognizes ``/file/d/<id>``, an ``id=<id>`` query parameter, and the
    generic ``/d/<id>`` path form, in that order.

    Args:
        url: The Google Drive URL (or URL fragment) to inspect.

    Returns:
        The file ID, or ``None`` when no known pattern matches.
    """
    for pattern in _GDRIVE_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None


def _filename_from_content_disposition(header_value: str) -> str | None:
    """Return the file name carried by a Content-Disposition header, if any.

    The RFC 6266 ``filename*`` parameter (percent-encoded UTF-8) is preferred
    because it is the only form that reliably carries non-ASCII names; the
    plain ``filename`` parameter is used as a fallback.
    """
    extended = _FILENAME_EXTENDED_RE.search(header_value)
    if extended:
        decoded = unquote(extended.group(1), encoding="utf-8", errors="replace")
        if decoded:
            return decoded

    plain = _FILENAME_PLAIN_RE.search(header_value)
    if plain:
        return plain.group(1)
    return None


async def download_gdrive_public(url: str) -> tuple[bytes, str]:
    """Download a PDF from a public Google Drive link.

    Args:
        url: A Google Drive sharing URL from which a file ID can be extracted.

    Returns:
        ``(file_bytes, filename)``. The file name is taken from the
        Content-Disposition header when present, otherwise
        ``gdrive_<file_id>.pdf``.

    Raises:
        ValueError: If no file ID can be extracted from ``url``, or if the
            downloaded payload is not a PDF.
        httpx.HTTPStatusError: If the download responds with an error status.
        httpx.TimeoutException: If the download times out.
    """
    file_id = extract_gdrive_file_id(url)
    if not file_id:
        raise ValueError("Invalid Google Drive URL")

    # Direct download URL
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

    async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as client:
        response = await client.get(download_url)
        response.raise_for_status()

    content = response.content

    # A successful status does not guarantee a PDF: a non-PDF payload (for
    # example an HTML page served instead of the file) would otherwise be
    # forwarded to the translator. The PDF header must appear near the start
    # of the file, so only the first KiB is inspected.
    if b"%PDF-" not in content[:1024]:
        raise ValueError(
            "Google Drive did not return a PDF. Make sure the link is public "
            "and points to a PDF file."
        )

    content_disp = response.headers.get("Content-Disposition", "")
    filename = _filename_from_content_disposition(content_disp) or f"gdrive_{file_id}.pdf"

    return content, filename


def _safe_stem(filename: str) -> str:
    """Return a file-system-safe stem derived from an untrusted file name.

    The name originates from a remote response header, so anything other than
    word characters, dots, dashes and spaces is replaced with ``_``.
    """
    stem = Path(filename).stem
    cleaned = re.sub(r"[^\w.\- ]+", "_", stem).strip(" .")
    return cleaned[:_MAX_STEM_LENGTH] or "document"


def _save_result_pdf(pdf_base64: str, filename: str) -> tuple[str, int]:
    """Decode a base64 PDF and write it under ``filename`` in a new temp dir.

    A dedicated directory per file lets the download keep its descriptive
    name without colliding with other requests.

    Returns:
        ``(path, size_in_bytes)`` of the written file.
    """
    pdf_bytes = base64.b64decode(pdf_base64)
    out_dir = Path(tempfile.mkdtemp(prefix="babeldocs_"))
    out_path = out_dir / filename
    out_path.write_bytes(pdf_bytes)
    return str(out_path), len(pdf_bytes)


async def translate_pdf_modal(
    gdrive_url: str,
    target_lang: str,
    progress=gr.Progress()
) -> tuple:
    """Translate a PDF from a public Google Drive link using the Modal endpoint.

    Args:
        gdrive_url: Public Google Drive link to the source PDF.
        target_lang: Target language code (a key of ``LANGUAGES``).
        progress: Gradio progress tracker, injected by Gradio.

    Returns:
        ``(mono_path, dual_path, status, stats_markdown, logs_text)``.
        Both paths are ``None`` on failure; either may be ``None`` when the
        service returns only one of the two outputs. Errors are reported
        through ``status`` and ``logs_text`` rather than raised, so the UI
        always receives a complete result tuple.
    """
    logs = []

    # Validate input
    if not gdrive_url or not gdrive_url.strip():
        return None, None, "Please provide a Google Drive link or select a sample file", "", "\n".join(logs)

    try:
        log_message(logs, "Starting translation...")

        # Get PDF bytes and filename from Google Drive
        log_message(logs, "Downloading from Google Drive...")
        progress(0.05, desc="Downloading from Google Drive...")
        pdf_bytes, source_filename = await download_gdrive_public(gdrive_url.strip())
        log_message(logs, f"Downloaded: {source_filename}")

        pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")

        log_message(logs, f"Input: {source_filename}")
        log_message(logs, f"Size: {len(pdf_bytes) / 1024:.1f} KB")
        log_message(logs, f"Target: {LANGUAGES.get(target_lang, target_lang)}")

        progress(0.1, desc="Uploading to Modal...")

        payload = {
            "pdf_base64": pdf_base64,
            "target_lang": target_lang,
        }

        log_message(logs, "Translating on Modal cloud...")
        log_message(logs, "(This may take several minutes)")
        progress(0.2, desc="Translating...")

        # Monotonic clock: unaffected by wall-clock adjustments mid-request.
        start_time = time.perf_counter()

        async with httpx.AsyncClient(timeout=900.0, follow_redirects=True) as client:
            response = await client.post(MODAL_TRANSLATE_URL, json=payload)
            response.raise_for_status()
            result = response.json()

        duration = time.perf_counter() - start_time

        progress(0.8, desc="Processing result...")

        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            log_message(logs, f"ERROR: {error_msg}")
            return None, None, "Translation failed", "", "\n".join(logs)

        stem = _safe_stem(source_filename)

        # Process mono_img PDF
        mono_img_path = None
        mono_img_base64 = result.get("mono_img_pdf_base64")
        if mono_img_base64:
            mono_img_filename = f"{stem}_translated.{target_lang}.pdf"
            mono_img_path, mono_size = _save_result_pdf(mono_img_base64, mono_img_filename)
            log_message(logs, f"Mono: {mono_img_filename} ({mono_size / 1024:.1f} KB)")

        # Process dual_img PDF
        dual_img_path = None
        dual_img_base64 = result.get("dual_img_pdf_base64")
        if dual_img_base64:
            dual_img_filename = f"{stem}_translated.{target_lang}.dual.pdf"
            dual_img_path, dual_size = _save_result_pdf(dual_img_base64, dual_img_filename)
            log_message(logs, f"Dual: {dual_img_filename} ({dual_size / 1024:.1f} KB)")

        if not mono_img_path and not dual_img_path:
            log_message(logs, "ERROR: No output PDF in response")
            return None, None, "Translation failed", "", "\n".join(logs)

        log_message(logs, f"Duration: {duration:.1f} seconds")

        stats_msg = f"""**Translation completed!**
- **Duration:** {duration:.1f} seconds
- **Target:** {LANGUAGES.get(target_lang, target_lang)}"""

        progress(1.0, desc="Done!")

        return mono_img_path, dual_img_path, "Translation successful!", stats_msg, "\n".join(logs)

    except httpx.TimeoutException:
        log_message(logs, "ERROR: Translation timed out")
        return None, None, "Translation timed out", "", "\n".join(logs)
    except httpx.HTTPStatusError as e:
        log_message(logs, f"ERROR: HTTP {e.response.status_code}")
        return None, None, f"HTTP error: {e.response.status_code}", "", "\n".join(logs)
    except Exception as e:
        # UI boundary: surface any other failure as a status message instead
        # of letting the handler crash.
        log_message(logs, f"ERROR: {str(e)}")
        return None, None, f"Error: {str(e)}", "", "\n".join(logs)


def load_sample(sample_name: str) -> str:
    """Return the URL of the sample file labelled ``sample_name``.

    Returns an empty string when no sample has that label.
    """
    for name, url in SAMPLE_FILES:
        if name == sample_name:
            return url
    return ""


# Gradio Interface
with gr.Blocks(title="BabelDocs x Agentic AI MCP") as demo:
    gr.Markdown("""
    # BabelDocs x Agentic AI MCP - PDF Translation with Layout Preservation

    **Translate PDFs from Google Drive while preserving the original layout**

    ---

    ## Full Google Drive Workflow in Claude Desktop MCP

    ```
    "Translate my Q3 report to French and save it to Translations folder"
        ↓
    Claude searches → downloads → translates → uploads → done!
    ```

    ---
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Input")

            gr.Markdown("**Try a sample file:**")
            sample_dropdown = gr.Dropdown(
                choices=[name for name, _ in SAMPLE_FILES],
                label="Sample Files",
                info="Select a sample PDF to test the translation",
            )

            gr.Markdown("**Or paste your own Google Drive link:**")
            gdrive_url = gr.Textbox(
                label="Google Drive Link (public)",
                placeholder="https://drive.google.com/file/d/...",
                info="Paste a public Google Drive link",
            )

            target_lang = gr.Dropdown(
                choices=list(LANGUAGES),
                value="en",
                label="Target Language",
            )

            translate_btn = gr.Button(
                "Translate PDF",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.Markdown("### Result")

            status_output = gr.Textbox(
                label="Status",
                interactive=False,
            )

            stats_output = gr.Markdown(label="Statistics")

            gr.Markdown("**Downloads:**")
            with gr.Row():
                mono_img_output = gr.File(label="Mono (translated + images)")
                dual_img_output = gr.File(label="Dual (bilingual + images)")

            logs_output = gr.Textbox(
                label="Logs",
                interactive=False,
                lines=10,
                max_lines=15,
            )

    gr.Markdown(f"""
    ---

    ### How it works

    ```
    1. Paste Google Drive link (or select sample)
        ↓
    2. Send to Modal cloud (serverless)
        ↓
    3. BabelDOC translates text + images, preserves layout
        ↓
    4. Download translated PDF
    ```

    ### Test Phase Limits

    - **Maximum {MAX_PAGES} pages per PDF** (to prevent token abuse)
    - Oversized documents will be rejected automatically

    ---

    **Built with:** BabelDOC, Modal, Nebius AI, Gradio | **Hackathon:** Anthropic MCP Track 1
    """)

    # Load sample URL when selected
    sample_dropdown.change(
        fn=load_sample,
        inputs=[sample_dropdown],
        outputs=[gdrive_url],
    )

    translate_btn.click(
        fn=translate_pdf_modal,
        inputs=[gdrive_url, target_lang],
        outputs=[mono_img_output, dual_img_output, status_output, stats_output, logs_output],
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        ssr_mode=False,
    )