|
|
""" |
|
|
Translation Service Module |
|
|
Contains the core translation logic separated from the UI. |
|
|
""" |
|
|
|
|
|
from typing import Tuple, Optional |
|
|
from huggingface_hub import InferenceClient |
|
|
from langdetect import detect, LangDetectException |
|
|
|
|
|
from config import ( |
|
|
ModelConfig, |
|
|
LanguageConfig, |
|
|
PromptConfig, |
|
|
ErrorMessages, |
|
|
get_language_name |
|
|
) |
|
|
|
|
|
|
|
|
class LanguageDetector:
    """Detects the language of free-form input text."""

    @staticmethod
    def detect_language(text: str) -> Tuple[str, str]:
        """
        Identify the language of *text*.

        Args:
            text: The text whose language should be identified.

        Returns:
            A ``(language_code, language_name)`` pair. When detection
            fails, falls back to ``("unknown", <failure message>)``
            instead of raising.
        """
        try:
            code = detect(text)
            return code, get_language_name(code)
        except LangDetectException:
            # langdetect raises on text it cannot classify (e.g. digits,
            # punctuation-only input); report a graceful fallback instead.
            return "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED
|
|
|
|
|
|
|
|
class TranslationEngine:
    """Handles translation using the Apertus model."""

    def __init__(self, model_name: Optional[str] = None):
        """
        Initialize the translation engine.

        Args:
            model_name: Optional model name override; defaults to
                ``ModelConfig.MODEL_NAME`` when omitted.
        """
        self.model_name = model_name or ModelConfig.MODEL_NAME
        self.client = InferenceClient(model=self.model_name)
        self.language_detector = LanguageDetector()

    def translate(
        self,
        text: str,
        target_language: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> Tuple[str, str, str]:
        """
        Translate text to target language.

        Args:
            text: Text to translate.
            target_language: Target language name (e.g., 'Spanish', 'French').
            max_tokens: Maximum tokens for response (defaults to config value).
            temperature: Model temperature (defaults to config value).

        Returns:
            Tuple of (translated_text, source_lang_code, source_lang_name).
            On API failure the first element is a formatted error message
            rather than a translation; no exception propagates to callers.
        """
        # Empty/whitespace-only input: nothing to translate or detect.
        if not text.strip():
            return "", "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED

        # Use explicit `is None` checks instead of `or` so legitimate falsy
        # values — notably temperature=0.0 (greedy decoding) — are honored
        # rather than silently replaced by the configured defaults.
        if max_tokens is None:
            max_tokens = ModelConfig.DEFAULT_MAX_TOKENS
        if temperature is None:
            temperature = ModelConfig.DEFAULT_TEMPERATURE

        # Detect the source language up front so it is available even when
        # the API call below fails.
        source_lang_code, source_lang_name = self.language_detector.detect_language(text)

        system_prompt = PromptConfig.SYSTEM_PROMPT_TEMPLATE.format(
            target_language=target_language
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text}
        ]

        try:
            response = self.client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True
            )

            # Accumulate the streamed response deltas into the final text.
            translated_text = ""
            for chunk in response:
                # `content` may be absent/None on some chunks (e.g. role-only
                # deltas); skip those.
                delta_content = getattr(chunk.choices[0].delta, 'content', None)
                if delta_content:
                    translated_text += delta_content

            return translated_text.strip(), source_lang_code, source_lang_name

        except Exception as e:
            # Surface API/stream failures to the UI as a formatted message
            # instead of crashing; detection results are still returned.
            error_message = ErrorMessages.TRANSLATION_ERROR.format(error=str(e))
            return error_message, source_lang_code, source_lang_name
|
|
|
|
|
|
|
|
class TranslationService:
    """
    High-level translation service that orchestrates translation and voice processing.
    """

    def __init__(self, model_name: Optional[str] = None):
        """
        Initialize the translation service.

        Args:
            model_name: Optional model name override, forwarded to the engine.
        """
        self.engine = TranslationEngine(model_name)

    def translate_text(
        self,
        text: str,
        target_language: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> Tuple[str, str]:
        """
        Translate text and return formatted results.

        Args:
            text: Text to translate.
            target_language: Target language name.
            max_tokens: Maximum tokens for response.
            temperature: Model temperature.

        Returns:
            Tuple of (translated_text, detected_language_info).
        """
        # Guard clause: nothing to do for blank input.
        if not text.strip():
            return "", ErrorMessages.NO_INPUT

        translated, code, name = self.engine.translate(
            text, target_language, max_tokens, temperature
        )

        return translated, f"Detected: {name} ({code})"

    def detect_language_only(self, text: str) -> str:
        """
        Detect language without translating.

        Args:
            text: Text to detect language.

        Returns:
            Formatted language detection string.
        """
        if not text.strip():
            return ErrorMessages.NO_INPUT

        code, name = self.engine.language_detector.detect_language(text)
        return f"Detected: {name} ({code})"
|
|
|