diff --git a/examples/04-transcription-prerecorded-url.py b/examples/10-transcription-prerecorded-url.py similarity index 100% rename from examples/04-transcription-prerecorded-url.py rename to examples/10-transcription-prerecorded-url.py diff --git a/examples/05-transcription-prerecorded-file.py b/examples/11-transcription-prerecorded-file.py similarity index 100% rename from examples/05-transcription-prerecorded-file.py rename to examples/11-transcription-prerecorded-file.py diff --git a/examples/06-transcription-prerecorded-callback.py b/examples/12-transcription-prerecorded-callback.py similarity index 100% rename from examples/06-transcription-prerecorded-callback.py rename to examples/12-transcription-prerecorded-callback.py diff --git a/examples/07-transcription-live-websocket.py b/examples/13-transcription-live-websocket.py similarity index 100% rename from examples/07-transcription-live-websocket.py rename to examples/13-transcription-live-websocket.py diff --git a/examples/26-transcription-live-websocket-v2.py b/examples/14-transcription-live-websocket-v2.py similarity index 100% rename from examples/26-transcription-live-websocket-v2.py rename to examples/14-transcription-live-websocket-v2.py diff --git a/examples/22-transcription-advanced-options.py b/examples/15-transcription-advanced-options.py similarity index 100% rename from examples/22-transcription-advanced-options.py rename to examples/15-transcription-advanced-options.py diff --git a/examples/10-text-to-speech-single.py b/examples/20-text-to-speech-single.py similarity index 100% rename from examples/10-text-to-speech-single.py rename to examples/20-text-to-speech-single.py diff --git a/examples/11-text-to-speech-streaming.py b/examples/21-text-to-speech-streaming.py similarity index 100% rename from examples/11-text-to-speech-streaming.py rename to examples/21-text-to-speech-streaming.py diff --git a/examples/22-text-builder-demo.py b/examples/22-text-builder-demo.py new file mode 100644 index 
00000000..9a9e2adf --- /dev/null +++ b/examples/22-text-builder-demo.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +""" +TextBuilder Demo - Interactive demonstration of all TextBuilder features + +This demo script showcases all TextBuilder capabilities without requiring +an API key. It generates the formatted text that would be sent to the API. +""" + +from deepgram import ( + TextBuilder, + add_pronunciation, + ssml_to_deepgram, + validate_ipa, + validate_pause, +) + + +def print_section(title: str): + """Print a formatted section header""" + print("\n" + "=" * 70) + print(f" {title}") + print("=" * 70) + + +def demo_basic_text_builder(): + """Demonstrate basic TextBuilder usage""" + print_section("1. Basic TextBuilder Usage") + + text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + print("\nCode:") + print(""" + text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + """) + + print("\nGenerated TTS Text:") + print(f" {text}") + + +def demo_standalone_functions(): + """Demonstrate standalone helper functions""" + print_section("2. Standalone Helper Functions") + + # add_pronunciation + print("\n▸ add_pronunciation()") + text = "The patient should take methotrexate weekly." + print(f" Original: {text}") + + text = add_pronunciation(text, "methotrexate", "mɛθəˈtrɛkseɪt") + print(f" Modified: {text}") + + +def demo_ssml_conversion(): + """Demonstrate SSML to Deepgram conversion""" + print_section("3. SSML Migration") + + ssml = """ + Welcome to your medication guide. + + Take azathioprine + as prescribed. 
+ + Contact your doctor if you experience side effects. +""" + + print("\nOriginal SSML:") + print(ssml) + + text = ssml_to_deepgram(ssml) + print("\nConverted to Deepgram Format:") + print(f" {text}") + + +def demo_mixed_usage(): + """Demonstrate mixing SSML with builder methods""" + print_section("4. Mixed SSML + Builder Methods") + + ssml = 'Take medicine daily.' + + text = ( + TextBuilder() + .from_ssml(ssml) + .pause(500) + .text(" Store at room temperature.") + .pause(500) + .text(" Keep out of reach of children.") + .build() + ) + + print("\nStarting SSML:") + print(f" {ssml}") + + print("\nAdded via builder:") + print(" .pause(500)") + print(" .text(' Store at room temperature.')") + print(" .pause(500)") + print(" .text(' Keep out of reach of children.')") + + print("\nFinal Result:") + print(f" {text}") + + +def demo_validation(): + """Demonstrate validation functions""" + print_section("5. Validation Functions") + + print("\n▸ validate_ipa()") + + # Valid IPA + is_valid, msg = validate_ipa("ˌæzəˈθaɪəpriːn") + print(f" validate_ipa('ˌæzəˈθaɪəpriːn'): {is_valid} {msg}") + + # Invalid IPA (contains quote) + is_valid, msg = validate_ipa('test"quote') + print(f" validate_ipa('test\"quote'): {is_valid} - {msg}") + + # Too long + is_valid, msg = validate_ipa("x" * 101) + print(f" validate_ipa('x' * 101): {is_valid} - {msg}") + + print("\n▸ validate_pause()") + + # Valid pauses + is_valid, msg = validate_pause(500) + print(f" validate_pause(500): {is_valid}") + + is_valid, msg = validate_pause(5000) + print(f" validate_pause(5000): {is_valid}") + + # Invalid pauses + is_valid, msg = validate_pause(400) + print(f" validate_pause(400): {is_valid} - {msg}") + + is_valid, msg = validate_pause(550) + print(f" validate_pause(550): {is_valid} - {msg}") + + +def demo_error_handling(): + """Demonstrate error handling""" + print_section("6. 
Error Handling") + + print("\n▸ Pronunciation limit (500 max)") + try: + builder = TextBuilder() + for i in range(501): + builder.pronunciation(f"word{i}", "test") + builder.build() + except ValueError as e: + print(f" ✓ Caught expected error: {e}") + + print("\n▸ Pause limit (50 max)") + try: + builder = TextBuilder() + for i in range(51): + builder.pause(500) + builder.build() + except ValueError as e: + print(f" ✓ Caught expected error: {e}") + + print("\n▸ Character limit (2000 max)") + try: + builder = TextBuilder() + builder.text("x" * 2001) + builder.build() + except ValueError as e: + print(f" ✓ Caught expected error: {e}") + + print("\n▸ Invalid pause duration") + try: + builder = TextBuilder() + builder.pause(450) + except ValueError as e: + print(f" ✓ Caught expected error: {e}") + + +def demo_real_world_examples(): + """Demonstrate real-world use cases""" + print_section("7. Real-World Examples") + + print("\n▸ Pharmacy Prescription Instructions") + text = ( + TextBuilder() + .text("Prescription for ") + .pronunciation("lisinopril", "laɪˈsɪnəprɪl") + .pause(500) + .text(" Take one tablet daily for hypertension.") + .pause(500) + .text(" Common side effects may include ") + .pronunciation("hypotension", "ˌhaɪpoʊˈtɛnʃən") + .text(" or dizziness.") + .build() + ) + print(f"\n {text}") + + print("\n▸ Medical Device Instructions") + text = ( + TextBuilder() + .text("Insert the ") + .pronunciation("cannula", "ˈkænjʊlə") + .text(" at a forty-five degree angle.") + .pause(1000) + .text(" Ensure the ") + .pronunciation("catheter", "ˈkæθɪtər") + .text(" is properly secured.") + .build() + ) + print(f"\n {text}") + + print("\n▸ Scientific Terminology") + text = ( + TextBuilder() + .text("The study examined ") + .pronunciation("mitochondrial", "ˌmaɪtəˈkɑːndriəl") + .text(" function in ") + .pronunciation("erythrocytes", "ɪˈrɪθrəsaɪts") + .pause(500) + .text(" using advanced imaging.") + .build() + ) + print(f"\n {text}") + + +def demo_api_limits(): + """Display API 
limits summary""" + print_section("8. API Limits Summary") + + print("\n Limit Type Maximum Unit") + print(" " + "-" * 60) + print(" Pronunciations per request 500 count") + print(" Pauses per request 50 count") + print(" Total characters 2000 characters*") + print(" IPA string length 100 characters") + print(" Pause duration (min) 500 milliseconds") + print(" Pause duration (max) 5000 milliseconds") + print(" Pause increment 100 milliseconds") + print("\n * Character count excludes pronunciation IPA and control syntax") + + +def main(): + """Run all demonstrations""" + print("\n" + "█" * 70) + print(" DEEPGRAM TEXTBUILDER - COMPREHENSIVE DEMONSTRATION") + print("█" * 70) + + demo_basic_text_builder() + demo_standalone_functions() + demo_ssml_conversion() + demo_mixed_usage() + demo_validation() + demo_error_handling() + demo_real_world_examples() + demo_api_limits() + + print("\n" + "=" * 70) + print(" Demo Complete!") + print("=" * 70) + print("\n REST API generation: examples/23-text-builder-helper.py") + print(" Streaming TTS: examples/24-text-builder-streaming.py") + print("=" * 70 + "\n") + + +if __name__ == "__main__": + main() diff --git a/examples/23-text-builder-helper.py b/examples/23-text-builder-helper.py new file mode 100644 index 00000000..b15705a2 --- /dev/null +++ b/examples/23-text-builder-helper.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +""" +Example: TextBuilder with REST API TTS + +This example demonstrates using TextBuilder with the REST API to generate +complete audio files with custom pronunciations and pauses. 
+""" + +import os + +from deepgram import DeepgramClient, TextBuilder, add_pronunciation, ssml_to_deepgram +from deepgram.speak.v1.audio.types import ( + AudioGenerateRequestEncoding, + AudioGenerateRequestModel, +) + + +def example_basic_text_builder(): + """Example 1: Basic TextBuilder usage with pronunciations and pauses""" + print("Example 1: Basic TextBuilder Usage") + print("-" * 50) + + # Build text with pronunciations and pauses + text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + print(f"Generated text: {text}\n") + + # Use with Deepgram client + api_key = os.getenv("DEEPGRAM_API_KEY") + if api_key: + client = DeepgramClient(api_key=api_key) + + # Generate speech with custom pronunciations + response = client.speak.v1.generate( + text, + model=AudioGenerateRequestModel.AURA_ASTERIA_EN, + encoding=AudioGenerateRequestEncoding.LINEAR16, + ) + + # Save to file + with open("output_example1.wav", "wb") as f: + f.write(response) + + print("✓ Audio saved to output_example1.wav") + else: + print("ℹ Set DEEPGRAM_API_KEY to generate audio") + + +def example_add_pronunciation_function(): + """Example 2: Using add_pronunciation standalone function""" + print("\nExample 2: Standalone add_pronunciation Function") + print("-" * 50) + + # Start with plain text + text = "The patient should take methotrexate weekly and adalimumab biweekly." 
+ + # Add pronunciations for medical terms + text = add_pronunciation(text, "methotrexate", "mɛθəˈtrɛkseɪt") + text = add_pronunciation(text, "adalimumab", "ˌædəˈljuːməb") + + print(f"Generated text: {text}") + + api_key = os.getenv("DEEPGRAM_API_KEY") + if api_key: + client = DeepgramClient(api_key=api_key) + + response = client.speak.v1.generate( + text, + model=AudioGenerateRequestModel.AURA_ASTERIA_EN, + ) + + with open("output_example2.wav", "wb") as f: + f.write(response) + + print("✓ Audio saved to output_example2.wav") + else: + print("ℹ Set DEEPGRAM_API_KEY to generate audio") + + +def example_ssml_migration(): + """Example 3: Migrating from SSML to Deepgram format""" + print("\nExample 3: SSML Migration") + print("-" * 50) + + # Existing SSML from another TTS provider + ssml = """ + Welcome to your medication guide. + + Take azathioprine + as prescribed. + + Contact your doctor if you experience side effects. + """ + + # Convert to Deepgram format + text = ssml_to_deepgram(ssml) + + print(f"Converted SSML: {text}") + + api_key = os.getenv("DEEPGRAM_API_KEY") + if api_key: + client = DeepgramClient(api_key=api_key) + + response = client.speak.v1.generate( + text, + model=AudioGenerateRequestModel.AURA_ASTERIA_EN, + ) + + with open("output_example3.wav", "wb") as f: + f.write(response) + + print("✓ Audio saved to output_example3.wav") + else: + print("ℹ Set DEEPGRAM_API_KEY to generate audio") + + +def example_mixed_ssml_and_builder(): + """Example 4: Mixing SSML parsing with additional builder methods""" + print("\nExample 4: Mixed SSML and Builder") + print("-" * 50) + + # Start with some SSML content + ssml = 'Take medicine daily.' 
+ + # Use builder to add more content + text = ( + TextBuilder() + .from_ssml(ssml) + .pause(500) + .text(" Store at room temperature.") + .pause(500) + .text(" Keep out of reach of children.") + .build() + ) + + print(f"Generated text: {text}") + + api_key = os.getenv("DEEPGRAM_API_KEY") + if api_key: + client = DeepgramClient(api_key=api_key) + + response = client.speak.v1.generate( + text, + model=AudioGenerateRequestModel.AURA_ASTERIA_EN, + ) + + with open("output_example4.wav", "wb") as f: + f.write(response) + + print("✓ Audio saved to output_example4.wav") + else: + print("ℹ Set DEEPGRAM_API_KEY to generate audio") + + +def example_pharmacy_instructions(): + """Example 5: Complete pharmacy instruction with multiple pronunciations + + Every pause is 500 ms, the shortest duration TextBuilder.pause() accepts + (valid range is 500-5000 ms in 100 ms increments). + """ + print("\nExample 5: Pharmacy Instructions") + print("-" * 50) + + text = ( + TextBuilder() + .text("Prescription for ") + .pronunciation("lisinopril", "laɪˈsɪnəprɪl") + .pause(500) + .text(" Take one tablet by mouth daily for hypertension.") + .pause(500) + .text(" Common side effects may include ") + .pronunciation("hypotension", "ˌhaɪpoʊˈtɛnʃən") + .text(" or dizziness.") + .pause(500) + .text(" Do not take with ") + .pronunciation("aliskiren", "əˈlɪskɪrɛn") + .text(" or ") + .pronunciation("sacubitril", "səˈkjuːbɪtrɪl") + .pause(500) + .text(" Call your doctor if symptoms worsen.") + .build() + ) + + print(f"Generated text: {text}") + + api_key = os.getenv("DEEPGRAM_API_KEY") + if api_key: + client = DeepgramClient(api_key=api_key) + + response = client.speak.v1.generate( + text, + model=AudioGenerateRequestModel.AURA_ASTERIA_EN, + encoding=AudioGenerateRequestEncoding.LINEAR16, + ) + + with open("output_example5.wav", "wb") as f: + f.write(response) + + print("✓ Audio saved to output_example5.wav") + else: + print("ℹ Set DEEPGRAM_API_KEY to generate audio") + + +def main(): + """Run all examples""" + example_basic_text_builder() + example_add_pronunciation_function() + example_ssml_migration() + 
example_mixed_ssml_and_builder() + example_pharmacy_instructions() + + print("\n" + "=" * 50) + print("All examples completed!") + print("=" * 50) + + +if __name__ == "__main__": + main() diff --git a/examples/24-text-builder-streaming.py b/examples/24-text-builder-streaming.py new file mode 100644 index 00000000..a239f88e --- /dev/null +++ b/examples/24-text-builder-streaming.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Example: TextBuilder with Streaming TTS (WebSocket) + +This example demonstrates using TextBuilder with streaming text-to-speech +over WebSocket for real-time audio generation. +""" + +import os +from typing import Union + +from deepgram import DeepgramClient, TextBuilder +from deepgram.core.events import EventType +from deepgram.speak.v1.types import SpeakV1Close, SpeakV1Flush, SpeakV1Text + +SpeakV1SocketClientResponse = Union[str, bytes] + + +def example_streaming_with_textbuilder(): + """Stream TTS audio using TextBuilder for pronunciation control""" + print("Example: Streaming TTS with TextBuilder") + print("-" * 50) + + # Build text with pronunciations and pauses + text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections.") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + print(f"Generated text: {text}\n") + + api_key = os.getenv("DEEPGRAM_API_KEY") + if not api_key: + print("ℹ Set DEEPGRAM_API_KEY to stream audio") + return + + client = DeepgramClient(api_key=api_key) + + try: + with client.speak.v1.connect( + model="aura-asteria-en", encoding="linear16", sample_rate=24000 + ) as connection: + + def on_message(message: SpeakV1SocketClientResponse) -> None: + if isinstance(message, bytes): + print(f"Received {len(message)} bytes of audio data") + # Write audio to file + with open("streaming_output.raw", "ab") as audio_file: + audio_file.write(message) + else: + msg_type = 
getattr(message, "type", "Unknown") + print(f"Received {msg_type} event") + + connection.on(EventType.OPEN, lambda _: print("✓ Connection opened")) + connection.on(EventType.MESSAGE, on_message) + connection.on(EventType.CLOSE, lambda _: print("✓ Connection closed")) + connection.on(EventType.ERROR, lambda error: print(f"✗ Error: {error}")) + + # Send the TextBuilder-generated text + text_message = SpeakV1Text(text=text) + connection.send_speak_v_1_text(text_message) + + # Flush to ensure all text is processed + flush_message = SpeakV1Flush() + connection.send_speak_v_1_flush(flush_message) + + # Close the connection when done + close_message = SpeakV1Close() + connection.send_speak_v_1_close(close_message) + + # Start listening - this blocks until the connection closes + connection.start_listening() + + print("\n✓ Audio saved to streaming_output.raw") + print(" Convert to WAV: ffmpeg -f s16le -ar 24000 -ac 1 -i streaming_output.raw output.wav") + + except Exception as e: + print(f"✗ Error: {e}") + + +def example_multiple_messages(): + """Stream multiple TextBuilder messages sequentially""" + print("\n\nExample: Multiple Messages with Streaming") + print("-" * 50) + + # Build multiple text segments + intro = TextBuilder().text("Welcome to your medication guide.").build() + + instruction1 = ( + TextBuilder() + .text("First, take ") + .pronunciation("methotrexate", "mɛθəˈtrɛkseɪt") + .text(" on Mondays.") + .build() + ) + + instruction2 = ( + TextBuilder() + .text("Then, inject ") + .pronunciation("adalimumab", "ˌædəˈljuːməb") + .text(" on Fridays.") + .build() + ) + + closing = TextBuilder().text("Contact your doctor with any questions.").build() + + api_key = os.getenv("DEEPGRAM_API_KEY") + if not api_key: + print("ℹ Set DEEPGRAM_API_KEY to stream audio") + return + + client = DeepgramClient(api_key=api_key) + + try: + with client.speak.v1.connect( + model="aura-asteria-en", encoding="linear16", sample_rate=24000 + ) as connection: + + audio_chunks = [] + + def 
on_message(message: SpeakV1SocketClientResponse) -> None: + if isinstance(message, bytes): + audio_chunks.append(message) + print(f"Received {len(message)} bytes") + + connection.on(EventType.OPEN, lambda _: print("✓ Connection opened")) + connection.on(EventType.MESSAGE, on_message) + connection.on(EventType.CLOSE, lambda _: print("✓ Connection closed")) + + # Send multiple messages + for i, text in enumerate([intro, instruction1, instruction2, closing], 1): + print(f"Sending message {i}: {text[:50]}...") + connection.send_speak_v_1_text(SpeakV1Text(text=text)) + + connection.send_speak_v_1_flush(SpeakV1Flush()) + connection.send_speak_v_1_close(SpeakV1Close()) + + connection.start_listening() + + # Save all audio + with open("streaming_multi.raw", "wb") as f: + for chunk in audio_chunks: + f.write(chunk) + + print(f"\n✓ Saved {len(audio_chunks)} audio chunks to streaming_multi.raw") + + except Exception as e: + print(f"✗ Error: {e}") + + +def main(): + """Run all streaming examples""" + example_streaming_with_textbuilder() + example_multiple_messages() + + print("\n" + "=" * 50) + print("All streaming examples completed!") + print("=" * 50) + + +if __name__ == "__main__": + main() + diff --git a/examples/09-voice-agent.py b/examples/30-voice-agent.py similarity index 100% rename from examples/09-voice-agent.py rename to examples/30-voice-agent.py diff --git a/examples/12-text-intelligence.py b/examples/40-text-intelligence.py similarity index 100% rename from examples/12-text-intelligence.py rename to examples/40-text-intelligence.py diff --git a/examples/13-management-projects.py b/examples/50-management-projects.py similarity index 100% rename from examples/13-management-projects.py rename to examples/50-management-projects.py diff --git a/examples/14-management-keys.py b/examples/51-management-keys.py similarity index 100% rename from examples/14-management-keys.py rename to examples/51-management-keys.py diff --git a/examples/15-management-members.py 
b/examples/52-management-members.py similarity index 100% rename from examples/15-management-members.py rename to examples/52-management-members.py diff --git a/examples/16-management-invites.py b/examples/53-management-invites.py similarity index 100% rename from examples/16-management-invites.py rename to examples/53-management-invites.py diff --git a/examples/17-management-usage.py b/examples/54-management-usage.py similarity index 100% rename from examples/17-management-usage.py rename to examples/54-management-usage.py diff --git a/examples/18-management-billing.py b/examples/55-management-billing.py similarity index 100% rename from examples/18-management-billing.py rename to examples/55-management-billing.py diff --git a/examples/19-management-models.py b/examples/56-management-models.py similarity index 100% rename from examples/19-management-models.py rename to examples/56-management-models.py diff --git a/examples/20-onprem-credentials.py b/examples/60-onprem-credentials.py similarity index 100% rename from examples/20-onprem-credentials.py rename to examples/60-onprem-credentials.py diff --git a/examples/23-request-options.py b/examples/70-request-options.py similarity index 100% rename from examples/23-request-options.py rename to examples/70-request-options.py diff --git a/examples/24-error-handling.py b/examples/71-error-handling.py similarity index 100% rename from examples/24-error-handling.py rename to examples/71-error-handling.py diff --git a/examples/README.md b/examples/README.md index 4499527e..61cf030c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,54 +1,57 @@ # Deepgram Python SDK Examples -This directory contains comprehensive examples demonstrating how to use the Deepgram Python SDK. These examples cover all major use cases and demonstrate production-ready patterns. +This directory contains comprehensive examples demonstrating how to use the Deepgram Python SDK. 
Examples are organized by feature area, with each section starting at a multiple of 10. ## Examples Overview -### Authentication +### 01-09: Authentication - **01-authentication-api-key.py** - API key authentication - **02-authentication-access-token.py** - Access token authentication -### Transcription +### 10-19: Transcription (Listen) -- **04-transcription-prerecorded-url.py** - Transcribe audio from URL -- **05-transcription-prerecorded-file.py** - Transcribe audio from local file -- **06-transcription-prerecorded-callback.py** - Async transcription with callbacks -- **07-transcription-live-websocket.py** - Live transcription via WebSocket (Listen V1) -- **22-transcription-advanced-options.py** - Advanced transcription options -- **26-transcription-live-websocket-v2.py** - Live transcription via WebSocket (Listen V2) +- **10-transcription-prerecorded-url.py** - Transcribe audio from URL +- **11-transcription-prerecorded-file.py** - Transcribe audio from local file +- **12-transcription-prerecorded-callback.py** - Async transcription with callbacks +- **13-transcription-live-websocket.py** - Live transcription via WebSocket (Listen V1) +- **14-transcription-live-websocket-v2.py** - Live transcription via WebSocket (Listen V2) +- **15-transcription-advanced-options.py** - Advanced transcription options -### Voice Agent +### 20-29: Text-to-Speech (Speak) -- **09-voice-agent.py** - Voice Agent configuration and usage +- **20-text-to-speech-single.py** - Single request TTS (REST API) +- **21-text-to-speech-streaming.py** - Streaming TTS via WebSocket +- **22-text-builder-demo.py** - TextBuilder demo (no API key required) +- **23-text-builder-helper.py** - TextBuilder with REST API generation +- **24-text-builder-streaming.py** - TextBuilder with streaming TTS (WebSocket) -### Text-to-Speech +### 30-39: Voice Agent -- **10-text-to-speech-single.py** - Single request TTS -- **11-text-to-speech-streaming.py** - Streaming TTS via WebSocket +- **30-voice-agent.py** - 
Voice Agent configuration and usage -### Text Intelligence +### 40-49: Text Intelligence (Read) -- **12-text-intelligence.py** - Text analysis using AI features +- **40-text-intelligence.py** - Text analysis using AI features -### Management API +### 50-59: Management API -- **13-management-projects.py** - Project management (list, get, update, delete) -- **14-management-keys.py** - API key management (list, get, create, delete) -- **15-management-members.py** - Member management (list, remove, scopes) -- **16-management-invites.py** - Invitation management (list, send, delete, leave) -- **17-management-usage.py** - Usage statistics and request information -- **18-management-billing.py** - Billing and balance information -- **19-management-models.py** - Model information +- **50-management-projects.py** - Project management (list, get, update, delete) +- **51-management-keys.py** - API key management (list, get, create, delete) +- **52-management-members.py** - Member management (list, remove, scopes) +- **53-management-invites.py** - Invitation management (list, send, delete, leave) +- **54-management-usage.py** - Usage statistics and request information +- **55-management-billing.py** - Billing and balance information +- **56-management-models.py** - Model information -### On-Premises +### 60-69: On-Premises -- **20-onprem-credentials.py** - On-premises credentials management +- **60-onprem-credentials.py** - On-premises credentials management -### Configuration & Advanced +### 70-79: Configuration & Advanced -- **23-request-options.py** - Request options including additional query parameters -- **24-error-handling.py** - Error handling patterns +- **70-request-options.py** - Request options including additional query parameters +- **71-error-handling.py** - Error handling patterns ## Usage diff --git a/src/deepgram/__init__.py b/src/deepgram/__init__.py index 548f35cb..aa16ece1 100644 --- a/src/deepgram/__init__.py +++ b/src/deepgram/__init__.py @@ -187,6 +187,7 
@@ from . import agent, auth, listen, manage, read, self_hosted, speak from .client import AsyncDeepgramClient, DeepgramClient from .environment import DeepgramClientEnvironment + from .helpers import TextBuilder, add_pronunciation, ssml_to_deepgram, validate_ipa, validate_pause from .requests import ( AgentThinkModelsV1ResponseModelsItemIdParams, AgentThinkModelsV1ResponseModelsItemOneParams, @@ -604,7 +605,12 @@ "SpeakV1Model": ".types", "SpeakV1Response": ".types", "SpeakV1SampleRate": ".types", + "TextBuilder": ".helpers", "UpdateProjectMemberScopesV1Response": ".types", + "add_pronunciation": ".helpers", + "ssml_to_deepgram": ".helpers", + "validate_ipa": ".helpers", + "validate_pause": ".helpers", "UpdateProjectMemberScopesV1ResponseParams": ".requests", "UpdateProjectV1Response": ".types", "UpdateProjectV1ResponseParams": ".requests", @@ -943,7 +949,12 @@ def __dir__(): "SpeakV1Model", "SpeakV1Response", "SpeakV1SampleRate", + "TextBuilder", "UpdateProjectMemberScopesV1Response", + "add_pronunciation", + "ssml_to_deepgram", + "validate_ipa", + "validate_pause", "UpdateProjectMemberScopesV1ResponseParams", "UpdateProjectV1Response", "UpdateProjectV1ResponseParams", diff --git a/src/deepgram/helpers/README.md b/src/deepgram/helpers/README.md new file mode 100644 index 00000000..bfb4e214 --- /dev/null +++ b/src/deepgram/helpers/README.md @@ -0,0 +1,56 @@ +# Deepgram SDK Helpers + +This module contains custom helper utilities for working with Deepgram APIs that are not auto-generated. + +## TextBuilder + +The `TextBuilder` class provides a fluent interface for constructing Text-to-Speech (TTS) text with pronunciation and pause controls. 
+ +### Quick Example + +```python +from deepgram import DeepgramClient, TextBuilder + +# Build text with pronunciations and pauses +text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .pause(500) + .text(" twice daily.") + .build() +) + +# Use with Deepgram TTS +client = DeepgramClient(api_key="YOUR_API_KEY") +response = client.speak.v1.generate(text, model="aura-asteria-en") +``` + +### Available Functions + +#### TextBuilder Class + +- `text(content: str)` - Add plain text +- `pronunciation(word: str, ipa: str)` - Add word with IPA pronunciation +- `pause(duration_ms: int)` - Add pause (500-5000ms, 100ms increments) +- `from_ssml(ssml_text: str)` - Parse and convert SSML markup +- `build()` - Return final formatted text + +#### Standalone Functions + +- `add_pronunciation(text, word, ipa)` - Replace word with pronunciation +- `ssml_to_deepgram(ssml_text)` - Convert SSML to Deepgram format +- `validate_ipa(ipa)` - Validate IPA pronunciation string +- `validate_pause(duration_ms)` - Validate pause duration + +### Documentation + +See [TextBuilder-Guide.md](../../../docs/TextBuilder-Guide.md) for comprehensive documentation. + +### Examples + +See [examples/23-text-builder-helper.py](../../../examples/23-text-builder-helper.py) for usage examples. + +## Future Helpers + +This module may be extended with additional helper utilities for other Deepgram features. diff --git a/src/deepgram/helpers/__init__.py b/src/deepgram/helpers/__init__.py new file mode 100644 index 00000000..d975d017 --- /dev/null +++ b/src/deepgram/helpers/__init__.py @@ -0,0 +1,21 @@ +""" +Deepgram Helper Utilities + +Custom helper functions and classes for working with Deepgram APIs. 
"""
TTS Text Builder and Utilities

Provides helper methods for constructing TTS text with pronunciation and
pause controls for Deepgram's Text-to-Speech API.
"""

import json
import re
from typing import List, Tuple

# A JSON string literal, allowing backslash-escaped characters such as \".
_JSON_STRING = r'"(?:[^"\\]|\\.)*"'

# Inline control markers as emitted by TextBuilder and ssml_to_deepgram().
# Group 1 of the pronunciation pattern is the JSON-encoded word.
_PRONUNCIATION_RE = re.compile(
    r'\{"word":\s*(' + _JSON_STRING + r'),\s*"pronounce":\s*' + _JSON_STRING + r"\}"
)
_PAUSE_RE = re.compile(r"\{pause:\d+\}")

# SSML elements understood by ssml_to_deepgram(). Attribute values may use
# either quote style; the back-reference forces the closing quote to match.
_SPEAK_RE = re.compile(r"<speak[^>]*>(.*?)</speak>", re.DOTALL)
_PHONEME_RE = re.compile(
    r"<phoneme\s+alphabet=([\"'])ipa\1\s+ph=([\"'])(?P<ipa>.+?)\2\s*>(?P<word>.*?)</phoneme>"
)
_BREAK_RE = re.compile(
    r"<break\s+time=([\"'])(?P<value>\d+(?:\.\d+)?)(?P<unit>ms|s)\1\s*/?>"
)
_ANY_TAG_RE = re.compile(r"<[^>]+>")

# Characters rejected in IPA strings, in the order they are reported.
_INVALID_IPA_CHARS = ('"', "\\", "\n", "\r", "\t")
_MAX_IPA_LENGTH = 100


def _pronunciation_json(word: str, ipa: str) -> str:
    """Return the inline pronunciation marker for *word* as a JSON object string."""
    return json.dumps({"word": word, "pronounce": ipa}, ensure_ascii=False)


class TextBuilder:
    """
    Fluent builder for constructing TTS text with pronunciation and pause controls.

    Example:
        text = (
            TextBuilder()
            .text("Take ")
            .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn")
            .text(" twice daily with ")
            .pronunciation("dupilumab", "duːˈpɪljuːmæb")
            .text(" injections")
            .pause(500)
            .text(" Do not exceed prescribed dosage.")
            .build()
        )
    """

    # Per-request limits enforced by the builder.
    MAX_PRONUNCIATIONS = 500
    MAX_PAUSES = 50
    MAX_CHARACTERS = 2000

    def __init__(self) -> None:
        """Initialize empty text builder."""
        self._parts: List[str] = []
        self._pronunciation_count = 0
        self._pause_count = 0
        # Spoken characters only: plain text plus the original words of
        # pronunciations; control syntax and IPA are excluded.
        self._char_count = 0

    def text(self, content: str) -> "TextBuilder":
        """
        Add plain text. Returns self for chaining.

        Args:
            content: Plain text to add (empty content is ignored)

        Returns:
            Self for method chaining
        """
        if content:
            self._parts.append(content)
            self._char_count += len(content)
        return self

    def pronunciation(self, word: str, ipa: str) -> "TextBuilder":
        """
        Add a word with custom pronunciation.
        Formats as: {"word": "word", "pronounce": "ipa"}
        Returns self for chaining.

        Args:
            word: The word to be pronounced
            ipa: IPA pronunciation string

        Returns:
            Self for method chaining

        Raises:
            ValueError: If pronunciation limit exceeded or validation fails
        """
        is_valid, error_msg = validate_ipa(ipa)
        if not is_valid:
            raise ValueError(error_msg)

        if self._pronunciation_count >= self.MAX_PRONUNCIATIONS:
            raise ValueError(
                f"Maximum {self.MAX_PRONUNCIATIONS} pronunciations per request exceeded"
            )

        self._parts.append(_pronunciation_json(word, ipa))
        self._pronunciation_count += 1
        self._char_count += len(word)  # Count original word, not IPA

        return self

    def pause(self, duration_ms: int) -> "TextBuilder":
        """
        Add a pause in milliseconds.
        Formats as: {pause:duration_ms}
        Valid range: 500-5000ms in 100ms increments.
        Returns self for chaining.

        Args:
            duration_ms: Pause duration in milliseconds (500-5000, increments of 100)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If pause limit exceeded or validation fails
        """
        is_valid, error_msg = validate_pause(duration_ms)
        if not is_valid:
            raise ValueError(error_msg)

        if self._pause_count >= self.MAX_PAUSES:
            raise ValueError(f"Maximum {self.MAX_PAUSES} pauses per request exceeded")

        self._parts.append(f"{{pause:{duration_ms}}}")
        self._pause_count += 1

        return self

    def from_ssml(self, ssml_text: str) -> "TextBuilder":
        """
        Parse SSML and convert to Deepgram's inline format.
        Supports:
        - <phoneme alphabet="ipa" ph="...">word</phoneme> → pronunciation()
        - <break time="500ms"/> → pause()
        - Plain text → text()
        Returns self for chaining.

        Args:
            ssml_text: SSML-formatted text

        Returns:
            Self for method chaining

        Raises:
            ValueError: If the converted SSML would push the builder past the
                pronunciation or pause limit. The builder is left unchanged.
        """
        converted = ssml_to_deepgram(ssml_text)
        if not converted:
            return self

        pronunciations, pauses, characters = self._count_controls(converted)

        # Apply the same limits as pronunciation()/pause(), and check them
        # before mutating so a rejected import leaves no partial state behind.
        if self._pronunciation_count + pronunciations > self.MAX_PRONUNCIATIONS:
            raise ValueError(
                f"Maximum {self.MAX_PRONUNCIATIONS} pronunciations per request exceeded"
            )
        if self._pause_count + pauses > self.MAX_PAUSES:
            raise ValueError(f"Maximum {self.MAX_PAUSES} pauses per request exceeded")

        self._parts.append(converted)
        self._pronunciation_count += pronunciations
        self._pause_count += pauses
        self._char_count += characters

        return self

    @staticmethod
    def _count_controls(text: str) -> Tuple[int, int, int]:
        """
        Measure already-formatted text.

        Returns:
            Tuple of (pronunciation_count, pause_count, spoken_char_count).
            Spoken characters are the text outside control markers plus the
            original word of each pronunciation, matching how
            pronunciation() counts.
        """
        word_chars = 0
        pronunciations = 0
        for match in _PRONUNCIATION_RE.finditer(text):
            pronunciations += 1
            raw_word = match.group(1)
            try:
                # strict=False tolerates raw control characters in the literal.
                word_chars += len(json.loads(raw_word, strict=False))
            except ValueError:
                # Not decodable as JSON: fall back to the length between quotes.
                word_chars += len(raw_word) - 2

        pauses = len(_PAUSE_RE.findall(text))
        plain_text = _PAUSE_RE.sub("", _PRONUNCIATION_RE.sub("", text))
        return pronunciations, pauses, len(plain_text) + word_chars

    def _update_counts_from_text(self, text: str) -> None:
        """Update internal counters from already-formatted text."""
        pronunciations, pauses, characters = self._count_controls(text)
        self._pronunciation_count += pronunciations
        self._pause_count += pauses
        self._char_count += characters

    def build(self) -> str:
        """
        Return the final formatted text string.

        Returns:
            The complete formatted text ready for TTS

        Raises:
            ValueError: If character limit exceeded
        """
        # The limit applies to spoken characters, not to the control syntax.
        if self._char_count > self.MAX_CHARACTERS:
            raise ValueError(
                f"Text exceeds {self.MAX_CHARACTERS} character limit "
                f"(current: {self._char_count} characters)"
            )

        return "".join(self._parts)


def add_pronunciation(text: str, word: str, ipa: str) -> str:
    """
    Replace the first whole-word occurrence of word in text with a pronunciation control.

    Matching is case-sensitive. If word is empty or does not occur as a whole
    word, text is returned unchanged.

    Args:
        text: Source text containing the word
        word: Word to replace
        ipa: IPA pronunciation string

    Returns:
        Text with word replaced by {"word": "word", "pronounce": "ipa"}

    Raises:
        ValueError: If ipa fails validate_ipa()

    Example:
        text = "Take azathioprine twice daily with dupilumab injections."
        text = add_pronunciation(text, "azathioprine", "ˌæzəˈθaɪəpriːn")
        text = add_pronunciation(text, "dupilumab", "duːˈpɪljuːmæb")
    """
    is_valid, error_msg = validate_ipa(ipa)
    if not is_valid:
        raise ValueError(error_msg)

    # An empty word would match at the first position and insert a bogus marker.
    if not word:
        return text

    replacement = _pronunciation_json(word, ipa)

    # Lookarounds instead of \b so words that start or end with a non-word
    # character (e.g. "C++") can still be matched as whole words.
    pattern = r"(?<!\w)" + re.escape(word) + r"(?!\w)"

    # A callable replacement is inserted literally. Passing the JSON as a
    # template string would let re.sub re-interpret its backslash escapes
    # (\\ -> \, \n -> newline) and corrupt the payload.
    return re.sub(pattern, lambda _match: replacement, text, count=1)


def ssml_to_deepgram(ssml_text: str) -> str:
    """
    Convert SSML markup to Deepgram's inline JSON format.

    Supports:
    - <phoneme alphabet="ipa" ph="...">word</phoneme>
    - <break time="500ms"/> or <break time="0.5s"/>
    - Strips <speak> wrapper tags

    Any other tags are removed and their text content is kept. Break
    durations that are not valid pauses are rounded to the nearest 100ms and
    clamped to 500-5000ms.

    Args:
        ssml_text: SSML-formatted text

    Returns:
        Deepgram-formatted text

    Example:
        ssml = '''<speak>
            Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
            <break time="500ms"/> Do not exceed dosage.
        </speak>'''
        text = ssml_to_deepgram(ssml)
    """
    ssml_text = ssml_text.strip()

    # If wrapped in <speak> tags, keep only the content.
    speak_match = _SPEAK_RE.search(ssml_text)
    if speak_match:
        ssml_text = speak_match.group(1)

    def replace_phoneme(match: "re.Match[str]") -> str:
        return _pronunciation_json(match.group("word"), match.group("ipa"))

    def replace_break(match: "re.Match[str]") -> str:
        value = float(match.group("value"))
        duration_ms = int(value * 1000) if match.group("unit") == "s" else int(value)

        is_valid, _ = validate_pause(duration_ms)
        if not is_valid:
            # Round to the nearest 100ms step, then clamp into the valid range.
            duration_ms = max(500, min(5000, round(duration_ms / 100) * 100))

        return f"{{pause:{duration_ms}}}"

    ssml_text = _PHONEME_RE.sub(replace_phoneme, ssml_text)
    ssml_text = _BREAK_RE.sub(replace_break, ssml_text)

    # Remove any remaining XML tags.
    ssml_text = _ANY_TAG_RE.sub("", ssml_text)

    return ssml_text.strip()


def validate_ipa(ipa: str) -> Tuple[bool, str]:
    """
    Validate IPA string format.

    Args:
        ipa: IPA pronunciation string

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    # Type check first so falsy non-strings (None, 0) get the accurate message.
    if not isinstance(ipa, str):
        return False, "IPA pronunciation must be a string"

    if not ipa:
        return False, "IPA pronunciation cannot be empty"

    # IPA should not contain certain characters that would break JSON
    for char in _INVALID_IPA_CHARS:
        if char in ipa:
            return False, f"IPA pronunciation contains invalid character: {repr(char)}"

    if len(ipa) > _MAX_IPA_LENGTH:
        return False, "IPA pronunciation exceeds 100 character limit"

    return True, ""


def validate_pause(duration_ms: int) -> Tuple[bool, str]:
    """
    Validate pause duration (500-5000ms, 100ms increments).

    Args:
        duration_ms: Pause duration in milliseconds

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    if not isinstance(duration_ms, int):
        return False, "Pause duration must be an integer"

    if duration_ms < 500:
        return False, "Pause duration must be at least 500ms"

    if duration_ms > 5000:
        return False, "Pause duration must not exceed 5000ms"

    if duration_ms % 100 != 0:
        return False, "Pause duration must be in 100ms increments"

    return True, ""
TextBuilder() + result = builder.pause(500).build() + assert result == "{pause:500}" + + def test_text_with_pause(self): + """Test mixing text and pause""" + builder = TextBuilder() + result = ( + builder + .text("Hello") + .pause(1000) + .text("world") + .build() + ) + assert result == "Hello{pause:1000}world" + + def test_complex_chain(self): + """Test complex chaining with all features""" + builder = TextBuilder() + result = ( + builder + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + assert "Take " in result + assert '"word": "azathioprine"' in result + assert " twice daily with " in result + assert '"word": "dupilumab"' in result + assert " injections" in result + assert "{pause:500}" in result + assert " Do not exceed prescribed dosage." in result + + def test_pronunciation_limit(self): + """Test pronunciation count limit (500 max)""" + builder = TextBuilder() + + # Add 500 pronunciations (should work) + for i in range(500): + builder.pronunciation(f"word{i}", "test") + + # 501st should raise error + with pytest.raises(ValueError, match="Maximum 500 pronunciations"): + builder.pronunciation("extra", "test") + + def test_pause_limit(self): + """Test pause count limit (50 max)""" + builder = TextBuilder() + + # Add 50 pauses (should work) + for i in range(50): + builder.pause(500) + + # 51st should raise error + with pytest.raises(ValueError, match="Maximum 50 pauses"): + builder.pause(500) + + def test_character_limit(self): + """Test character count validation (2000 max)""" + builder = TextBuilder() + + # Add text approaching the limit + builder.text("x" * 2000) + + # Should work at exactly 2000 + result = builder.build() + assert len(result) == 2000 + + # Exceeding should raise error + builder2 = TextBuilder() + builder2.text("x" * 2001) + with 
pytest.raises(ValueError, match="exceeds 2000 character limit"): + builder2.build() + + def test_empty_builder(self): + """Test building with no content""" + builder = TextBuilder() + result = builder.build() + assert result == "" + + def test_invalid_ipa(self): + """Test validation of IPA pronunciation""" + builder = TextBuilder() + + # Should reject IPA with invalid characters + with pytest.raises(ValueError, match="invalid character"): + builder.pronunciation("word", 'invalid"quote') + + with pytest.raises(ValueError, match="invalid character"): + builder.pronunciation("word", "invalid\nline") + + def test_invalid_pause_duration(self): + """Test pause duration validation""" + builder = TextBuilder() + + # Too short + with pytest.raises(ValueError, match="at least 500ms"): + builder.pause(400) + + # Too long + with pytest.raises(ValueError, match="not exceed 5000ms"): + builder.pause(5001) + + # Not in 100ms increments + with pytest.raises(ValueError, match="100ms increments"): + builder.pause(550) + + def test_pause_boundary_values(self): + """Test pause at valid boundaries""" + builder = TextBuilder() + + # Minimum valid + result1 = builder.pause(500).build() + assert "{pause:500}" in result1 + + # Maximum valid + builder2 = TextBuilder() + result2 = builder2.pause(5000).build() + assert "{pause:5000}" in result2 + + +class TestAddPronunciation: + """Tests for the add_pronunciation function""" + + def test_basic_replacement(self): + """Test basic word replacement""" + text = "Take azathioprine twice daily." + result = add_pronunciation(text, "azathioprine", "ˌæzəˈθaɪəpriːn") + + assert '"word": "azathioprine"' in result + assert '"pronounce": "ˌæzəˈθaɪəpriːn"' in result + assert "Take " in result + assert " twice daily." in result + + def test_multiple_replacements(self): + """Test replacing multiple words""" + text = "Take azathioprine twice daily with dupilumab injections." 
+ text = add_pronunciation(text, "azathioprine", "ˌæzəˈθaɪəpriːn") + text = add_pronunciation(text, "dupilumab", "duːˈpɪljuːmæb") + + assert '"word": "azathioprine"' in text + assert '"word": "dupilumab"' in text + + def test_whole_word_only(self): + """Test that replacement only affects whole words""" + text = "The therapist prescribed therapy." + result = add_pronunciation(text, "The", "ðiː") + + # Should only replace "The", not "the" in "therapist" or "therapy" + assert result.count('"word"') == 1 + assert "therapist" in result + assert "therapy" in result + + def test_case_sensitive(self): + """Test that replacement is case-sensitive""" + text = "Take azathioprine. AZATHIOPRINE is different." + result = add_pronunciation(text, "azathioprine", "test") + + # Should only replace lowercase version (first occurrence) + assert result.count('"word"') == 1 + assert "AZATHIOPRINE" in result + + def test_word_not_found(self): + """Test replacement when word is not in text""" + text = "Hello world" + result = add_pronunciation(text, "missing", "test") + + # Text should be unchanged + assert result == text + + +class TestSsmlToDeepgram: + """Tests for SSML conversion""" + + def test_basic_phoneme(self): + """Test converting basic phoneme tag""" + ssml = 'azathioprine' + result = ssml_to_deepgram(ssml) + + assert '"word": "azathioprine"' in result + assert '"pronounce": "ˌæzəˈθaɪəpriːn"' in result + + def test_basic_break(self): + """Test converting break tag (milliseconds)""" + ssml = '' + result = ssml_to_deepgram(ssml) + + assert result == "{pause:500}" + + def test_break_seconds(self): + """Test converting break tag (seconds)""" + ssml = '' + result = ssml_to_deepgram(ssml) + + assert result == "{pause:500}" + + def test_speak_wrapper(self): + """Test handling wrapper tag""" + ssml = 'Hello world' + result = ssml_to_deepgram(ssml) + + assert result == "Hello world" + + def test_complex_ssml(self): + """Test complex SSML with multiple elements""" + ssml = ''' + Take 
azathioprine + Do not exceed dosage. + ''' + result = ssml_to_deepgram(ssml) + + assert '"word": "azathioprine"' in result + assert "{pause:500}" in result + assert "Do not exceed dosage." in result + + def test_multiple_phonemes(self): + """Test multiple phoneme tags""" + ssml = '''Take azathioprine + with dupilumab''' + result = ssml_to_deepgram(ssml) + + assert '"word": "azathioprine"' in result + assert '"word": "dupilumab"' in result + + def test_plain_text(self): + """Test plain text without SSML tags""" + text = "Hello world" + result = ssml_to_deepgram(text) + + assert result == text + + def test_break_out_of_range(self): + """Test break with out-of-range duration (should round to valid)""" + ssml = '' + result = ssml_to_deepgram(ssml) + + # Should round to nearest valid value (500ms) + assert "{pause:" in result + + +class TestFromSsml: + """Tests for TextBuilder.from_ssml() method""" + + def test_from_ssml_basic(self): + """Test parsing SSML into builder""" + ssml = 'Hello world' + builder = TextBuilder() + result = builder.from_ssml(ssml).build() + + assert "Hello world" in result + + def test_from_ssml_with_additional_text(self): + """Test mixing SSML parsing with additional builder methods""" + ssml = 'Take medicine' + builder = TextBuilder() + result = ( + builder + .from_ssml(ssml) + .pause(500) + .text(" Do not exceed dosage.") + .build() + ) + + assert '"word": "medicine"' in result + assert "{pause:500}" in result + assert "Do not exceed dosage." 
in result + + def test_from_ssml_counts_pronunciations(self): + """Test that from_ssml updates internal counters""" + # Create SSML with pronunciations + pronunciations = ''.join([ + f'word{i} ' + for i in range(500) + ]) + ssml = f'{pronunciations}' + + builder = TextBuilder() + builder.from_ssml(ssml) + + # Should hit the limit + with pytest.raises(ValueError, match="Maximum 500 pronunciations"): + builder.pronunciation("extra", "test") + + +class TestValidateIpa: + """Tests for IPA validation""" + + def test_valid_ipa(self): + """Test valid IPA strings""" + is_valid, msg = validate_ipa("ˌæzəˈθaɪəpriːn") + assert is_valid is True + assert msg == "" + + def test_empty_ipa(self): + """Test empty IPA string""" + is_valid, msg = validate_ipa("") + assert is_valid is False + assert "cannot be empty" in msg + + def test_invalid_characters(self): + """Test IPA with invalid characters""" + # Double quote + is_valid, msg = validate_ipa('test"quote') + assert is_valid is False + assert "invalid character" in msg + + # Newline + is_valid, msg = validate_ipa("test\nline") + assert is_valid is False + assert "invalid character" in msg + + def test_too_long(self): + """Test IPA exceeding length limit""" + is_valid, msg = validate_ipa("x" * 101) + assert is_valid is False + assert "exceeds 100 character limit" in msg + + def test_not_string(self): + """Test non-string IPA""" + is_valid, msg = validate_ipa(123) + assert is_valid is False + assert "must be a string" in msg + + +class TestValidatePause: + """Tests for pause validation""" + + def test_valid_pauses(self): + """Test valid pause durations""" + # Minimum + is_valid, msg = validate_pause(500) + assert is_valid is True + + # Maximum + is_valid, msg = validate_pause(5000) + assert is_valid is True + + # Mid-range + is_valid, msg = validate_pause(2500) + assert is_valid is True + + def test_too_short(self): + """Test pause below minimum""" + is_valid, msg = validate_pause(400) + assert is_valid is False + assert "at least 
500ms" in msg + + def test_too_long(self): + """Test pause above maximum""" + is_valid, msg = validate_pause(5001) + assert is_valid is False + assert "not exceed 5000ms" in msg + + def test_invalid_increment(self): + """Test pause not in 100ms increments""" + is_valid, msg = validate_pause(550) + assert is_valid is False + assert "100ms increments" in msg + + def test_not_integer(self): + """Test non-integer pause""" + is_valid, msg = validate_pause(500.5) + assert is_valid is False + assert "must be an integer" in msg + + +class TestIntegration: + """Integration tests combining multiple features""" + + def test_medical_example(self): + """Test the medical prescription example from the spec""" + text = ( + TextBuilder() + .text("Take ") + .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn") + .text(" twice daily with ") + .pronunciation("dupilumab", "duːˈpɪljuːmæb") + .text(" injections") + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + # Verify all components are present + assert "Take " in text + assert '"word": "azathioprine"' in text + assert '"pronounce": "ˌæzəˈθaɪəpriːn"' in text + assert " twice daily with " in text + assert '"word": "dupilumab"' in text + assert '"pronounce": "duːˈpɪljuːmæb"' in text + assert " injections" in text + assert "{pause:500}" in text + assert " Do not exceed prescribed dosage." in text + + def test_ssml_migration(self): + """Test SSML to Deepgram migration workflow""" + ssml = ''' + Take azathioprine + Do not exceed dosage. 
+ ''' + + # Method 1: Direct conversion + text1 = ssml_to_deepgram(ssml) + + # Method 2: Using builder + text2 = TextBuilder().from_ssml(ssml).build() + + # Both should produce similar results + assert '"word": "azathioprine"' in text1 + assert "{pause:500}" in text1 + assert '"word": "azathioprine"' in text2 + assert "{pause:500}" in text2 + + def test_builder_with_ssml_and_additions(self): + """Test the mixed usage example from the spec""" + some_imported_ssml = ''' + Take medicine + ''' + + text = ( + TextBuilder() + .from_ssml(some_imported_ssml) + .pause(500) + .text(" Do not exceed prescribed dosage.") + .build() + ) + + assert '"word": "medicine"' in text + assert "{pause:500}" in text + assert " Do not exceed prescribed dosage." in text + + def test_standalone_function_workflow(self): + """Test using standalone add_pronunciation function""" + text = "Take azathioprine twice daily with dupilumab injections." + text = add_pronunciation(text, "azathioprine", "ˌæzəˈθaɪəpriːn") + text = add_pronunciation(text, "dupilumab", "duːˈpɪljuːmæb") + + assert '"word": "azathioprine"' in text + assert '"pronounce": "ˌæzəˈθaɪəpriːn"' in text + assert '"word": "dupilumab"' in text + assert '"pronounce": "duːˈpɪljuːmæb"' in text +