o
    lWi:                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ zd dl	m
Z W n ey5   edw ddlmZ G d	d
 d
eeZG dd deeZG dd dejddZG dd dejddZG dd dZdS )    N)Enum)overload)Required)connectzgThe websockets package is required for realtime speech-to-text. Install it with: pip install websockets   )RealtimeConnectionc                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
AudioFormatz/Audio format options for realtime transcriptionpcm_8000	pcm_16000	pcm_22050	pcm_24000	pcm_44100	pcm_48000	ulaw_8000N)__name__
__module____qualname____doc__PCM_8000	PCM_16000	PCM_22050	PCM_24000	PCM_44100	PCM_48000	ULAW_8000 r   r   ^/var/www/html/asistente-voz-ia/venv/lib/python3.10/site-packages/elevenlabs/realtime/scribe.pyr      s    r   c                   @   s   e Zd ZdZdZdZdS )CommitStrategyz
    Strategy for committing transcription results.

    VAD: Voice Activity Detection - automatically commits when speech ends
    MANUAL: Manual commit - requires calling commit() to commit the segment
    vadmanualN)r   r   r   r   VADMANUALr   r   r   r   r       s    r   c                   @   sn   e Zd ZU dZee ed< ee ed< ee ed< e	ed< e
ed< e
ed< eed< eed	< eed
< eed< dS )RealtimeAudioOptionsa0  
    Options for providing audio chunks manually.

    Attributes:
        model_id: The model ID to use for transcription (required)
        audio_format: The audio format (required)
        sample_rate: The sample rate in Hz (required)
        commit_strategy: Strategy for committing transcriptions (optional, defaults to MANUAL)
        vad_silence_threshold_secs: Silence threshold in seconds for VAD (must be between 0.3 and 3.0)
        vad_threshold: Threshold for voice activity detection (must be between 0.1 and 0.9)
        min_speech_duration_ms: Minimum speech duration in milliseconds (must be between 50 and 2000)
        min_silence_duration_ms: Minimum silence duration in milliseconds (must be between 50 and 2000)
        language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
        include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
    model_idaudio_formatsample_ratecommit_strategyvad_silence_threshold_secsvad_thresholdmin_speech_duration_msmin_silence_duration_mslanguage_codeinclude_timestampsN)r   r   r   r   r   str__annotations__r   intr   floatboolr   r   r   r   r"   +   s   
 r"   F)totalc                   @   sb   e Zd ZU dZee ed< ee ed< eed< eed< eed< e	ed< e	ed< eed	< e
ed
< dS )RealtimeUrlOptionsa  
    Options for streaming audio from a URL.

    Attributes:
        model_id: The model ID to use for transcription (required)
        url: The URL of the audio stream (required)
        commit_strategy: Strategy for committing transcriptions (optional, defaults to MANUAL)
        vad_silence_threshold_secs: Silence threshold in seconds for VAD (must be between 0.3 and 3.0)
        vad_threshold: Threshold for voice activity detection (must be between 0.1 and 0.9)
        min_speech_duration_ms: Minimum speech duration in milliseconds (must be between 50 and 2000)
        min_silence_duration_ms: Minimum silence duration in milliseconds (must be between 50 and 2000)
        language_code: An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand.
        include_timestamps: Whether to receive the committed_transcript_with_timestamps event after committing the segment (optional, defaults to False)
    r#   urlr&   r'   r(   r)   r*   r+   r,   N)r   r   r   r   r   r-   r.   r   r0   r/   r1   r   r   r   r   r3   G   s   
 r3   c                   @   s  e Zd ZdZd dedefddZededefd	d
Z	ede
defdd
Z	dejee
f defdd
Z	dedefddZde
defddZdeddfddZ						d!dedededeje deje deje deje deje deje defddZdS )"ScribeRealtimea+  
    Helper class for creating realtime speech-to-text connections.

    Supports two modes:
    1. URL-based: Stream audio from a URL (uses ffmpeg for conversion)
    2. Manual: Send audio chunks yourself

    Example (URL-based):
        ```python
        connection = await elevenlabs.speech_to_text.realtime.connect({
            "model_id": "scribe_v2_realtime",
            "url": "https://stream.example.com/audio.mp3"
        })
        ```

    Example (Manual chunks):
        ```python
        connection = await elevenlabs.speech_to_text.realtime.connect({
            "model_id": "scribe_v2_realtime",
            "audio_format": AudioFormat.PCM_16000,
            "sample_rate": 16000
        })

        # Send audio chunks
        await connection.send({"audio_base_64": chunk})
        ```
    wss://api.elevenlabs.ioapi_keybase_urlc                 C   s   || _ || _d S N)r7   r8   )selfr7   r8   r   r   r   __init__~   s   
zScribeRealtime.__init__optionsreturnc                       d S r9   r   r:   r<   r   r   r   r         zScribeRealtime.connectc                    r>   r9   r   r?   r   r   r   r      r@   c                    sN   d|v }d|vrt d|r| tt|I dH S | tt|I dH S )a  
        Create a realtime transcription connection.

        Args:
            options: Either RealtimeAudioOptions for manual chunk sending or RealtimeUrlOptions for URL streaming

        Returns:
            RealtimeConnection instance ready to send/receive data

        Raises:
            ValueError: If invalid options are provided
            RuntimeError: If ffmpeg is not available (for URL-based streaming)

        Example:
            ```python
            # URL-based streaming
            connection = await elevenlabs.speech_to_text.realtime.connect({
                "model_id": "scribe_v2_realtime",
                "url": "https://stream.example.com/audio.mp3",
            })

            # Manual chunks
            connection = await elevenlabs.speech_to_text.realtime.connect({
                "model_id": "scribe_v2_realtime",
                "audio_format": AudioFormat.PCM_16000,
                "sample_rate": 16000,
                "commit_strategy": CommitStrategy.MANUAL
            })
            ```
        r4   r#   z/model_id is required for realtime transcriptionN)
ValueError_connect_urltypingcastr3   _connect_audior"   )r:   r<   is_url_moder   r   r   r      s   #c                    s   |d }| d}| d}| dtj}| d}| d}| d}| d}	| d	}
| d
d}|r9|s=td| j||j|j||||	|
|d	}t|d| jidI dH }t||dd}t	
| |_|d |S )z'Connect with manual audio chunk sendingr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   Fz?audio_format and sample_rate are required for manual audio mode	r#   r$   r&   r'   r(   r)   r*   r+   r,   
xi-api-keyadditional_headersN	websocketcurrent_sample_rateffmpeg_processopen)getr   r!   rA   _build_websocket_urlvaluewebsocket_connectr7   r   asynciocreate_task_start_message_handler_message_task_emit)r:   r<   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   ws_urlrL   
connectionr   r   r   rE      sH   







zScribeRealtime._connect_audioc                    sD  |d }| d}| dtj}| d}| d}| d}| d}| d}	| d	d
}
|s6tdd}tj}| j|||j|||||	|
d	}t|d| j	idI dH }zt
jdd|dddt|dddg
t
jt
jdd}W n ty   |ddI dH  tdw t|||d}t| |_|d t| | |S ) z3Connect with URL-based audio streaming using ffmpegr#   r4   r&   r'   r(   r)   r*   r+   r,   Fzurl is required for URL modei>  rG   rH   rI   Nffmpegz-iz-fs16lez-arz-ac1-r   )stdoutstderrbufsizei  zffmpeg not foundziffmpeg is required for URL-based audio streaming. Please install ffmpeg: https://ffmpeg.org/download.htmlrK   rO   )rP   r   r!   rA   r   r   rQ   rR   rS   r7   
subprocessPopenr-   PIPEDEVNULLFileNotFoundErrorcloseRuntimeErrorr   rT   rU   rV   rW   rX   _stream_ffmpeg_to_websocket)r:   r<   r#   r4   r&   r'   r(   r)   r*   r+   r,   r%   r$   rY   rL   rN   rZ   r   r   r   rB      sr   







zScribeRealtime._connect_urlrZ   Nc                    s  |j r|j js
dS zzTd}t }	 |d|j jj|I dH }|s#n;t|d}z|	d|iI dH  W n t
yT } z|ddd| i W Y d}~nd}~ww tdI dH  qW n t
y| } z|ddd	| i W Y d}~nd}~ww W |j r|j   z
|j jd
d W dS  tjy   |j   Y dS w dS |j r|j   z	|j jd
d W w  tjy   |j   Y w w w )z-Stream audio from ffmpeg process to WebSocketNi    Tzutf-8audio_base_64errorzFailed to send audio: g{Gz?zFFmpeg streaming error: r   )timeout)rN   r_   rT   get_event_looprun_in_executorreadbase64	b64encodedecodesend	ExceptionrX   sleepkillwaitrb   TimeoutExpired)r:   rZ   
chunk_sizeloopchunkchunk_base64er   r   r   ri   7  sZ   
"


z*ScribeRealtime._stream_ffmpeg_to_websocketr#   r$   r&   r'   r(   r)   r*   r+   r,   c
                 C   s   | j dddd}
d| d| d| g}|dur%|d	|  |dur1|d
|  |dur=|d|  |durI|d|  |durU|d|  |	dura|d|	  d|}|
 d| S )z-Build the WebSocket URL with query parameterszhttps://zwss://zhttp://zws://z	model_id=zaudio_format=zcommit_strategy=Nzvad_silence_threshold_secs=zvad_threshold=zmin_speech_duration_ms=zmin_silence_duration_ms=zlanguage_code=zinclude_timestamps=&z/v1/speech-to-text/realtime?)r8   replaceappendjoin)r:   r#   r$   r&   r'   r(   r)   r*   r+   r,   baseparamsquery_stringr   r   r   rQ   e  s&   
z#ScribeRealtime._build_websocket_url)r6   )NNNNNN)r   r   r   r   r-   r;   r   r"   r   r   r3   rC   UnionrE   rB   ri   Optionalr0   r/   r1   rQ   r   r   r   r   r5   a   sd    
-0L3	
r5   )rT   rp   rb   rC   enumr   r   typing_extensionsr   websockets.asyncio.clientr   rS   ImportErrorrZ   r   r-   r   r   	TypedDictr"   r3   r5   r   r   r   r   <module>   s(    