- Navigation Guide: You are on a Command (operation) page with structural examples. Use the navigation breadcrumb if you would like to return to the Client landing page.
StartStreamTranscriptionCommand
Starts a bidirectional HTTP/2 or WebSocket stream where audio is streamed to Amazon Transcribe and the transcription results are streamed to your application.
The following parameters are required:
- language-code or identify-language or identify-multiple-language
- media-encoding
- sample-rate
For more information on streaming with Amazon Transcribe, see Transcribing streaming audio .
Example Syntax
Use a bare-bones client and the command you need to make an API call.
// Structural example: builds a StartStreamTranscriptionRequest-shaped object, wraps it in a
// StartStreamTranscriptionCommand, and sends it through a TranscribeStreamingClient.
// The values are placeholders that illustrate each field's type, not a ready-to-run configuration:
//   - `"a" || "b" || ...` enumerates the candidate values; as written, the expression evaluates to its first operand.
//   - `true || false` marks a boolean field; as written, it evaluates to `true`.
//   - `Number("int")` marks an integer field; as written, it evaluates to NaN, so replace it with a real number.
import { TranscribeStreamingClient, StartStreamTranscriptionCommand } from "@aws-sdk/client-transcribe-streaming"; // ES Modules import
// const { TranscribeStreamingClient, StartStreamTranscriptionCommand } = require("@aws-sdk/client-transcribe-streaming"); // CommonJS import
// NOTE: `config` is not defined in this snippet; supply your own client configuration object.
const client = new TranscribeStreamingClient(config);
const input = { // StartStreamTranscriptionRequest
LanguageCode: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
MediaSampleRateHertz: Number("int"), // required
MediaEncoding: "pcm" || "ogg-opus" || "flac", // required
VocabularyName: "STRING_VALUE",
SessionId: "STRING_VALUE",
// NOTE(review): the input parameter table for this command lists AudioStream as
// AsyncIterable<AudioStream>; the literal below appears to illustrate the shape of the
// union members rather than a value to pass as-is — confirm against the SDK typings.
AudioStream: { // AudioStream Union: only one key present
AudioEvent: { // AudioEvent
AudioChunk: new Uint8Array(), // e.g. Buffer.from("") or new TextEncoder().encode("")
},
ConfigurationEvent: { // ConfigurationEvent
ChannelDefinitions: [ // ChannelDefinitions
{ // ChannelDefinition
ChannelId: Number("int"), // required
ParticipantRole: "AGENT" || "CUSTOMER", // required
},
],
PostCallAnalyticsSettings: { // PostCallAnalyticsSettings
OutputLocation: "STRING_VALUE", // required
DataAccessRoleArn: "STRING_VALUE", // required
ContentRedactionOutput: "redacted" || "redacted_and_unredacted",
OutputEncryptionKMSKeyId: "STRING_VALUE",
},
},
},
VocabularyFilterName: "STRING_VALUE",
VocabularyFilterMethod: "remove" || "mask" || "tag",
ShowSpeakerLabel: true || false,
EnableChannelIdentification: true || false,
NumberOfChannels: Number("int"),
EnablePartialResultsStabilization: true || false,
PartialResultsStability: "high" || "medium" || "low",
ContentIdentificationType: "PII",
ContentRedactionType: "PII",
PiiEntityTypes: "STRING_VALUE",
LanguageModelName: "STRING_VALUE",
IdentifyLanguage: true || false,
LanguageOptions: "STRING_VALUE",
PreferredLanguage: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
IdentifyMultipleLanguages: true || false,
VocabularyNames: "STRING_VALUE",
VocabularyFilterNames: "STRING_VALUE",
};
const command = new StartStreamTranscriptionCommand(input);
// `await` at the top level requires an ES module; otherwise wrap this call in an async function.
const response = await client.send(command);
// { // StartStreamTranscriptionResponse
// RequestId: "STRING_VALUE",
// LanguageCode: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
// MediaSampleRateHertz: Number("int"),
// MediaEncoding: "pcm" || "ogg-opus" || "flac",
// VocabularyName: "STRING_VALUE",
// SessionId: "STRING_VALUE",
// TranscriptResultStream: { // TranscriptResultStream Union: only one key present
// TranscriptEvent: { // TranscriptEvent
// Transcript: { // Transcript
// Results: [ // ResultList
// { // Result
// ResultId: "STRING_VALUE",
// StartTime: Number("double"),
// EndTime: Number("double"),
// IsPartial: true || false,
// Alternatives: [ // AlternativeList
// { // Alternative
// Transcript: "STRING_VALUE",
// Items: [ // ItemList
// { // Item
// StartTime: Number("double"),
// EndTime: Number("double"),
// Type: "pronunciation" || "punctuation",
// Content: "STRING_VALUE",
// VocabularyFilterMatch: true || false,
// Speaker: "STRING_VALUE",
// Confidence: Number("double"),
// Stable: true || false,
// },
// ],
// Entities: [ // EntityList
// { // Entity
// StartTime: Number("double"),
// EndTime: Number("double"),
// Category: "STRING_VALUE",
// Type: "STRING_VALUE",
// Content: "STRING_VALUE",
// Confidence: Number("double"),
// },
// ],
// },
// ],
// ChannelId: "STRING_VALUE",
// LanguageCode: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
// LanguageIdentification: [ // LanguageIdentification
// { // LanguageWithScore
// LanguageCode: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
// Score: Number("double"),
// },
// ],
// },
// ],
// },
// },
// BadRequestException: { // BadRequestException
// Message: "STRING_VALUE",
// },
// LimitExceededException: { // LimitExceededException
// Message: "STRING_VALUE",
// },
// InternalFailureException: { // InternalFailureException
// Message: "STRING_VALUE",
// },
// ConflictException: { // ConflictException
// Message: "STRING_VALUE",
// },
// ServiceUnavailableException: { // ServiceUnavailableException
// Message: "STRING_VALUE",
// },
// },
// VocabularyFilterName: "STRING_VALUE",
// VocabularyFilterMethod: "remove" || "mask" || "tag",
// ShowSpeakerLabel: true || false,
// EnableChannelIdentification: true || false,
// NumberOfChannels: Number("int"),
// EnablePartialResultsStabilization: true || false,
// PartialResultsStability: "high" || "medium" || "low",
// ContentIdentificationType: "PII",
// ContentRedactionType: "PII",
// PiiEntityTypes: "STRING_VALUE",
// LanguageModelName: "STRING_VALUE",
// IdentifyLanguage: true || false,
// LanguageOptions: "STRING_VALUE",
// PreferredLanguage: "en-US" || "en-GB" || "es-US" || "fr-CA" || "fr-FR" || "en-AU" || "it-IT" || "de-DE" || "pt-BR" || "ja-JP" || "ko-KR" || "zh-CN" || "th-TH" || "es-ES" || "ar-SA" || "pt-PT" || "ca-ES" || "ar-AE" || "hi-IN" || "zh-HK" || "nl-NL" || "no-NO" || "sv-SE" || "pl-PL" || "fi-FI" || "zh-TW" || "en-IN" || "en-IE" || "en-NZ" || "en-AB" || "en-ZA" || "en-WL" || "de-CH" || "af-ZA" || "eu-ES" || "hr-HR" || "cs-CZ" || "da-DK" || "fa-IR" || "gl-ES" || "el-GR" || "he-IL" || "id-ID" || "lv-LV" || "ms-MY" || "ro-RO" || "ru-RU" || "sr-RS" || "sk-SK" || "so-SO" || "tl-PH" || "uk-UA" || "vi-VN" || "zu-ZA",
// IdentifyMultipleLanguages: true || false,
// VocabularyNames: "STRING_VALUE",
// VocabularyFilterNames: "STRING_VALUE",
// };
StartStreamTranscriptionCommand Input
Parameter | Type | Description |
---|---|---|
AudioStream Required | AsyncIterable<AudioStream> | undefined | An encoded stream of audio blobs. Audio streams are encoded as either HTTP/2 or WebSocket data frames. For more information, see Transcribing streaming audio . |
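MediaEncoding Required | MediaEncoding | undefined | Specify the encoding of your input audio. Supported formats are: FLAC; OPUS-encoded audio in an Ogg container; PCM (only signed 16-bit little-endian audio formats, which does not include WAV). For more information, see Media formats . |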
MediaSampleRateHertz Required | number | undefined | The sample rate of the input audio (in hertz). Low-quality audio, such as telephone audio, is typically around 8,000 Hz. High-quality audio typically ranges from 16,000 Hz to 48,000 Hz. Note that the sample rate you specify must match that of your audio. |
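ContentIdentificationType | ContentIdentificationType | undefined | Labels all personally identifiable information (PII) identified in your transcript. Content identification is performed at the segment level; PII specified in `PiiEntityTypes` is flagged upon complete transcription of an audio segment. If you don't include `PiiEntityTypes` in your request, all PII is identified. You can’t set `ContentIdentificationType` and `ContentRedactionType` in the same request. If you set both, your request returns a `BadRequestException`. For more information, see Redacting or identifying personally identifiable information . |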
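ContentRedactionType | ContentRedactionType | undefined | Redacts all personally identifiable information (PII) identified in your transcript. Content redaction is performed at the segment level; PII specified in `PiiEntityTypes` is redacted upon complete transcription of an audio segment. If you don't include `PiiEntityTypes` in your request, all PII is redacted. You can’t set `ContentRedactionType` and `ContentIdentificationType` in the same request. If you set both, your request returns a `BadRequestException`. For more information, see Redacting or identifying personally identifiable information . |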
EnableChannelIdentification | boolean | undefined | Enables channel identification in multi-channel audio. Channel identification transcribes the audio on each channel independently, then appends the output for each channel into one transcript. If you have multi-channel audio and do not enable channel identification, your audio is transcribed in a continuous manner and your transcript is not separated by channel. If you include `EnableChannelIdentification` in your request, you must also include `NumberOfChannels`. For more information, see Transcribing multi-channel audio . |
EnablePartialResultsStabilization | boolean | undefined | Enables partial result stabilization for your transcription. Partial result stabilization can reduce latency in your output, but may impact accuracy. For more information, see Partial-result stabilization . |
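IdentifyLanguage | boolean | undefined | Enables automatic language identification for your transcription. If you include `IdentifyLanguage`, you must include a list of language codes, using `LanguageOptions`, that you think may be present in your audio stream. You can also include a preferred language using `PreferredLanguage`. Adding a preferred language can help Amazon Transcribe identify the language faster than if you omit this parameter. If you have multi-channel audio that contains different languages on each channel, and you've enabled channel identification, automatic language identification identifies the dominant language on each audio channel. Note that you must include either `LanguageCode` or `IdentifyLanguage` or `IdentifyMultipleLanguages` in your request. If you include more than one of these parameters, your transcription job fails. Streaming language identification can't be combined with custom language models or redaction. |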
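IdentifyMultipleLanguages | boolean | undefined | Enables automatic multi-language identification in your transcription job request. Use this parameter if your stream contains more than one language. If your stream contains only one language, use IdentifyLanguage instead. If you include `IdentifyMultipleLanguages`, you must include a list of language codes, using `LanguageOptions`, that you think may be present in your stream. If you want to apply a custom vocabulary or a custom vocabulary filter to your automatic multiple language identification request, include `VocabularyNames` or `VocabularyFilterNames`. Note that you must include one of `LanguageCode`, `IdentifyLanguage`, or `IdentifyMultipleLanguages` in your request. If you include more than one of these parameters, your transcription job fails. |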
LanguageCode | LanguageCode | undefined | Specify the language code that represents the language spoken in your audio. If you're unsure of the language spoken in your audio, consider using `IdentifyLanguage` to enable automatic language identification. For a list of languages supported with Amazon Transcribe streaming, refer to the Supported languages table. |
LanguageModelName | string | undefined | Specify the name of the custom language model that you want to use when processing your transcription. Note that language model names are case sensitive. The language of the specified language model must match the language code you specify in your transcription request. If the languages don't match, the custom language model isn't applied. There are no errors or warnings associated with a language mismatch. For more information, see Custom language models . |
LanguageOptions | string | undefined | Specify two or more language codes that represent the languages you think may be present in your media; including more than five is not recommended. Including language options can improve the accuracy of language identification. If you include `LanguageOptions` in your request, you must also include `IdentifyLanguage` or `IdentifyMultipleLanguages`. For a list of languages supported with Amazon Transcribe streaming, refer to the Supported languages table. You can only include one language dialect per language per stream. For example, you cannot include `en-US` and `en-AU` in the same request. |
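NumberOfChannels | number | undefined | Specify the number of channels in your audio stream. This value must be `2`, as only two channels are supported. If your audio doesn't contain multiple channels, do not include this parameter in your request. If you include `NumberOfChannels` in your request, you must also include `EnableChannelIdentification`. |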
PartialResultsStability | PartialResultsStability | undefined | Specify the level of stability to use when you enable partial results stabilization (`EnablePartialResultsStabilization`). Low stability provides the highest accuracy. High stability transcribes faster, but with slightly lower accuracy. For more information, see Partial-result stabilization . |
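PiiEntityTypes | string | undefined | Specify which types of personally identifiable information (PII) you want to redact in your transcript. You can include as many types as you'd like, or you can select `ALL`. Values must be comma-separated and can include: `ADDRESS`, `BANK_ACCOUNT_NUMBER`, `BANK_ROUTING`, `CREDIT_DEBIT_CVV`, `CREDIT_DEBIT_EXPIRY`, `CREDIT_DEBIT_NUMBER`, `EMAIL`, `NAME`, `PHONE`, `PIN`, `SSN`, or `ALL`. Note that if you include `PiiEntityTypes` in your request, you must also include `ContentIdentificationType` or `ContentRedactionType`. If you include `ContentRedactionType` or `ContentIdentificationType` in your request, but do not include `PiiEntityTypes`, all PII is redacted or identified. |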
PreferredLanguage | LanguageCode | undefined | Specify a preferred language from the subset of languages codes you specified in `LanguageOptions`. You can only use this parameter if you've included `IdentifyLanguage` and `LanguageOptions` in your request. |
SessionId | string | undefined | Specify a name for your transcription session. If you don't include this parameter in your request, Amazon Transcribe generates an ID and returns it in the response. |
ShowSpeakerLabel | boolean | undefined | Enables speaker partitioning (diarization) in your transcription output. Speaker partitioning labels the speech from individual speakers in your media file. For more information, see Partitioning speakers (diarization) . |
VocabularyFilterMethod | VocabularyFilterMethod | undefined | Specify how you want your vocabulary filter applied to your transcript. To replace words with `***`, choose `mask`. To delete words, choose `remove`. To flag words without changing them, choose `tag`. |
VocabularyFilterName | string | undefined | Specify the name of the custom vocabulary filter that you want to use when processing your transcription. Note that vocabulary filter names are case sensitive. If the language of the specified custom vocabulary filter doesn't match the language identified in your media, the vocabulary filter is not applied to your transcription. This parameter is not intended for use with the `IdentifyLanguage` parameter. If you're including `IdentifyLanguage` in your request and want to use one or more vocabulary filters with your transcription, use the `VocabularyFilterNames` parameter instead. For more information, see Using vocabulary filtering with unwanted words . |
VocabularyFilterNames | string | undefined | Specify the names of the custom vocabulary filters that you want to use when processing your transcription. Note that vocabulary filter names are case sensitive. If none of the languages of the specified custom vocabulary filters match the language identified in your media, your job fails. This parameter is only intended for use with the `IdentifyLanguage` parameter. If you're not including `IdentifyLanguage` in your request and want to use a custom vocabulary filter with your transcription, use the `VocabularyFilterName` parameter instead. For more information, see Using vocabulary filtering with unwanted words . |
VocabularyName | string | undefined | Specify the name of the custom vocabulary that you want to use when processing your transcription. Note that vocabulary names are case sensitive. If the language of the specified custom vocabulary doesn't match the language identified in your media, the custom vocabulary is not applied to your transcription. This parameter is not intended for use with the `IdentifyLanguage` parameter. If you're including `IdentifyLanguage` in your request and want to use one or more custom vocabularies with your transcription, use the `VocabularyNames` parameter instead. For more information, see Custom vocabularies . |
VocabularyNames | string | undefined | Specify the names of the custom vocabularies that you want to use when processing your transcription. Note that vocabulary names are case sensitive. If none of the languages of the specified custom vocabularies match the language identified in your media, your job fails. This parameter is only intended for use with the `IdentifyLanguage` parameter. If you're not including `IdentifyLanguage` in your request and want to use a custom vocabulary with your transcription, use the `VocabularyName` parameter instead. For more information, see Custom vocabularies . |
StartStreamTranscriptionCommand Output
Parameter | Type | Description |
---|---|---|
$metadata Required | ResponseMetadata | Metadata pertaining to this request. |
ContentIdentificationType | ContentIdentificationType | undefined | Shows whether content identification was enabled for your transcription. |
ContentRedactionType | ContentRedactionType | undefined | Shows whether content redaction was enabled for your transcription. |
EnableChannelIdentification | boolean | undefined | Shows whether channel identification was enabled for your transcription. |
EnablePartialResultsStabilization | boolean | undefined | Shows whether partial results stabilization was enabled for your transcription. |
IdentifyLanguage | boolean | undefined | Shows whether automatic language identification was enabled for your transcription. |
IdentifyMultipleLanguages | boolean | undefined | Shows whether automatic multi-language identification was enabled for your transcription. |
LanguageCode | LanguageCode | undefined | Provides the language code that you specified in your request. |
LanguageModelName | string | undefined | Provides the name of the custom language model that you specified in your request. |
LanguageOptions | string | undefined | Provides the language codes that you specified in your request. |
MediaEncoding | MediaEncoding | undefined | Provides the media encoding you specified in your request. |
MediaSampleRateHertz | number | undefined | Provides the sample rate that you specified in your request. |
NumberOfChannels | number | undefined | Provides the number of channels that you specified in your request. |
PartialResultsStability | PartialResultsStability | undefined | Provides the stabilization level used for your transcription. |
PiiEntityTypes | string | undefined | Lists the PII entity types you specified in your request. |
PreferredLanguage | LanguageCode | undefined | Provides the preferred language that you specified in your request. |
RequestId | string | undefined | Provides the identifier for your streaming request. |
SessionId | string | undefined | Provides the identifier for your transcription session. |
ShowSpeakerLabel | boolean | undefined | Shows whether speaker partitioning was enabled for your transcription. |
TranscriptResultStream | AsyncIterable<TranscriptResultStream> | undefined | Provides detailed information about your streaming session. |
VocabularyFilterMethod | VocabularyFilterMethod | undefined | Provides the vocabulary filtering method used in your transcription. |
VocabularyFilterName | string | undefined | Provides the name of the custom vocabulary filter that you specified in your request. |
VocabularyFilterNames | string | undefined | Provides the names of the custom vocabulary filters that you specified in your request. |
VocabularyName | string | undefined | Provides the name of the custom vocabulary that you specified in your request. |
VocabularyNames | string | undefined | Provides the names of the custom vocabularies that you specified in your request. |
Throws
Name | Fault | Details |
---|---|---|
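BadRequestException | client | One or more arguments to the `StartStreamTranscription`, `StartMedicalStreamTranscription`, or `StartCallAnalyticsStreamTranscription` operation was not valid. For example, `MediaEncoding` or `LanguageCode` used unsupported values. Check the specified parameters and try your request again. |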
ConflictException | client | A new stream started with the same session ID. The current stream has been terminated. |
InternalFailureException | server | A problem occurred while processing the audio. Amazon Transcribe terminated processing. |
LimitExceededException | client | Your client has exceeded one of the Amazon Transcribe limits. This is typically the audio length limit. Break your audio stream into smaller chunks and try your request again. |
ServiceUnavailableException | server | The service is currently unavailable. Try your request later. |
TranscribeStreamingServiceException | | Base exception class for all service exceptions from TranscribeStreaming service. |