Transcrever um arquivo local com pontuação automática

Transcreva um arquivo de áudio local, incluindo a pontuação automática.

Mais informações

Para conferir a documentação detalhada que inclui este exemplo de código, consulte:

Exemplo de código

Go

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT. Saiba mais na documentação de referência da API Cloud STT para Go.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.


import (
	"context"
	"fmt"
	"io"
	"os"
	"strings"

	speech "cloud.google.com/go/speech/apiv1"
	"cloud.google.com/go/speech/apiv1/speechpb"
)

func autoPunctuation(w io.Writer, path string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	// path = "../testdata/commercial_mono.wav"
	data, err := os.ReadFile(path)
	if err != nil {
		return fmt.Errorf("ReadFile: %w", err)
	}

	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 8000,
			LanguageCode:    "en-US",
			// Enable automatic punctuation.
			EnableAutomaticPunctuation: true,
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
		},
	})
	if err != nil {
		return fmt.Errorf("Recognize: %w", err)
	}

	for i, result := range resp.Results {
		fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
		fmt.Fprintf(w, "Result %d\n", i+1)
		for j, alternative := range result.Alternatives {
			fmt.Fprintf(w, "Alternative %d: %s\n", j+1, alternative.Transcript)
		}
	}
	return nil
}

Java

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT. Saiba mais na documentação de referência da API Cloud STT para Java.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.

/**
 * Performs transcription on remote FLAC file and prints the transcription.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 */
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request with raw PCM audio
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableAutomaticPunctuation(true)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // Just print the first result here.
    SpeechRecognitionResult result = response.get().getResultsList().get(0);

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}

Node.js

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT. Saiba mais na documentação de referência da API Cloud STT para Node.js.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.

// Imports the Google Cloud client library for API
/**
 * TODO(developer): Update client library import to use new
 * version of API when desired features become available
 */

const speech = require('@google-cloud/speech');
const fs = require('fs');

// Creates a client
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 * Include the sampleRateHertz field in the config object.
 */
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

const config = {
  encoding: encoding,
  languageCode: languageCode,
  enableAutomaticPunctuation: true,
};

const audio = {
  content: fs.readFileSync(filename).toString('base64'),
};

const request = {
  config: config,
  audio: audio,
};

// Detects speech in the audio file
const [response] = await client.recognize(request);
const transcription = response.results
  .map(result => result.alternatives[0].transcript)
  .join('\n');
console.log('Transcription: ', transcription);

PHP

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.

use Google\Cloud\Speech\V2\Client\SpeechClient;
use Google\Cloud\Speech\V2\RecognizeRequest;
use Google\Cloud\Speech\V2\RecognitionConfig;
use Google\Cloud\Speech\V2\RecognitionFeatures;
use Google\Cloud\Speech\V2\AutoDetectDecodingConfig;

/**
 * @param string $projectId The Google Cloud project ID.
 * @param string $location The location of the recognizer.
 * @param string $recognizerId The ID of the recognizer to use.
 * @param string $audioFile path to an audio file (e.x. "test/data/audio32KHz.flac").
 */
function transcribe_auto_punctuation(string $projectId, string $location, string $recognizerId, string $audioFile)
{
    // create the speech client
    $apiEndpoint = $location === 'global' ? null : sprintf('%s-speech.googleapis.com', $location);
    $speech = new SpeechClient(['apiEndpoint' => $apiEndpoint]);

    // get contents of a file into a string
    $content = file_get_contents($audioFile);

    $recognizerName = SpeechClient::recognizerName($projectId, $location, $recognizerId);

    // When true, automatic punctuation will be enabled.
    $features = new RecognitionFeatures([
        'enable_automatic_punctuation' => true
    ]);

    $config = (new RecognitionConfig())
        ->setFeatures($features)

        // Can also use {@see Google\Cloud\Speech\V2\ExplicitDecodingConfig}
        // ->setExplicitDecodingConfig(new ExplicitDecodingConfig([...]);

        ->setAutoDecodingConfig(new AutoDetectDecodingConfig());

    $request = (new RecognizeRequest())
        ->setRecognizer($recognizerName)
        ->setConfig($config)
        ->setContent($content);

    // make the API call
    $response = $speech->recognize($request);
    $results = $response->getResults();

    // print results
    foreach ($results as $result) {
        $alternatives = $result->getAlternatives();
        $mostLikely = $alternatives[0];
        $transcript = $mostLikely->getTranscript();
        $confidence = $mostLikely->getConfidence();
        printf('Transcript: %s' . PHP_EOL, $transcript);
        printf('Confidence: %s' . PHP_EOL, $confidence);
    }

    $speech->close();
}

Python

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT. Saiba mais na documentação de referência da API Cloud STT para Python.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.


from google.cloud import speech


def transcribe_file_with_auto_punctuation(audio_file: str) -> speech.RecognizeResponse:
    """Transcribe the given audio file with auto punctuation enabled.
    Args:
        audio_file (str): Path to the local audio file to be transcribed.
    Returns:
        speech.RecognizeResponse: The response containing the transcription results.
    """
    client = speech.SpeechClient()

    with open(audio_file, "rb") as f:
        audio_content = f.read()

    audio = speech.RecognitionAudio(content=audio_content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
        # Enable automatic punctuation
        enable_automatic_punctuation=True,
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print(f"First alternative of result {i}")
        print(f"Transcript: {alternative.transcript}")

    return response

Ruby

Para saber como instalar e usar a biblioteca de cliente da Cloud STT, consulte Bibliotecas de cliente da Cloud STT.

Para se autenticar na Cloud STT, configure o Application Default Credentials. Se quiser mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.

# audio_file_path = "path/to/audio.wav"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech version: :v1

config = {
  encoding:                     :LINEAR16,
  sample_rate_hertz:            8000,
  language_code:                "en-US",
  enable_automatic_punctuation: true
}

audio_file = File.binread audio_file_path
audio      = { content: audio_file }

operation = speech.long_running_recognize config: config, audio: audio

puts "Operation started"

operation.wait_until_done!

raise operation.results.message if operation.error?

results = operation.response.results

results.each_with_index do |result, i|
  alternative = result.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{i}"
  puts "Transcript: #{alternative.transcript}"
end

A seguir

Para pesquisar e filtrar exemplos de código de outros Google Cloud produtos, consulte a Google Cloud pesquisa de exemplos de código.