תמלול עם חלוקה לדוברים

לזהות את הדוברים השונים בדגימת האודיו.

דוגמת קוד

Java

מידע על התקנה ושימוש בספריית הלקוח של Cloud STT מופיע במאמר ספריות הלקוח של Cloud STT. מידע נוסף מופיע במאמרי העזרה של Cloud STT Java API.

כדי לבצע אימות ב-Cloud STT, צריך להגדיר את Application Default Credentials. מידע נוסף זמין במאמר הגדרת אימות לסביבת פיתוח מקומית.


import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.WordInfo;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

class TranscribeDiarization {

  static void transcribeDiarization() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String fileName = "resources/commercial_mono.wav";
    transcribeDiarization(fileName);
  }

  // Transcribe the given audio file using speaker diarization.
  static void transcribeDiarization(String fileName) throws IOException {
    Path path = Paths.get(fileName);
    byte[] content = Files.readAllBytes(path);

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (SpeechClient client = SpeechClient.create()) {
      // Get the contents of the local audio file
      RecognitionAudio recognitionAudio =
          RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
      SpeakerDiarizationConfig speakerDiarizationConfig =
          SpeakerDiarizationConfig.newBuilder()
              .setEnableSpeakerDiarization(true)
              .setMinSpeakerCount(2)
              .setMaxSpeakerCount(2)
              .build();
      // Configure request to enable Speaker diarization
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
              .setLanguageCode("en-US")
              .setSampleRateHertz(8000)
              .setDiarizationConfig(speakerDiarizationConfig)
              .build();

      // Perform the transcription request
      RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);

      // Speaker Tags are only included in the last result object, which has only one alternative.
      SpeechRecognitionAlternative alternative =
          recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
      // The alternative is made up of WordInfo objects that contain the speaker_tag.
      WordInfo wordInfo = alternative.getWords(0);
      int currentSpeakerTag = wordInfo.getSpeakerTag();
      // For each word, get all the words associated with one speaker, once the speaker changes,
      // add a new line with the new speaker and their spoken words.
      StringBuilder speakerWords =
          new StringBuilder(
              String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
      for (int i = 1; i < alternative.getWordsCount(); i++) {
        wordInfo = alternative.getWords(i);
        if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
          speakerWords.append(" ");
          speakerWords.append(wordInfo.getWord());
        } else {
          speakerWords.append(
              String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
          currentSpeakerTag = wordInfo.getSpeakerTag();
        }
      }
      System.out.println(speakerWords.toString());
    }
  }
}

Node.js

מידע על התקנה ושימוש בספריית הלקוח של Cloud STT מופיע במאמר ספריות הלקוח של Cloud STT. מידע נוסף מופיע במאמרי העזרה של Cloud STT Node.js API.

כדי לבצע אימות ב-Cloud STT, צריך להגדיר את Application Default Credentials. מידע נוסף זמין במאמר הגדרת אימות לסביבת פיתוח מקומית.

const fs = require('fs');

// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

// Set config for Diarization
const diarizationConfig = {
  enableSpeakerDiarization: true,
  maxSpeakerCount: 2,
};

const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 8000,
  languageCode: 'en-US',
  diarizationConfig: diarizationConfig,
  model: 'phone_call',
};

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

const audio = {
  content: fs.readFileSync(fileName).toString('base64'),
};

const request = {
  config: config,
  audio: audio,
};

const [response] = await client.recognize(request);
const transcription = response.results
  .map(result => result.alternatives[0].transcript)
  .join('\n');
console.log(`Transcription: ${transcription}`);
console.log('Speaker Diarization:');
const result = response.results[response.results.length - 1];
const wordsInfo = result.alternatives[0].words;
// Note: The transcript within each result is separate and sequential per result.
// However, the words list within an alternative includes all the words
// from all the results thus far. Thus, to get all the words with speaker
// tags, you only have to take the words list from the last result:
wordsInfo.forEach(a =>
  console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
);

המאמרים הבאים

כדי לחפש ולסנן דוגמאות קוד למוצרים אחרים של Google Cloud , אפשר להיעזר בדפדפן לדוגמאות שלGoogle Cloud .