IBM Watson Speech to Text Service not responding in Unity3d

I have an ExampleStreaming class that I got from the IBM Watson SDK on GitHub (Speech to Text service). Here it is:

// Unity component that records microphone audio in a coroutine and configures a
// SpeechToText instance for listening.
// NOTE(review): this listing appears to have lost its braces and several statements
// when it was pasted: as shown, the Active setter never starts listening, the
// OnRecognize callback is never registered, and the AudioData built in
// RecordingHandler is never handed to m_SpeechToText. Compare against the SDK's
// original example before relying on it.
public class ExampleStreaming : MonoBehaviour
  // Id returned by Runnable.Run for the recording coroutine; 0 is used as "not running".
  private int m_RecordingRoutine = 0;
  // Microphone device name; starts as null and is assigned in RecordingHandler.
  private string m_MicrophoneID = null;
  // Clip returned by Microphone.Start that the microphone records into.
  private AudioClip m_Recording = null;
  // Passed as the length argument of Microphone.Start (presumably seconds — confirm).
  private int m_RecordingBufferSize = 5;
  // Sample rate passed to Microphone.Start and AudioClip.Create.
  private int m_RecordingHZ = 22050;

  private SpeechToText m_SpeechToText = new SpeechToText();

  // Logs the call and switches the component to the active state.
  void Start()
    Log.Debug("ExampleStreaming", "Start();");

    Active = true;

    // NOTE(review): the body of Update is missing from this listing.
    public void Update() {

  // Reports whether m_SpeechToText is currently listening. Setting it to true while
  // not listening applies the recognition options below.
  // NOTE(review): the "set" accessor header and the calls that actually start/stop
  // listening and recording are not present in this listing.
  public bool Active
    get { return m_SpeechToText.IsListening; }
      if (value && !m_SpeechToText.IsListening)
        m_SpeechToText.DetectSilence = true;
        m_SpeechToText.EnableWordConfidence = false;
        m_SpeechToText.EnableTimestamps = false;
        m_SpeechToText.SilenceThreshold = 0.03f;
        m_SpeechToText.MaxAlternatives = 1;
        m_SpeechToText.EnableContinousRecognition = true;
        m_SpeechToText.EnableInterimResults = true;
        m_SpeechToText.OnError = OnError;
      // NOTE(review): the body of this branch is missing from this listing.
      else if (!value && m_SpeechToText.IsListening)

  // Starts the RecordingHandler coroutine unless one is already running.
  private void StartRecording()
    if (m_RecordingRoutine == 0)
      m_RecordingRoutine = Runnable.Run(RecordingHandler());

  // Clears the routine id; RecordingHandler's loop exits once the id is 0.
  // NOTE(review): as shown, the microphone itself is not stopped here.
  private void StopRecording()
    if (m_RecordingRoutine != 0)
      m_RecordingRoutine = 0;

  // Error callback assigned to m_SpeechToText.OnError: deactivates and logs the error.
  private void OnError(string error)
    Active = false;

    Log.Debug("ExampleStreaming", "Error! {0}", error);

  // Coroutine that starts the microphone and then watches the write position,
  // copying out one half of the recording clip each time the other half is being
  // written.
  private IEnumerator RecordingHandler()
    Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);

        // Use the first listed device.
        // NOTE(review): indexing [0] assumes at least one microphone is present.
        m_MicrophoneID = Microphone.devices[0];
        Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
        m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
    yield return null;      // let m_RecordingRoutine get set..
        // NOTE(review): m_Recording is dereferenced here before the null check below.
        Debug.Log("m_Recording : " + m_Recording.length);
        if (m_Recording == null)
            Debug.Log("m_Recording is null");
      yield break;

    // bFirstBlock tracks which half of the clip is expected to complete next.
    bool bFirstBlock = true;
    int midPoint = m_Recording.samples / 2;
    float[] samples = null;

    while (m_RecordingRoutine != 0 && m_Recording != null)
      int writePos = Microphone.GetPosition(m_MicrophoneID);
      // Bail out if the write position is out of range or recording has stopped.
      if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
        Log.Error("MicrophoneWidget", "Microphone disconnected.");

        yield break;

      // The awaited half is complete once the write position has crossed into the
      // other half.
      if ((bFirstBlock && writePos >= midPoint)
        || (!bFirstBlock && writePos < midPoint))
        // front block is recorded, make a RecordClip and pass it onto our callback.
        samples = new float[midPoint];
        // Read midPoint samples starting at offset 0 (first half) or midPoint (second half).
        m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

        AudioData record = new AudioData();
        record.MaxLevel = Mathf.Max(samples);
        record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
        record.Clip.SetData(samples, 0);
        // NOTE(review): "record" is not used after this point in the listing.


        bFirstBlock = !bFirstBlock;
        // calculate the number of samples remaining until we are ready for a block of audio,
        // and wait that amount of time it will take to record.
        int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
        float timeRemaining = (float)remaining / (float)m_RecordingHZ;

        yield return new WaitForSeconds(timeRemaining);


    yield break;

  // Recognition callback: logs the transcript of every alternative of every result.
  private void OnRecognize(SpeechRecognitionEvent result)
        if (result != null && result.results.Length > 0)
      foreach (var res in result.results)
        foreach (var alt in res.alternatives)
          string text = alt.transcript;

          // NOTE(review): the conditional expression below is missing its condition
          // (the operand before '?') in this listing.
          Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, ? "Final" : "Interim", alt.confidence));


And this is the line I added to select the microphone. I edited the RecordingHandler function so that it uses the microphone device at index zero, because the microphone ID was originally null (I don't know whether that was left that way intentionally or is a bug).

 m_MicrophoneID = Microphone.devices[0];


Unfortunately, it doesn't produce any log output from the OnRecognize callback, which I think should be executed.

Instead, it displays these logs after a few seconds (since I gave the audio a length of 5). What am I doing wrong? I cannot figure out how to convert the speech to text.

[DEBUG] OnListenClosed(), State = DISCONNECTED 
[DEBUG] KeepAlive exited.


I also tried the IBM Watson Speech to Text example scene, and it also shows nothing.


source to share

1 answer

I couldn't get live streaming to work, but I can now convert an audio clip to text via the Watson service. Here is some simple code (which took me three days to work out).

using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;

// Unity component that records a short clip from the first microphone and sends it
// to the SpeechToText service for (non-streaming) recognition.
// NOTE(review): this listing appears to have lost its closing braces and some
// statements when it was pasted.
public class AudioClipToTextWatson : MonoBehaviour {
    // Non-streaming
    SpeechToText m_SpeechToText = new SpeechToText();
    // Clip to transcribe; overwritten in Start with the microphone recording.
    public AudioClip m_AudioClip = new AudioClip();
    // NOTE(review): not read anywhere in this listing.
    public bool on = false;

    void Start () {
        // Request a non-looping recording from the first device (length 4, 44100 Hz).
        // NOTE(review): indexing [0] assumes at least one microphone is present.
        m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);

            // NOTE(review): Recognize is called immediately after Microphone.Start;
            // presumably the clip has not been filled with audio yet at this point —
            // confirm whether a wait for the recording to finish is needed.
            m_SpeechToText.Recognize(m_AudioClip, OnRecognize);
            // NOTE(review): the statements that belonged under the two comments below
            // are missing from this listing.
            //  Streaming
            //  Stop listening

    // Recognition callback: logs the raw result object, then walks every alternative
    // of every result.
    private void OnRecognize(SpeechRecognitionEvent result)
        Debug.Log("result : " + result);
        if (result != null && result.results.Length > 0)
            foreach (var res in result.results)
                foreach (var alt in res.alternatives)
                    // NOTE(review): "text" is assigned but not used in this listing.
                    string text = alt.transcript;



Note: You can record an audio clip using the microphone and convert it to text. If you already have an audio clip, assign it in the Inspector and comment out the first line in the Start method.



All Articles