IBM Watson Speech to Text Service not responding in Unity3d
I have an ExampleStreaming class that I took from the IBM Watson SDK examples on GitHub (Speech to Text service). Here it is:
// Note: the snippet I copied omits its using directives; the ones below are the namespaces
// used by the DeveloperCloud flavour of the Watson Unity SDK (an assumption on my part --
// adjust them to whatever your SDK version actually exposes).
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;

public class ExampleStreaming : MonoBehaviour
{
    private int m_RecordingRoutine = 0;
    private string m_MicrophoneID = null;
    private AudioClip m_Recording = null;
    private int m_RecordingBufferSize = 5;
    private int m_RecordingHZ = 22050;
    private SpeechToText m_SpeechToText = new SpeechToText();

    void Start()
    {
        LogSystem.InstallDefaultReactors();
        Log.Debug("ExampleStreaming", "Start();");
        Active = true;
        Debug.Log("start");
        StartRecording();
    }

    public void Update()
    {
        Debug.Log(m_SpeechToText.IsListening);
    }

    public bool Active
    {
        get { return m_SpeechToText.IsListening; }
        set
        {
            if (value && !m_SpeechToText.IsListening)
            {
                m_SpeechToText.DetectSilence = true;
                m_SpeechToText.EnableWordConfidence = false;
                m_SpeechToText.EnableTimestamps = false;
                m_SpeechToText.SilenceThreshold = 0.03f;
                m_SpeechToText.MaxAlternatives = 1;
                m_SpeechToText.EnableContinousRecognition = true;
                m_SpeechToText.EnableInterimResults = true;
                m_SpeechToText.OnError = OnError;
                m_SpeechToText.StartListening(OnRecognize);
            }
            else if (!value && m_SpeechToText.IsListening)
            {
                m_SpeechToText.StopListening();
            }
        }
    }

    private void StartRecording()
    {
        if (m_RecordingRoutine == 0)
        {
            Debug.Log("m_RecordingRoutine");
            UnityObjectUtil.StartDestroyQueue();
            m_RecordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    private void StopRecording()
    {
        if (m_RecordingRoutine != 0)
        {
            Microphone.End(m_MicrophoneID);
            Runnable.Stop(m_RecordingRoutine);
            m_RecordingRoutine = 0;
        }
    }

    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming", "Error! {0}", error);
    }

    private IEnumerator RecordingHandler()
    {
        Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);
        m_MicrophoneID = Microphone.devices[0];
        Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
        m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
        yield return null;  // let m_RecordingRoutine get set..

        Debug.Log("m_Recording : " + m_Recording.length);
        if (m_Recording == null)
        {
            Debug.Log("m_Recording is null");
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int midPoint = m_Recording.samples / 2;
        float[] samples = null;

        while (m_RecordingRoutine != 0 && m_Recording != null)
        {
            int writePos = Microphone.GetPosition(m_MicrophoneID);
            if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
              || (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
                record.Clip.SetData(samples, 0);

                m_SpeechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we are ready for a block of audio,
                // and wait the amount of time it will take to record that.
                int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)m_RecordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }

        yield break;
    }

    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("OnRecognize");
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
                }
            }
        }
    }
}
This is the line I added in the RecordingHandler function to select a microphone. In the original example the microphone ID was simply left as null (I don't know whether that is intentional or a bug), so I changed it to use the device at index zero:
m_MicrophoneID = Microphone.devices[0];
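For reference, a slightly more defensive way to pick the device, using only Unity's Microphone API (a minimal sketch; the MicrophoneUtil class and GetDefaultMicrophone name are mine, not part of the SDK):
using UnityEngine;

public static class MicrophoneUtil
{
    // Returns the first reported capture device, or null so that
    // Microphone.Start() falls back to the system default device.
    public static string GetDefaultMicrophone()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.LogWarning("No microphone devices found.");
            return null;
        }

        foreach (string device in Microphone.devices)
            Debug.Log("Microphone found: " + device);

        return Microphone.devices[0];
    }
}
With that in place, RecordingHandler could call m_MicrophoneID = MicrophoneUtil.GetDefaultMicrophone(); instead of indexing devices[0] directly; returning null lets Microphone.Start fall back to the default device, which is what the stock example relies on anyway.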
Unfortunately it never prints any output from the OnRecognize handler, which I would expect to fire. Instead, after a few seconds (the recording buffer length is 5 seconds) it only shows these logs:
[DEBUG] OnListenClosed(), State = DISCONNECTED
[DEBUG] KeepAlive exited.
I also tried the IBM Watson Speech To Text example scene, and it shows nothing either.
I couldn't get live streaming to work, but I can now convert an audio clip to text through the Watson service. Here is the simple code (it took me three days):
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
public class AudioClipToTextWatson : MonoBehaviour
{
    // Non-streaming recognition
    SpeechToText m_SpeechToText = new SpeechToText();
    public AudioClip m_AudioClip;   // assign in the Inspector, or record it in Start()
    public bool on = false;

    void Start ()
    {
        // Record 4 seconds from the first microphone and send the clip to Watson.
        m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);
        m_SpeechToText.Recognize(m_AudioClip, OnRecognize);

        // Streaming API, shown for reference (this is the part I could not get working):
        // m_SpeechToText.StartListening(OnRecognize);
        // Stop listening
        // m_SpeechToText.StopListening();
    }

    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("result : " + result);
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Debug.Log(res.final);
                }
            }
        }
    }
}
Note: you can record an audio clip with the microphone and convert it to text, or, if you already have an audio file, drop it into the Inspector and comment out the first line of Start().
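One thing to keep in mind with the microphone path: Microphone.Start returns immediately while Unity keeps filling the clip in the background, so calling Recognize right away may hand Watson a mostly empty buffer. Below is a minimal sketch of a variant that waits for the 4-second recording to finish first; it uses the same SpeechToText.Recognize call as above, and the class and field names are mine:
using System.Collections;
using UnityEngine;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;

public class DelayedClipToText : MonoBehaviour
{
    SpeechToText m_SpeechToText = new SpeechToText();
    const int RecordSeconds = 4;

    void Start()
    {
        StartCoroutine(RecordThenRecognize());
    }

    IEnumerator RecordThenRecognize()
    {
        // null device name makes Unity use the system default microphone
        string device = Microphone.devices.Length > 0 ? Microphone.devices[0] : null;
        AudioClip clip = Microphone.Start(device, false, RecordSeconds, 44100);

        // Wait until the whole buffer has been filled before handing it to Watson.
        yield return new WaitForSeconds(RecordSeconds);
        Microphone.End(device);

        m_SpeechToText.Recognize(clip, OnRecognize);
    }

    void OnRecognize(SpeechRecognitionEvent result)
    {
        if (result == null || result.results.Length == 0)
            return;

        foreach (var res in result.results)
            foreach (var alt in res.alternatives)
                Debug.Log(alt.transcript + " (final: " + res.final + ")");
    }
}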