
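// DeepSpeech .NET console client example.
// Optional command-line arguments (parsed in Main, defaults shown where the code provides one):
//   --model <path to .pbmm model>            defaults to "output_graph.pbmm"
//   --scorer <path to external scorer file>  optional
//   --audio <path to WAV file>               defaults to "arctic_a0024.wav"
//   --hot_words word:boost[,word:boost...]   optional
//   --extended <any value>                   print per-token metadata instead of plain text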
using DeepSpeechClient;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;

namespace CSharpExamples
{
    class Program
    {
        /// <summary>
        /// Get the value of an argument.
        /// </summary>
        /// <param name="args">Argument list.</param>
        /// <param name="option">Key of the argument.</param>
        /// <returns>Value of the argument.</returns>
        static string GetArgument(IEnumerable<string> args, string option)
            => args.SkipWhile(i => i != option).Skip(1).Take(1).FirstOrDefault();

        /// <summary>
        /// Format a candidate transcript and its per-token metadata as a printable string.
        /// </summary>
        static string MetadataToString(CandidateTranscript transcript)
        {
            var nl = Environment.NewLine;
            string retval =
                Environment.NewLine + $"Recognized text: {string.Join("", transcript?.Tokens?.Select(x => x.Text))} {nl}"
                + $"Confidence: {transcript?.Confidence} {nl}"
                + $"Item count: {transcript?.Tokens?.Length} {nl}"
                + string.Join(nl, transcript?.Tokens?.Select(x =>
                    $"Timestep : {x.Timestep} TimeOffset: {x.StartTime} Char: {x.Text}"));
            return retval;
        }

        static void Main(string[] args)
        {
            string model = null;
            string scorer = null;
            string audio = null;
            string hotwords = null;
            bool extended = false;

            // Parse optional command-line arguments.
            if (args.Length > 0)
            {
                model = GetArgument(args, "--model");
                scorer = GetArgument(args, "--scorer");
                audio = GetArgument(args, "--audio");
                hotwords = GetArgument(args, "--hot_words");
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

            Stopwatch stopwatch = new Stopwatch();
            try
            {
                Console.WriteLine("Loading model...");
                stopwatch.Start();
                // sphinx-doc: csharp_ref_model_start
                using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm"))
                {
                    // sphinx-doc: csharp_ref_model_stop
                    stopwatch.Stop();
                    Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
                    stopwatch.Reset();

                    if (scorer != null)
                    {
                        Console.WriteLine("Loading scorer...");
                        sttClient.EnableExternalScorer(scorer);
                    }

                    // Hot-words are given as comma-separated "word:boost" pairs.
                    if (hotwords != null)
                    {
                        Console.WriteLine($"Adding hot-words {hotwords}");
                        char[] sep = { ',' };
                        string[] word_boosts = hotwords.Split(sep);
                        foreach (string word_boost in word_boosts)
                        {
                            char[] sep1 = { ':' };
                            string[] word = word_boost.Split(sep1);
                            sttClient.AddHotWord(word[0], float.Parse(word[1]));
                        }
                    }

                    string audioFile = audio ?? "arctic_a0024.wav";
                    var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                    using (var waveInfo = new WaveFileReader(audioFile))
                    {
                        Console.WriteLine("Running inference....");
                        stopwatch.Start();

                        string speechResult;
                        // sphinx-doc: csharp_ref_inference_start
                        if (extended)
                        {
                            Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                                Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                            speechResult = MetadataToString(metaResult.Transcripts[0]);
                        }
                        else
                        {
                            speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                                Convert.ToUInt32(waveBuffer.MaxSize / 2));
                        }
                        // sphinx-doc: csharp_ref_inference_stop
                        stopwatch.Stop();

                        Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                        Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                        Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                    }
                    waveBuffer.Clear();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
    }
}