PocketSphinx 5prealpha
ngram_search.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
42/* System headers. */
43#include <string.h>
44#include <assert.h>
45
46/* SphinxBase headers. */
47#include <sphinxbase/ckd_alloc.h>
48#include <sphinxbase/listelem_alloc.h>
49#include <sphinxbase/err.h>
50
51/* Local headers. */
53#include "ps_lattice_internal.h"
54#include "ngram_search.h"
57
58static int ngram_search_start(ps_search_t *search);
59static int ngram_search_step(ps_search_t *search, int frame_idx);
60static int ngram_search_finish(ps_search_t *search);
61static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
62static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score);
63static int32 ngram_search_prob(ps_search_t *search);
64static ps_seg_t *ngram_search_seg_iter(ps_search_t *search);
65
66static ps_searchfuncs_t ngram_funcs = {
67 /* start: */ ngram_search_start,
68 /* step: */ ngram_search_step,
69 /* finish: */ ngram_search_finish,
70 /* reinit: */ ngram_search_reinit,
71 /* free: */ ngram_search_free,
72 /* lattice: */ ngram_search_lattice,
73 /* hyp: */ ngram_search_hyp,
74 /* prob: */ ngram_search_prob,
75 /* seg_iter: */ ngram_search_seg_iter,
76};
77
78static ngram_model_t *default_lm;
79
80static void
81ngram_search_update_widmap(ngram_search_t *ngs)
82{
83 char const **words;
84 int32 i, n_words;
85
86 /* It's okay to include fillers since they won't be in the LM */
87 n_words = ps_search_n_words(ngs);
88 words = (char const**)ckd_calloc(n_words, sizeof(*words));
89 /* This will include alternates, again, that's okay since they aren't in the LM */
90 for (i = 0; i < n_words; ++i)
91 words[i] = dict_wordstr(ps_search_dict(ngs), i);
92 ngram_model_set_map_words(ngs->lmset, words, n_words);
93 ckd_free(words);
94}
95
96static void
97ngram_search_calc_beams(ngram_search_t *ngs)
98{
99 cmd_ln_t *config;
100 acmod_t *acmod;
101
102 config = ps_search_config(ngs);
103 acmod = ps_search_acmod(ngs);
104
105 /* Log beam widths. */
106 ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
107 ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
108 ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
109 ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
110 ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
111 ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
112 ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
113
114 /* Absolute pruning parameters. */
115 ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
116 ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
117
118 /* Various penalties which may or may not be useful. */
119 ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
120 ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
121 ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
122 ngs->silpen = ngs->pip
123 + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
124 ngs->fillpen = ngs->pip
125 + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
126
127 /* Language weight ratios for fwdflat and bestpath search. */
128 ngs->fwdflat_fwdtree_lw_ratio =
129 cmd_ln_float32_r(config, "-fwdflatlw")
130 / cmd_ln_float32_r(config, "-lw");
131 ngs->bestpath_fwdtree_lw_ratio =
132 cmd_ln_float32_r(config, "-bestpathlw")
133 / cmd_ln_float32_r(config, "-lw");
134
135 /* Acoustic score scale for posterior probabilities. */
136 ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
137}
138
140ngram_search_init(const char *name,
141 ngram_model_t *lm,
142 cmd_ln_t *config,
143 acmod_t *acmod,
144 dict_t *dict,
145 dict2pid_t *d2p)
146{
147 ngram_search_t *ngs;
148 static char *lmname = "default";
149
150 /* Make the acmod's feature buffer growable if we are doing two-pass
151 * search. */
152 acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") &&
153 cmd_ln_boolean_r(config, "-fwdtree"));
154
155 ngs = ckd_calloc(1, sizeof(*ngs));
156 ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
157
158 ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
159 acmod->tmat->tp, NULL, acmod->mdef->sseq);
160 if (ngs->hmmctx == NULL) {
161 ps_search_free(ps_search_base(ngs));
162 return NULL;
163 }
164 ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
165 ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
166 ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
167
168 /* Calculate various beam widths and such. */
169 ngram_search_calc_beams(ngs);
170
171 /* Allocate a billion different tables for stuff. */
172 ngs->word_chan = ckd_calloc(dict_size(dict),
173 sizeof(*ngs->word_chan));
174 ngs->word_lat_idx = ckd_calloc(dict_size(dict),
175 sizeof(*ngs->word_lat_idx));
176 ngs->word_active = bitvec_alloc(dict_size(dict));
177 ngs->last_ltrans = ckd_calloc(dict_size(dict),
178 sizeof(*ngs->last_ltrans));
179
180 /* FIXME: All these structures need to be made dynamic with
181 * garbage collection. */
182 ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
183 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184 sizeof(*ngs->bp_table));
185 /* FIXME: This thing is frickin' huge. */
186 ngs->bscore_stack_size = ngs->bp_table_size * 20;
187 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188 sizeof(*ngs->bscore_stack));
189 ngs->n_frame_alloc = 256;
190 ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
191 sizeof(*ngs->bp_table_idx));
192 ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
193
194 /* Allocate active word list array */
195 ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
196 sizeof(**ngs->active_word_list));
197
198 ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
199 if (!ngs->lmset)
200 goto error_out;
201
202 if (ngram_wid(ngs->lmset, S3_FINISH_WORD) ==
203 ngram_unknown_wid(ngs->lmset))
204 {
205 E_ERROR("Language model/set does not contain </s>, "
206 "recognition will fail\n");
207 goto error_out;
208 }
209
210 /* Create word mappings. */
211 ngram_search_update_widmap(ngs);
212
213 /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
214 if (cmd_ln_boolean_r(config, "-fwdtree")) {
216 ngs->fwdtree = TRUE;
217 ngs->fwdtree_perf.name = "fwdtree";
218 ptmr_init(&ngs->fwdtree_perf);
219 }
220 if (cmd_ln_boolean_r(config, "-fwdflat")) {
222 ngs->fwdflat = TRUE;
223 ngs->fwdflat_perf.name = "fwdflat";
224 ptmr_init(&ngs->fwdflat_perf);
225 }
226 if (cmd_ln_boolean_r(config, "-bestpath")) {
227 ngs->bestpath = TRUE;
228 ngs->bestpath_perf.name = "bestpath";
229 ptmr_init(&ngs->bestpath_perf);
230 }
231
232 return (ps_search_t *)ngs;
233
234error_out:
236 return NULL;
237}
238
239static int
240ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
241{
242 ngram_search_t *ngs = (ngram_search_t *)search;
243 int old_n_words;
244 int rv = 0;
245
246 /* Update the number of words. */
247 old_n_words = search->n_words;
248 if (old_n_words != dict_size(dict)) {
249 search->n_words = dict_size(dict);
250 /* Reallocate these temporary arrays. */
251 ckd_free(ngs->word_lat_idx);
252 ckd_free(ngs->word_active);
253 ckd_free(ngs->last_ltrans);
254 ckd_free_2d(ngs->active_word_list);
255 ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
256 ngs->word_active = bitvec_alloc(search->n_words);
257 ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
259 = ckd_calloc_2d(2, search->n_words,
260 sizeof(**ngs->active_word_list));
261 }
262
263 /* Free old dict2pid, dict */
264 ps_search_base_reinit(search, dict, d2p);
265
266 if (ngs->lmset == NULL)
267 return 0;
268
269 /* Update beam widths. */
270 ngram_search_calc_beams(ngs);
271
272 /* Update word mappings. */
273 ngram_search_update_widmap(ngs);
274
275 /* Now rebuild lextrees. */
276 if (ngs->fwdtree) {
277 if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
278 return rv;
279 }
280 if (ngs->fwdflat) {
281 if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
282 return rv;
283 }
284
285 return rv;
286}
287
288void
290{
291 ngram_search_t *ngs = (ngram_search_t *)search;
292
293 if (ngs->fwdtree)
295 if (ngs->fwdflat)
297 if (ngs->bestpath) {
298 double n_speech = (double)ngs->n_tot_frame
299 / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
300
301 E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
302 ngs->bestpath_perf.t_tot_cpu,
303 ngs->bestpath_perf.t_tot_cpu / n_speech);
304 E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
305 ngs->bestpath_perf.t_tot_elapsed,
306 ngs->bestpath_perf.t_tot_elapsed / n_speech);
307 }
308
309 ps_search_base_free(search);
311 listelem_alloc_free(ngs->chan_alloc);
312 listelem_alloc_free(ngs->root_chan_alloc);
313 listelem_alloc_free(ngs->latnode_alloc);
314 ngram_model_free(ngs->lmset);
315
316 ckd_free(ngs->word_chan);
317 ckd_free(ngs->word_lat_idx);
318 bitvec_free(ngs->word_active);
319 ckd_free(ngs->bp_table);
320 ckd_free(ngs->bscore_stack);
321 if (ngs->bp_table_idx != NULL)
322 ckd_free(ngs->bp_table_idx - 1);
323 ckd_free_2d(ngs->active_word_list);
324 ckd_free(ngs->last_ltrans);
325 ckd_free(ngs);
326}
327
328int
330{
331 if (frame_idx >= ngs->n_frame_alloc) {
332 ngs->n_frame_alloc *= 2;
333 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
334 (ngs->n_frame_alloc + 1)
335 * sizeof(*ngs->bp_table_idx));
336 if (ngs->frm_wordlist) {
337 ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
338 ngs->n_frame_alloc
339 * sizeof(*ngs->frm_wordlist));
340 }
341 ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
342 }
343 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
344 return ngs->bpidx;
345}
346
347static void
348set_real_wid(ngram_search_t *ngs, int32 bp)
349{
350 bptbl_t *ent, *prev;
351
352 assert(bp != NO_BP);
353 ent = ngs->bp_table + bp;
354 if (ent->bp == NO_BP)
355 prev = NULL;
356 else
357 prev = ngs->bp_table + ent->bp;
358
359 /* Propagate lm state for fillers, rotate it for words. */
360 if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
361 if (prev != NULL) {
362 ent->real_wid = prev->real_wid;
363 ent->prev_real_wid = prev->prev_real_wid;
364 }
365 else {
366 ent->real_wid = dict_basewid(ps_search_dict(ngs),
367 ent->wid);
369 }
370 }
371 else {
372 ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
373 if (prev != NULL)
374 ent->prev_real_wid = prev->real_wid;
375 else
377 }
378}
379
380#define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
381
382void
384 int32 w, int32 score, int32 path, int32 rc)
385{
386 int32 bp;
387
388 /* Look for an existing exit for this word in this frame. The
389 * only reason one would exist is from a different right context
390 * triphone, but of course that happens quite frequently. */
391 bp = ngs->word_lat_idx[w];
392 if (bp != NO_BP) {
393
394 if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
395 E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396 frame_idx - ngs->bp_table[path].frame);
397 }
398
399 /* Keep only the best scoring one, we will reconstruct the
400 * others from the right context scores - usually the history
401 * is not lost. */
402 if (ngs->bp_table[bp].score WORSE_THAN score) {
403 assert(path != bp); /* Pathological. */
404 if (ngs->bp_table[bp].bp != path) {
405 int32 bplh[2], newlh[2];
406 /* But, sometimes, the history *is* lost. If we wanted to
407 * do exact language model scoring we'd have to preserve
408 * these alternate histories. */
409 E_DEBUG(2,("Updating path history %d => %d frame %d\n",
410 ngs->bp_table[bp].bp, path, frame_idx));
411 bplh[0] = ngs->bp_table[bp].bp == -1
412 ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
413 bplh[1] = ngs->bp_table[bp].bp == -1
414 ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
415 newlh[0] = path == -1
416 ? -1 : ngs->bp_table[path].prev_real_wid;
417 newlh[1] = path == -1
418 ? -1 : ngs->bp_table[path].real_wid;
419 /* Actually it's worth checking how often the actual
420 * language model state changes. */
421 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
422 /* It's fairly rare that the actual language model
423 * state changes, but it does happen some
424 * times. */
425 E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
426 dict_wordstr(ps_search_dict(ngs), bplh[0]),
427 dict_wordstr(ps_search_dict(ngs), bplh[1]),
428 dict_wordstr(ps_search_dict(ngs), newlh[0]),
429 dict_wordstr(ps_search_dict(ngs), newlh[1]),
430 frame_idx));
431 set_real_wid(ngs, bp);
432 }
433 ngs->bp_table[bp].bp = path;
434 }
435 ngs->bp_table[bp].score = score;
436 }
437 /* But do keep track of scores for all right contexts, since
438 * we need them to determine the starting path scores for any
439 * successors of this word exit. */
440 if (ngs->bp_table[bp].s_idx != -1)
441 ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
442 }
443 else {
444 int32 i, rcsize;
445 bptbl_t *be;
446
447 /* This might happen if recognition fails. */
448 if (ngs->bpidx == NO_BP) {
449 E_ERROR("No entries in backpointer table!");
450 return;
451 }
452
453 /* Expand the backpointer tables if necessary. */
454 if (ngs->bpidx >= ngs->bp_table_size) {
455 ngs->bp_table_size *= 2;
456 ngs->bp_table = ckd_realloc(ngs->bp_table,
457 ngs->bp_table_size
458 * sizeof(*ngs->bp_table));
459 E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
460 }
461 if (ngs->bss_head >= ngs->bscore_stack_size
462 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463 ngs->bscore_stack_size *= 2;
464 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465 ngs->bscore_stack_size
466 * sizeof(*ngs->bscore_stack));
467 E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
468 }
469
470 ngs->word_lat_idx[w] = ngs->bpidx;
471 be = &(ngs->bp_table[ngs->bpidx]);
472 be->wid = w;
473 be->frame = frame_idx;
474 be->bp = path;
475 be->score = score;
476 be->s_idx = ngs->bss_head;
477 be->valid = TRUE;
478 assert(path != ngs->bpidx);
479
480 /* DICT2PID */
481 /* Get diphone ID for final phone and number of ssids corresponding to it. */
482 be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
483 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
484 be->last2_phone = -1;
485 be->s_idx = -1;
486 rcsize = 0;
487 }
488 else {
489 be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
490 rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
491 be->last_phone, be->last2_phone)->n_ssid;
492 }
493 /* Allocate some space on the bscore_stack for all of these triphones. */
494 for (i = 0; i < rcsize; ++i)
495 ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
496 if (rcsize)
497 ngs->bscore_stack[ngs->bss_head + rc] = score;
498 set_real_wid(ngs, ngs->bpidx);
499
500 ngs->bpidx++;
501 ngs->bss_head += rcsize;
502 }
503}
504
505int
506ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
507{
508 /* End of backpointers for this frame. */
509 int end_bpidx;
510 int best_exit, bp;
511 int32 best_score;
512
513 /* No hypothesis means no exit node! */
514 if (ngs->n_frame == 0)
515 return NO_BP;
516
517 if (frame_idx == -1 || frame_idx >= ngs->n_frame)
518 frame_idx = ngs->n_frame - 1;
519 end_bpidx = ngs->bp_table_idx[frame_idx];
520
521 best_score = WORST_SCORE;
522 best_exit = NO_BP;
523
524 /* Scan back to find a frame with some backpointers in it. */
525 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
526 --frame_idx;
527 /* This is NOT an error, it just means there is no hypothesis yet. */
528 if (frame_idx < 0)
529 return NO_BP;
530
531 /* Now find the entry for </s> OR the best scoring entry. */
532 assert(end_bpidx < ngs->bp_table_size);
533 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534 if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
535 || ngs->bp_table[bp].score BETTER_THAN best_score) {
536 best_score = ngs->bp_table[bp].score;
537 best_exit = bp;
538 }
539 if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
540 break;
541 }
542
543 if (out_best_score) {
544 *out_best_score = best_score;
545 }
546 return best_exit;
547}
548
549char const *
551{
552 ps_search_t *base = ps_search_base(ngs);
553 char *c;
554 size_t len;
555 int bp;
556
557 if (bpidx == NO_BP)
558 return NULL;
559
560 bp = bpidx;
561 len = 0;
562 while (bp != NO_BP) {
563 bptbl_t *be = &ngs->bp_table[bp];
564 bp = be->bp;
565 if (dict_real_word(ps_search_dict(ngs), be->wid))
566 len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
567 }
568
569 ckd_free(base->hyp_str);
570 if (len == 0) {
571 base->hyp_str = NULL;
572 return base->hyp_str;
573 }
574 base->hyp_str = ckd_calloc(1, len);
575
576 bp = bpidx;
577 c = base->hyp_str + len - 1;
578 while (bp != NO_BP) {
579 bptbl_t *be = &ngs->bp_table[bp];
580 size_t len;
581
582 bp = be->bp;
583 if (dict_real_word(ps_search_dict(ngs), be->wid)) {
584 len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
585 c -= len;
586 memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
587 if (c > base->hyp_str) {
588 --c;
589 *c = ' ';
590 }
591 }
592 }
593
594 return base->hyp_str;
595}
596
597void
599{
600 chan_t *hmm, *thmm;
601 xwdssid_t *rssid;
602 int32 i, tmatid, ciphone;
603
604 /* DICT2PID */
605 /* Get pointer to array of triphones for final diphone. */
606 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
607 ciphone = dict_last_phone(ps_search_dict(ngs),w);
608 rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
609 ciphone,
610 dict_second_last_phone(ps_search_dict(ngs),w));
611 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
612 hmm = ngs->word_chan[w];
613 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
614 hmm = listelem_malloc(ngs->chan_alloc);
615 hmm->next = ngs->word_chan[w];
616 ngs->word_chan[w] = hmm;
617
618 hmm->info.rc_id = 0;
619 hmm->ciphone = ciphone;
620 hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
621 E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
622 rssid->ssid[0], hmm->ciphone,
623 dict_second_last_phone(ps_search_dict(ngs),w),
624 dict_wordstr(ps_search_dict(ngs),w)));
625 }
626 for (i = 1; i < rssid->n_ssid; ++i) {
627 if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
628 thmm = listelem_malloc(ngs->chan_alloc);
629 thmm->next = hmm->next;
630 hmm->next = thmm;
631 hmm = thmm;
632
633 hmm->info.rc_id = i;
634 hmm->ciphone = ciphone;
635 hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
636 E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
637 i, rssid->ssid[i], hmm->ciphone,
638 dict_second_last_phone(ps_search_dict(ngs),w),
639 dict_wordstr(ps_search_dict(ngs),w)));
640 }
641 else
642 hmm = hmm->next;
643 }
644}
645
646void
648{
649 chan_t *hmm, *thmm;
650
651 for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
652 thmm = hmm->next;
653 hmm_deinit(&hmm->hmm);
654 listelem_free(ngs->chan_alloc, hmm);
655 }
656 ngs->word_chan[w] = NULL;
657}
658
659int32
661{
662 /* DICT2PID */
663 /* Get the mapping from right context phone ID to index in the
664 * right context table and the bscore_stack. */
665 if (pbe->last2_phone == -1) {
666 /* No right context for single phone predecessor words. */
667 return pbe->score;
668 }
669 else {
670 xwdssid_t *rssid;
671 /* Find the index for the last diphone of the previous word +
672 * the first phone of the current word. */
673 rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
674 pbe->last_phone, pbe->last2_phone);
675 /* This may be WORST_SCORE, which means that there was no exit
676 * with rcphone as right context. */
677 return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
678 }
679}
680
681/*
682 * Compute acoustic and LM scores for a BPTable entry (segment).
683 */
684void
685ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
686 int32 *out_ascr, int32 *out_lscr)
687{
688 bptbl_t *pbe;
689 int32 start_score;
690
691 /* Start of utterance. */
692 if (be->bp == NO_BP) {
693 *out_ascr = be->score;
694 *out_lscr = 0;
695 return;
696 }
697
698 /* Otherwise, calculate lscr and ascr. */
699 pbe = ngs->bp_table + be->bp;
700 start_score = ngram_search_exit_score(ngs, pbe,
701 dict_first_phone(ps_search_dict(ngs),be->wid));
702 assert(start_score BETTER_THAN WORST_SCORE);
703
704 /* FIXME: These result in positive acoustic scores when filler
705 words have non-filler pronunciations. That whole business
706 is still pretty much broken but at least it doesn't
707 segfault. */
708 if (be->wid == ps_search_silence_wid(ngs)) {
709 *out_lscr = ngs->silpen;
710 }
711 else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
712 *out_lscr = ngs->fillpen;
713 }
714 else {
715 int32 n_used;
716 *out_lscr = ngram_tg_score(ngs->lmset,
717 be->real_wid,
718 pbe->real_wid,
719 pbe->prev_real_wid,
720 &n_used)>>SENSCR_SHIFT;
721 *out_lscr = *out_lscr * lwf;
722 }
723 *out_ascr = be->score - start_score - *out_lscr;
724}
725
726static int
727ngram_search_start(ps_search_t *search)
728{
729 ngram_search_t *ngs = (ngram_search_t *)search;
730
731 ngs->done = FALSE;
732 ngram_model_flush(ngs->lmset);
733 if (ngs->fwdtree)
735 else if (ngs->fwdflat)
737 else
738 return -1;
739 return 0;
740}
741
742static int
743ngram_search_step(ps_search_t *search, int frame_idx)
744{
745 ngram_search_t *ngs = (ngram_search_t *)search;
746
747 if (ngs->fwdtree)
748 return ngram_fwdtree_search(ngs, frame_idx);
749 else if (ngs->fwdflat)
750 return ngram_fwdflat_search(ngs, frame_idx);
751 else
752 return -1;
753}
754
755void
756dump_bptable(ngram_search_t *ngs)
757{
758 int i;
759 E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
760 for (i = 0; i < ngs->bpidx; ++i) {
761 bptbl_t *bpe = ngs->bp_table + i;
762 int j, rcsize;
763
764 E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
765 i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
766 (bpe->bp == -1
767 ? 0 : ngs->bp_table[bpe->bp].frame + 1),
768 bpe->frame, bpe->score, bpe->bp,
769 bpe->real_wid, bpe->prev_real_wid);
770
771 if (bpe->last2_phone == -1)
772 rcsize = 0;
773 else
774 rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
775 bpe->last_phone, bpe->last2_phone)->n_ssid;
776 if (rcsize) {
777 E_INFOCONT("\tbss");
778 for (j = 0; j < rcsize; ++j)
779 if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
780 E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
781 }
782 E_INFOCONT("\n");
783 }
784}
785
786static int
787ngram_search_finish(ps_search_t *search)
788{
789 ngram_search_t *ngs = (ngram_search_t *)search;
790
791 ngs->n_tot_frame += ngs->n_frame;
792 if (ngs->fwdtree) {
794 /* dump_bptable(ngs); */
795
796 /* Now do fwdflat search in its entirety, if requested. */
797 if (ngs->fwdflat) {
798 int i;
799 /* Rewind the acoustic model. */
800 if (acmod_rewind(ps_search_acmod(ngs)) < 0)
801 return -1;
802 /* Now redo search. */
804 i = 0;
805 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
806 int nfr;
807 if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
808 return nfr;
809 acmod_advance(ps_search_acmod(ngs));
810 ++i;
811 }
813 /* And now, we should have a result... */
814 /* dump_bptable(ngs); */
815 }
816 }
817 else if (ngs->fwdflat) {
819 }
820
821 /* Mark the current utterance as done. */
822 ngs->done = TRUE;
823 return 0;
824}
825
826static ps_latlink_t *
827ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
828{
829 ngram_search_t *ngs = (ngram_search_t *)search;
830
831 if (search->last_link == NULL) {
832 search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
833 ngs->bestpath_fwdtree_lw_ratio,
834 ngs->ascale);
835 if (search->last_link == NULL)
836 return NULL;
837 /* Also calculate betas so we can fill in the posterior
838 * probability field in the segmentation. */
839 if (search->post == 0)
840 search->post = ps_lattice_posterior(search->dag, ngs->lmset,
841 ngs->ascale);
842 }
843 if (out_score)
844 *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
845 return search->last_link;
846}
847
848static char const *
849ngram_search_hyp(ps_search_t *search, int32 *out_score)
850{
851 ngram_search_t *ngs = (ngram_search_t *)search;
852
853 /* Only do bestpath search if the utterance is complete. */
854 if (ngs->bestpath && ngs->done) {
855 ps_lattice_t *dag;
856 ps_latlink_t *link;
857 char const *hyp;
858 double n_speech;
859
860 ptmr_reset(&ngs->bestpath_perf);
861 ptmr_start(&ngs->bestpath_perf);
862 if ((dag = ngram_search_lattice(search)) == NULL)
863 return NULL;
864 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
865 return NULL;
866 hyp = ps_lattice_hyp(dag, link);
867 ptmr_stop(&ngs->bestpath_perf);
868 n_speech = (double)dag->n_frames
869 / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
870 E_INFO("bestpath %.2f CPU %.3f xRT\n",
871 ngs->bestpath_perf.t_cpu,
872 ngs->bestpath_perf.t_cpu / n_speech);
873 E_INFO("bestpath %.2f wall %.3f xRT\n",
874 ngs->bestpath_perf.t_elapsed,
875 ngs->bestpath_perf.t_elapsed / n_speech);
876 return hyp;
877 }
878 else {
879 int32 bpidx;
880
881 /* fwdtree and fwdflat use same backpointer table. */
882 bpidx = ngram_search_find_exit(ngs, -1, out_score);
883 if (bpidx != NO_BP)
884 return ngram_search_bp_hyp(ngs, bpidx);
885 }
886
887 return NULL;
888}
889
890static void
891ngram_search_bp2itor(ps_seg_t *seg, int bp)
892{
893 ngram_search_t *ngs = (ngram_search_t *)seg->search;
894 bptbl_t *be, *pbe;
895
896 be = &ngs->bp_table[bp];
897 pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
898 seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
899 seg->ef = be->frame;
900 seg->sf = pbe ? pbe->frame + 1 : 0;
901 seg->prob = 0; /* Bogus value... */
902 /* Compute acoustic and LM scores for this segment. */
903 if (pbe == NULL) {
904 seg->ascr = be->score;
905 seg->lscr = 0;
906 seg->lback = 0;
907 }
908 else {
909 int32 start_score;
910
911 /* Find ending path score of previous word. */
912 start_score = ngram_search_exit_score(ngs, pbe,
913 dict_first_phone(ps_search_dict(ngs), be->wid));
914 assert(start_score BETTER_THAN WORST_SCORE);
915 if (be->wid == ps_search_silence_wid(ngs)) {
916 seg->lscr = ngs->silpen;
917 }
918 else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
919 seg->lscr = ngs->fillpen;
920 }
921 else {
922 seg->lscr = ngram_tg_score(ngs->lmset,
923 be->real_wid,
924 pbe->real_wid,
925 pbe->prev_real_wid,
926 &seg->lback)>>SENSCR_SHIFT;
927 seg->lscr = (int32)(seg->lscr * seg->lwf);
928 }
929 seg->ascr = be->score - start_score - seg->lscr;
930 }
931}
932
933static void
934ngram_bp_seg_free(ps_seg_t *seg)
935{
936 bptbl_seg_t *itor = (bptbl_seg_t *)seg;
937
938 ckd_free(itor->bpidx);
939 ckd_free(itor);
940}
941
942static ps_seg_t *
943ngram_bp_seg_next(ps_seg_t *seg)
944{
945 bptbl_seg_t *itor = (bptbl_seg_t *)seg;
946
947 if (++itor->cur == itor->n_bpidx) {
948 ngram_bp_seg_free(seg);
949 return NULL;
950 }
951
952 ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
953 return seg;
954}
955
956static ps_segfuncs_t ngram_bp_segfuncs = {
957 /* seg_next */ ngram_bp_seg_next,
958 /* seg_free */ ngram_bp_seg_free
959};
960
961static ps_seg_t *
962ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
963{
964 bptbl_seg_t *itor;
965 int bp, cur;
966
967 /* Calling this an "iterator" is a bit of a misnomer since we have
968 * to get the entire backtrace in order to produce it. On the
969 * other hand, all we actually need is the bptbl IDs, and we can
970 * allocate a fixed-size array of them. */
971 itor = ckd_calloc(1, sizeof(*itor));
972 itor->base.vt = &ngram_bp_segfuncs;
973 itor->base.search = ps_search_base(ngs);
974 itor->base.lwf = lwf;
975 itor->n_bpidx = 0;
976 bp = bpidx;
977 while (bp != NO_BP) {
978 bptbl_t *be = &ngs->bp_table[bp];
979 bp = be->bp;
980 ++itor->n_bpidx;
981 }
982 if (itor->n_bpidx == 0) {
983 ckd_free(itor);
984 return NULL;
985 }
986 itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
987 cur = itor->n_bpidx - 1;
988 bp = bpidx;
989 while (bp != NO_BP) {
990 bptbl_t *be = &ngs->bp_table[bp];
991 itor->bpidx[cur] = bp;
992 bp = be->bp;
993 --cur;
994 }
995
996 /* Fill in relevant fields for first element. */
997 ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
998
999 return (ps_seg_t *)itor;
1000}
1001
1002static ps_seg_t *
1003ngram_search_seg_iter(ps_search_t *search)
1004{
1005 ngram_search_t *ngs = (ngram_search_t *)search;
1006
1007 /* Only do bestpath search if the utterance is done. */
1008 if (ngs->bestpath && ngs->done) {
1009 ps_lattice_t *dag;
1010 ps_latlink_t *link;
1011 double n_speech;
1012 ps_seg_t *itor;
1013
1014 ptmr_reset(&ngs->bestpath_perf);
1015 ptmr_start(&ngs->bestpath_perf);
1016 if ((dag = ngram_search_lattice(search)) == NULL)
1017 return NULL;
1018 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1019 return NULL;
1020 itor = ps_lattice_seg_iter(dag, link,
1021 ngs->bestpath_fwdtree_lw_ratio);
1022 ptmr_stop(&ngs->bestpath_perf);
1023 n_speech = (double)dag->n_frames
1024 / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1025 E_INFO("bestpath %.2f CPU %.3f xRT\n",
1026 ngs->bestpath_perf.t_cpu,
1027 ngs->bestpath_perf.t_cpu / n_speech);
1028 E_INFO("bestpath %.2f wall %.3f xRT\n",
1029 ngs->bestpath_perf.t_elapsed,
1030 ngs->bestpath_perf.t_elapsed / n_speech);
1031 return itor;
1032 }
1033 else {
1034 int32 bpidx;
1035
1036 /* fwdtree and fwdflat use same backpointer table. */
1037 bpidx = ngram_search_find_exit(ngs, -1, NULL);
1038 return ngram_search_bp_iter(ngs, bpidx,
1039 /* but different language weights... */
1040 (ngs->done && ngs->fwdflat)
1041 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1042 }
1043
1044 return NULL;
1045}
1046
1047static int32
1048ngram_search_prob(ps_search_t *search)
1049{
1050 ngram_search_t *ngs = (ngram_search_t *)search;
1051
1052 /* Only do bestpath search if the utterance is done. */
1053 if (ngs->bestpath && ngs->done) {
1054 ps_lattice_t *dag;
1055 ps_latlink_t *link;
1056
1057 if ((dag = ngram_search_lattice(search)) == NULL)
1058 return 0;
1059 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1060 return 0;
1061 return search->post;
1062 }
1063 else {
1064 /* FIXME: Give some kind of good estimate here, eventually. */
1065 return 0;
1066 }
1067}
1068
1069static void
1070create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
1071{
1072 bptbl_t *bp_ptr;
1073 int32 i;
1074
1075 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1076 int32 sf, ef, wid;
1077 ps_latnode_t *node;
1078
1079 /* Skip invalid backpointers (these result from -maxwpf pruning) */
1080 if (!bp_ptr->valid)
1081 continue;
1082
1083 sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1084 ef = bp_ptr->frame;
1085 wid = bp_ptr->wid;
1086
1087 assert(ef < dag->n_frames);
1088 /* Skip non-final </s> entries. */
1089 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1090 continue;
1091
1092 /* Skip if word not in LM */
1093 if ((!dict_filler_word(ps_search_dict(ngs), wid))
1094 && (!ngram_model_set_known_wid(ngs->lmset,
1095 dict_basewid(ps_search_dict(ngs), wid))))
1096 continue;
1097
1098 /* See if bptbl entry <wid,sf> already in lattice */
1099 for (node = dag->nodes; node; node = node->next) {
1100 if ((node->wid == wid) && (node->sf == sf))
1101 break;
1102 }
1103
1104 /* For the moment, store bptbl indices in node.{fef,lef} */
1105 if (node)
1106 node->lef = i;
1107 else {
1108 /* New node; link to head of list */
1109 node = listelem_malloc(dag->latnode_alloc);
1110 node->wid = wid;
1111 node->sf = sf; /* This is a frame index. */
1112 node->fef = node->lef = i; /* These are backpointer indices (argh) */
1113 node->reachable = FALSE;
1114 node->entries = NULL;
1115 node->exits = NULL;
1116
1117 /* NOTE: This creates the list of nodes in reverse
1118 * topological order, i.e. a node always precedes its
1119 * antecedents in this list. */
1120 node->next = dag->nodes;
1121 dag->nodes = node;
1122 ++dag->n_nodes;
1123 }
1124 }
1125}
1126
1127static ps_latnode_t *
1128find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
1129{
1130 ps_latnode_t *node;
1131
1132 /* Find start node <s>.0 */
1133 for (node = dag->nodes; node; node = node->next) {
1134 if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
1135 break;
1136 }
1137 if (!node) {
1138 /* This is probably impossible. */
1139 E_ERROR("Couldn't find <s> in first frame\n");
1140 return NULL;
1141 }
1142 return node;
1143}
1144
1145static ps_latnode_t *
1146find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
1147{
1148 ps_latnode_t *node;
1149 int32 ef, bestbp, bp, bestscore;
1150
1151 /* Find final node </s>.last_frame; nothing can follow this node */
1152 for (node = dag->nodes; node; node = node->next) {
1153 int32 lef = ngs->bp_table[node->lef].frame;
1154 if ((node->wid == ps_search_finish_wid(ngs))
1155 && (lef == dag->n_frames - 1))
1156 break;
1157 }
1158 if (node != NULL)
1159 return node;
1160
1161 /* It is quite likely that no </s> exited in the last frame. So,
1162 * find the node corresponding to the best exit. */
1163 /* Find the last frame containing a word exit. */
1164 for (ef = dag->n_frames - 1;
1165 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1166 --ef);
1167 if (ef < 0) {
1168 E_ERROR("Empty backpointer table: can not build DAG.\n");
1169 return NULL;
1170 }
1171
1172 /* Find best word exit in that frame. */
1173 bestscore = WORST_SCORE;
1174 bestbp = NO_BP;
1175 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1176 int32 n_used, l_scr, wid, prev_wid;
1177 wid = ngs->bp_table[bp].real_wid;
1178 prev_wid = ngs->bp_table[bp].prev_real_wid;
1179 /* Always prefer </s>, of which there will only be one per frame. */
1180 if (wid == ps_search_finish_wid(ngs)) {
1181 bestbp = bp;
1182 break;
1183 }
1184 l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
1185 wid, prev_wid, &n_used) >>SENSCR_SHIFT;
1186 l_scr = l_scr * lwf;
1187 if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
1188 bestscore = ngs->bp_table[bp].score + l_scr;
1189 bestbp = bp;
1190 }
1191 }
1192 if (bestbp == NO_BP) {
1193 E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
1194 return NULL;
1195 }
1196 E_INFO("</s> not found in last frame, using %s.%d instead\n",
1197 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
1198
1199 /* Now find the node that corresponds to it. */
1200 for (node = dag->nodes; node; node = node->next) {
1201 if (node->lef == bestbp)
1202 return node;
1203 }
1204
1205 /* FIXME: This seems to happen a lot! */
1206 E_ERROR("Failed to find DAG node corresponding to %s\n",
1207 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
1208 return NULL;
1209}
1210
1211/*
1212 * Build lattice from bptable.
1213 */
1216{
1217 int32 i, score, ascr, lscr;
1218 ps_latnode_t *node, *from, *to;
1219 ngram_search_t *ngs;
1220 ps_lattice_t *dag;
1221 int min_endfr, nlink;
1222 float lwf;
1223
1224 ngs = (ngram_search_t *)search;
1225 min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
1226
1227 /* If the best score is WORST_SCORE or worse, there is no way to
1228 * make a lattice. */
1230 return NULL;
1231
1232 /* Check to see if a lattice has previously been created over the
1233 * same number of frames, and reuse it if so. */
1234 if (search->dag && search->dag->n_frames == ngs->n_frame)
1235 return search->dag;
1236
1237 /* Nope, create a new one. */
1238 ps_lattice_free(search->dag);
1239 search->dag = NULL;
1240 dag = ps_lattice_init_search(search, ngs->n_frame);
1241 /* Compute these such that they agree with the fwdtree language weight. */
1242 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1243 create_dag_nodes(ngs, dag);
1244 if ((dag->start = find_start_node(ngs, dag)) == NULL)
1245 goto error_out;
1246 if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1247 goto error_out;
1248 E_INFO("lattice start node %s.%d end node %s.%d\n",
1249 dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
1250 dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
1251
1252 ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
1253 &dag->final_node_ascr, &lscr);
1254
1255 /*
1256 * At this point, dag->nodes is ordered such that nodes earlier in
1257 * the list can follow (in time) those later in the list, but not
1258 * vice versa (see above - also note that adjacency is purely
1259 * determined by time which is why we can make this claim). Now
1260 * create precedence links and simultanesously mark all nodes that
1261 * can reach dag->end. (All nodes are reached from dag->start
1262 * simply by definition - they were created that way).
1263 *
1264 * Note that this also means that any nodes before dag->end in the
1265 * list can be discarded, meaning that dag->end will always be
1266 * equal to dag->nodes (FIXME: except when loading from a file but
1267 * we can fix that...)
1268 */
1269 i = 0;
1270 while (dag->nodes && dag->nodes != dag->end) {
1271 ps_latnode_t *next = dag->nodes->next;
1272 listelem_free(dag->latnode_alloc, dag->nodes);
1273 dag->nodes = next;
1274 ++i;
1275 }
1276 E_INFO("Eliminated %d nodes before end node\n", i);
1277 dag->end->reachable = TRUE;
1278 nlink = 0;
1279 for (to = dag->end; to; to = to->next) {
1280 int fef, lef;
1281
1282 /* Skip if not reachable; it will never be reachable from dag->end */
1283 if (!to->reachable)
1284 continue;
1285
1286 /* Prune nodes with too few endpoints - heuristic
1287 borrowed from Sphinx3 */
1288 fef = ngs->bp_table[to->fef].frame;
1289 lef = ngs->bp_table[to->lef].frame;
1290 if (to != dag->end && lef - fef < min_endfr) {
1291 to->reachable = FALSE;
1292 continue;
1293 }
1294
1295 /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
1296 for (from = to->next; from; from = from->next) {
1297 bptbl_t *from_bpe;
1298
1299 fef = ngs->bp_table[from->fef].frame;
1300 lef = ngs->bp_table[from->lef].frame;
1301
1302 if ((to->sf <= fef) || (to->sf > lef + 1))
1303 continue;
1304 if (lef - fef < min_endfr) {
1305 assert(!from->reachable);
1306 continue;
1307 }
1308
1309 /* Find bptable entry for "from" that exactly precedes "to" */
1310 i = from->fef;
1311 from_bpe = ngs->bp_table + i;
1312 for (; i <= from->lef; i++, from_bpe++) {
1313 if (from_bpe->wid != from->wid)
1314 continue;
1315 if (from_bpe->frame >= to->sf - 1)
1316 break;
1317 }
1318
1319 if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
1320 continue;
1321
1322 /* Find acoustic score from.sf->to.sf-1 with right context = to */
1323 /* This gives us from_bpe's best acoustic score. */
1324 ngram_compute_seg_score(ngs, from_bpe, lwf,
1325 &ascr, &lscr);
1326 /* Now find the exact path score for from->to, including
1327 * the appropriate final triphone. In fact this might not
1328 * exist. */
1329 score = ngram_search_exit_score(ngs, from_bpe,
1330 dict_first_phone(ps_search_dict(ngs), to->wid));
1331 /* Does not exist. Can't create a link here. */
1332 if (score == WORST_SCORE)
1333 continue;
1334 /* Adjust the arc score to match the correct triphone. */
1335 else
1336 score = ascr + (score - from_bpe->score);
1337 if (score BETTER_THAN 0) {
1338 /* Scores must be negative, or Bad Things will happen.
1339 In general, they are, except in corner cases
1340 involving filler words. We don't want to throw any
1341 links away so we'll keep these, but with some
1342 arbitrarily improbable but recognizable score. */
1343 ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
1344 ++nlink;
1345 from->reachable = TRUE;
1346 }
1347 else if (score BETTER_THAN WORST_SCORE) {
1348 ps_lattice_link(dag, from, to, score, from_bpe->frame);
1349 ++nlink;
1350 from->reachable = TRUE;
1351 }
1352 }
1353 }
1354
1355 /* There must be at least one path between dag->start and dag->end */
1356 if (!dag->start->reachable) {
1357 E_ERROR("End node of lattice isolated; unreachable\n");
1358 goto error_out;
1359 }
1360
1361 for (node = dag->nodes; node; node = node->next) {
1362 /* Change node->{fef,lef} from bptbl indices to frames. */
1363 node->fef = ngs->bp_table[node->fef].frame;
1364 node->lef = ngs->bp_table[node->lef].frame;
1365 /* Find base wid for nodes. */
1366 node->basewid = dict_basewid(search->dict, node->wid);
1367 }
1368
1369 /* Link nodes with alternate pronunciations at the same timepoint. */
1370 for (node = dag->nodes; node; node = node->next) {
1371 ps_latnode_t *alt;
1372 /* Scan forward to find the next alternate, then stop. */
1373 for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
1374 if (alt->basewid == node->basewid) {
1375 alt->alt = node->alt;
1376 node->alt = alt;
1377 break;
1378 }
1379 }
1380 }
1381 E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
1382
1383 /* Minor hack: If the final node is a filler word and not </s>,
1384 * then set its base word ID to </s>, so that the language model
1385 * scores won't be screwed up. */
1386 if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
1387 dag->end->basewid = ps_search_finish_wid(ngs);
1388
1389 /* Free nodes unreachable from dag->end and their links */
1391
1392 /* Add silprob and fillprob to corresponding links */
1393 ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen);
1394
1395 search->dag = dag;
1396 return dag;
1397
1398error_out:
1399 ps_lattice_free(dag);
1400 return NULL;
1401}
1402
1403void ngram_search_set_lm(ngram_model_t *lm)
1404{
1405 default_lm = ngram_model_retain(lm);
1406}
1407
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:877
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:427
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:647
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:598
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:660
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
Definition: ngram_search.c:329
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
Find the best word exit for the current frame in the backpointer table.
Definition: ngram_search.c:506
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
Definition: ngram_search.c:550
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Definition: ngram_search.c:289
N-Gram based multi-pass search ("FBS")
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
Flat lexicon based Viterbi search.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
Lexicon tree based Viterbi search.
Internal implementation of PocketSphinx decoder.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists,...
Definition: ps_lattice.c:65
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
Word graph search implementation.
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
int32 wid
Word index.
Definition: ngram_search.h:113
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 bp
Back Pointer.
Definition: ngram_search.h:114
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
Segmentation "iterator" for backpointer table results.
Definition: ngram_search.h:126
int16 cur
Current position in bpidx.
Definition: ngram_search.h:130
int32 * bpidx
Sequence of backpointer IDs.
Definition: ngram_search.h:128
int16 n_bpidx
Number of backpointer IDs.
Definition: ngram_search.h:129
ps_seg_t base
Base structure.
Definition: ngram_search.h:127
Lexical tree node data type.
Definition: ngram_search.h:64
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
N-Gram search module structure.
Definition: ngram_search.h:197
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
float32 ascale
Acoustic score scale for posterior probabilities.
Definition: ngram_search.h:333
listelem_alloc_t * latnode_alloc
For latnode_t.
Definition: ngram_search.h:213
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
Definition: ngram_search.h:307
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
listelem_alloc_t * root_chan_alloc
For root_chan_t.
Definition: ngram_search.h:212
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
ps_latnode_t ** frm_wordlist
List of active words in each frame.
Definition: ngram_search.h:316
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
latlink_list_t * entries
Links into this node.
frame_idx_t sf
Start frame.
latlink_list_t * exits
Links out of this node.
int32 fef
First end frame.
int32 lef
Last end frame.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 basewid
Dictionary base word id.
int16 reachable
From.
int32 wid
Dictionary word id.
Word graph structure used in bestpath/nbest search.
ps_latnode_t * end
Ending node.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
frame_idx_t n_frames
Number of frames for this utterance.
ps_latnode_t * start
Starting node.
ps_latnode_t * nodes
List of all nodes.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
int32 n_nodes
Number of nodes in this lattice.
Base structure for search module.
int32 post
Utterance posterior probability.
ps_lattice_t * dag
Current hypothesis word graph.
dict_t * dict
Pronunciation dictionary.
ps_latlink_t * last_link
Final link in best path.
char * hyp_str
Current hypothesis string.
int32 n_words
Number of words known to search (may be less than in the dictionary)
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
float32 lwf
Language weight factor (for second-pass searches)
int32 lback
Language model backoff.
ps_segfuncs_t * vt
V-table of seg methods.
int32 lscr
Language model score.
int32 ascr
Acoustic score.
frame_idx_t sf
Start frame.
char const * word
Word string (pointer into dictionary hash)
frame_idx_t ef
End frame.
int32 prob
Log posterior probability.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
Definition: tmat.h:56
cross word triphone model structure
Definition: dict2pid.h:73
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
int32 n_ssid
#Unique ssid in above, compressed ssid list
Definition: dict2pid.h:76
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74