# Makefile for ngram_test, a small program linked against the SRILM libraries.
#
# SRIPATH and MACHINE_TYPE must match the local SRILM installation. Both use
# ?= so they can be overridden from the environment or the command line, e.g.
#   make SRIPATH=/path/to/srilm MACHINE_TYPE=i686-m64
SRIPATH ?= /home/master/98/r98922053/srilm-1.5.10
MACHINE_TYPE ?= i686-m64

CXX = g++
# Tunable flags only (optimisation level, warning suppression).
CXXFLAGS = -O3 -w

# Flags the compile step cannot succeed without. They are kept out of CXXFLAGS
# so that `make CXXFLAGS=-g` does not drop the SRILM include path or the
# language level. User flags are passed after these, so they can still override.
required_cxxflags := -I$(SRIPATH)/include -std=c++11

# Lets make resolve the -l<name> prerequisites of $(TARGET) to lib<name>.a
# files inside the SRILM library directory.
vpath lib%.a $(SRIPATH)/lib/$(MACHINE_TYPE)

TARGET = ngram_test
SRC = ngram_test.cpp
OBJ = $(SRC:.cpp=.o)

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

.PHONY: all clean

all: $(TARGET)

# $^ expands to the object files followed by the library files that make
# located for the -l prerequisites.
$(TARGET): $(OBJ) -loolm -ldstruct -lmisc
	$(CXX) $(LDFLAGS) -o $@ $^

# Static pattern rule: only the objects listed in $(OBJ) are built this way.
$(OBJ): %.o: %.cpp
	$(CXX) $(required_cxxflags) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

clean:
	$(RM) $(OBJ) $(TARGET)
source code ngram_test.cpp為:
#include <stdio.h> #include "Ngram.h"
int main(int argc, char *argv[]) {
int ngram_order = 3; Vocab voc; Ngram lm( voc, ngram_order ); { const char lm_filename[] = "./corpus.lm"; File lmFile( lm_filename, "r" ); lm.read(lmFile); lmFile.close(); } VocabIndex wid = voc.getIndex("囧"); if(wid == Vocab_None) { printf("No word with wid = %d\n", wid); printf("where Vocab_None is %d\n", Vocab_None); } wid = voc.getIndex("患者"); VocabIndex context[] = {voc.getIndex("癮") , voc.getIndex("毒"), Vocab_None}; printf("log Prob(患者|毒-癮) = %f\n", lm.wordProb(wid, context)); }
如此就可以利用lm.wordProb來得到language model的機率。示範檔下載: ngram_test.tar.gz (corpus.lm非char-based LM!!)
使用前請先修改Makefile中的SRIPATH以及MACHINE_TYPE!!
VocabIndex wid = voc.getIndex("囧"); if(wid != Vocab_None) { ... }
ㄅ 八 匕 卜 不 卞 巴 比 丙 包 ...
八 八
匕 匕
不 不
...
...
ㄆ 仆 匹 片 丕 叵 平 扒 扑 疋 ...
仆 仆
匹 匹
片 片
...
...
ㄦ 二 而 耳 兒 洱 貳 爾 餌 邇 ...
二 二
而 而
兒 兒
...
...
注意後面每一個字之間都有空格。
disambig -text $file -map $map -lm $language_model -order $order
$file: input file (記得轉換成 character-based 分隔)
make[1]: /home/hamigwa/usr/src/srilm-1.5.10/sbin/machine-type: Command not found
如果編譯錯誤的訊息中有出現上述語句,那很有可能是因為你的系統中沒有安裝 csh。
11 warnings and 1 error generated.
make[2]: *** [../obj/macosx/LatticeIndex.o] Error 1