439 lines
6.7 KiB
Text
439 lines
6.7 KiB
Text
|
|
#############################################################################
|
||
|
|
## Copyright (c) 1996, Carnegie Mellon University, Cambridge University,
|
||
|
|
## Ronald Rosenfeld and Philip Clarkson
|
||
|
|
#############################################################################
|
||
|
|
=============================================================================
|
||
|
|
=============== This file was produced by the CMU-Cambridge ===============
|
||
|
|
=============== Statistical Language Modeling Toolkit ===============
|
||
|
|
=============================================================================
|
||
|
|
This is a 2-gram language model, based on a vocabulary of 28 words,
|
||
|
|
which begins "<s>", "a", "b"...
|
||
|
|
This is an OPEN-vocabulary model (type 1)
|
||
|
|
(OOVs were mapped to UNK, which is treated as any other vocabulary word)
|
||
|
|
Absolute discounting was applied.
|
||
|
|
1-gram discounting constant : 1
|
||
|
|
2-gram discounting constant : 0.53271
|
||
|
|
This file is in the ARPA-standard format introduced by Doug Paul.
|
||
|
|
|
||
|
|
p(wd3|wd1,wd2)= if(trigram exists) p_3(wd1,wd2,wd3)
|
||
|
|
else if(bigram wd1,wd2 exists) bo_wt_2(wd1,wd2)*p(wd3|wd2)
|
||
|
|
else p(wd3|wd2)
|
||
|
|
|
||
|
|
p(wd2|wd1)= if(bigram exists) p_2(wd1,wd2)
|
||
|
|
else bo_wt_1(wd1)*p_1(wd2)
|
||
|
|
|
||
|
|
All probs and back-off weights (bo_wt) are given in log10 form.
|
||
|
|
|
||
|
|
Data formats:
|
||
|
|
|
||
|
|
Beginning of data mark: \data\
|
||
|
|
ngram 1=nr # number of 1-grams
|
||
|
|
ngram 2=nr # number of 2-grams
|
||
|
|
|
||
|
|
\1-grams:
|
||
|
|
p_1 wd_1 bo_wt_1
|
||
|
|
\2-grams:
|
||
|
|
p_2 wd_1 wd_2
|
||
|
|
|
||
|
|
End of data mark: \end\
|
||
|
|
|
||
|
|
\data\
|
||
|
|
ngram 1=29
|
||
|
|
ngram 2=361
|
||
|
|
|
||
|
|
\1-grams:
|
||
|
|
-99.9990 <UNK> 0.0000
|
||
|
|
-98.9947 <s> 0.0000
|
||
|
|
-1.1423 a -0.5272
|
||
|
|
-1.4027 b -0.8846
|
||
|
|
-1.6368 c -0.4158
|
||
|
|
-1.4944 d -0.0486
|
||
|
|
-1.0238 e 0.0094
|
||
|
|
-1.9290 f -0.4012
|
||
|
|
-1.4519 g -0.5389
|
||
|
|
-1.2856 h 0.7485
|
||
|
|
-1.1808 i -0.3408
|
||
|
|
-2.2130 j -0.6851
|
||
|
|
-1.8084 k 0.0629
|
||
|
|
-1.2935 l -0.3457
|
||
|
|
-1.5946 m -0.6264
|
||
|
|
-1.2164 n 0.4262
|
||
|
|
-1.2266 o -0.1643
|
||
|
|
-1.4461 p -0.5694
|
||
|
|
-99.9990 q -0.2573
|
||
|
|
-1.0505 r -0.1338
|
||
|
|
-1.2266 s -0.5507
|
||
|
|
-1.0965 t -0.5775
|
||
|
|
-1.4349 u -0.6301
|
||
|
|
-1.8218 v -1.0660
|
||
|
|
-1.6935 w -0.0341
|
||
|
|
-99.9990 x -0.2669
|
||
|
|
-1.7955 y 0.0357
|
||
|
|
-2.4819 z -0.2479
|
||
|
|
-99.9990 </s> 0.0000
|
||
|
|
|
||
|
|
\2-grams:
|
||
|
|
-2.5179 a a
|
||
|
|
-1.2113 a c
|
||
|
|
-1.3144 a d
|
||
|
|
-1.7953 a e
|
||
|
|
-2.0210 a f
|
||
|
|
-1.5375 a h
|
||
|
|
-2.5179 a k
|
||
|
|
-1.0917 a l
|
||
|
|
-1.2113 a m
|
||
|
|
-0.8360 a n
|
||
|
|
-1.7953 a p
|
||
|
|
-0.6256 a r
|
||
|
|
-1.2113 a s
|
||
|
|
-1.2113 a t
|
||
|
|
-2.5179 a u
|
||
|
|
-1.2113 a v
|
||
|
|
-2.0210 a w
|
||
|
|
-1.5375 a y
|
||
|
|
-1.3894 b a
|
||
|
|
-2.2598 b b
|
||
|
|
-1.0563 b e
|
||
|
|
-2.2598 b i
|
||
|
|
-2.2598 b l
|
||
|
|
-1.3894 b o
|
||
|
|
-0.9096 b r
|
||
|
|
-1.5372 b s
|
||
|
|
-0.2179 b u
|
||
|
|
-0.8882 c a
|
||
|
|
-2.0294 c b
|
||
|
|
-2.0294 c c
|
||
|
|
-0.8882 c e
|
||
|
|
-0.6032 c h
|
||
|
|
-2.0294 c i
|
||
|
|
-0.8258 c k
|
||
|
|
-1.5325 c l
|
||
|
|
-2.0294 c m
|
||
|
|
-0.9612 c o
|
||
|
|
-2.0294 c p
|
||
|
|
-2.0294 c r
|
||
|
|
-2.0294 c y
|
||
|
|
-0.8626 d a
|
||
|
|
-2.1693 d b
|
||
|
|
-2.1693 d c
|
||
|
|
-1.6723 d d
|
||
|
|
-1.0281 d e
|
||
|
|
-1.4466 d f
|
||
|
|
-1.6723 d h
|
||
|
|
-1.6723 d i
|
||
|
|
-2.1693 d j
|
||
|
|
-2.1693 d l
|
||
|
|
-0.9657 d o
|
||
|
|
-0.9111 d p
|
||
|
|
-1.1011 d r
|
||
|
|
-1.1888 d s
|
||
|
|
-2.1693 d t
|
||
|
|
-2.1693 d u
|
||
|
|
-1.0281 d w
|
||
|
|
-1.6723 d y
|
||
|
|
-1.1450 e a
|
||
|
|
-1.6553 e b
|
||
|
|
-2.1388 e c
|
||
|
|
-1.6553 e d
|
||
|
|
-1.4322 e e
|
||
|
|
-2.1388 e f
|
||
|
|
-1.7654 e g
|
||
|
|
-2.6358 e h
|
||
|
|
-1.6553 e i
|
||
|
|
-2.1388 e j
|
||
|
|
-2.6358 e k
|
||
|
|
-0.9736 e l
|
||
|
|
-1.6553 e m
|
||
|
|
-0.8994 e n
|
||
|
|
-1.0631 e p
|
||
|
|
-0.8360 e r
|
||
|
|
-1.2459 e s
|
||
|
|
-1.5676 e t
|
||
|
|
-2.6358 e u
|
||
|
|
-1.3291 e v
|
||
|
|
-1.1450 e w
|
||
|
|
-1.5676 e y
|
||
|
|
-2.6358 e z
|
||
|
|
-1.7454 f a
|
||
|
|
-1.7454 f b
|
||
|
|
-1.7454 f c
|
||
|
|
-0.7649 f e
|
||
|
|
-1.2485 f f
|
||
|
|
-0.7649 f i
|
||
|
|
-1.0228 f o
|
||
|
|
-0.7649 f r
|
||
|
|
-1.0228 f t
|
||
|
|
-1.3408 g a
|
||
|
|
-2.2112 g c
|
||
|
|
-2.2112 g d
|
||
|
|
-0.1778 g h
|
||
|
|
-1.1430 g i
|
||
|
|
-1.2308 g l
|
||
|
|
-2.2112 g m
|
||
|
|
-2.2112 g n
|
||
|
|
-2.2112 g o
|
||
|
|
-1.7143 g r
|
||
|
|
-2.2112 g s
|
||
|
|
-2.2112 g t
|
||
|
|
-2.2112 g v
|
||
|
|
-0.6749 h a
|
||
|
|
-1.3953 h b
|
||
|
|
-1.6531 h c
|
||
|
|
-2.3757 h d
|
||
|
|
-1.2346 h e
|
||
|
|
-1.5053 h f
|
||
|
|
-1.6531 h g
|
||
|
|
-1.5053 h h
|
||
|
|
-1.3076 h i
|
||
|
|
-1.8788 h j
|
||
|
|
-1.3953 h l
|
||
|
|
-1.5053 h m
|
||
|
|
-1.5053 h n
|
||
|
|
-0.9496 h o
|
||
|
|
-1.5053 h p
|
||
|
|
-1.6531 h r
|
||
|
|
-1.2346 h s
|
||
|
|
-1.3076 h t
|
||
|
|
-2.3757 h u
|
||
|
|
-1.8788 h v
|
||
|
|
-2.3757 h w
|
||
|
|
-1.8788 h y
|
||
|
|
-1.6092 i a
|
||
|
|
-1.7570 i b
|
||
|
|
-1.0534 i c
|
||
|
|
-1.6092 i d
|
||
|
|
-1.2215 i e
|
||
|
|
-1.6092 i f
|
||
|
|
-1.7570 i g
|
||
|
|
-2.4796 i j
|
||
|
|
-2.4796 i k
|
||
|
|
-1.2215 i l
|
||
|
|
-1.4992 i m
|
||
|
|
-0.8174 i n
|
||
|
|
-2.4796 i o
|
||
|
|
-2.4796 i p
|
||
|
|
-2.4796 i r
|
||
|
|
-1.4992 i s
|
||
|
|
-0.4638 i t
|
||
|
|
-2.4796 i u
|
||
|
|
-1.4992 i v
|
||
|
|
-0.9796 j a
|
||
|
|
-0.4961 j e
|
||
|
|
-0.4084 j o
|
||
|
|
-1.4765 j u
|
||
|
|
-0.9915 k a
|
||
|
|
-1.3650 k b
|
||
|
|
-1.8619 k c
|
||
|
|
-1.8619 k d
|
||
|
|
-0.7208 k e
|
||
|
|
-1.8619 k f
|
||
|
|
-1.8619 k g
|
||
|
|
-1.8619 k h
|
||
|
|
-1.1393 k i
|
||
|
|
-1.8619 k j
|
||
|
|
-1.3650 k k
|
||
|
|
-1.8619 k l
|
||
|
|
-1.3650 k p
|
||
|
|
-1.8619 k r
|
||
|
|
-1.8619 k s
|
||
|
|
-1.8619 k u
|
||
|
|
-1.3650 k w
|
||
|
|
-1.3650 k y
|
||
|
|
-0.9081 l a
|
||
|
|
-1.6452 l b
|
||
|
|
-2.3678 l c
|
||
|
|
-1.0176 l d
|
||
|
|
-0.6488 l e
|
||
|
|
-2.3678 l f
|
||
|
|
-2.3678 l g
|
||
|
|
-0.9780 l i
|
||
|
|
-2.3678 l j
|
||
|
|
-1.8709 l k
|
||
|
|
-0.8480 l l
|
||
|
|
-1.6452 l m
|
||
|
|
-1.3874 l o
|
||
|
|
-2.3678 l p
|
||
|
|
-1.4974 l s
|
||
|
|
-2.3678 l t
|
||
|
|
-1.6452 l u
|
||
|
|
-2.3678 l w
|
||
|
|
-1.6452 l y
|
||
|
|
-2.3678 l z
|
||
|
|
-0.4740 m a
|
||
|
|
-2.0708 m c
|
||
|
|
-0.9296 m e
|
||
|
|
-1.0903 m i
|
||
|
|
-1.5738 m m
|
||
|
|
-2.0708 m n
|
||
|
|
-0.7205 m o
|
||
|
|
-1.2004 m p
|
||
|
|
-1.3481 m s
|
||
|
|
-2.0708 m u
|
||
|
|
-2.0708 m y
|
||
|
|
-1.1862 n a
|
||
|
|
-1.9474 n b
|
||
|
|
-1.3032 n c
|
||
|
|
-1.0182 n d
|
||
|
|
-0.7822 n e
|
||
|
|
-1.7217 n f
|
||
|
|
-1.2408 n g
|
||
|
|
-2.4444 n h
|
||
|
|
-1.0941 n i
|
||
|
|
-2.4444 n j
|
||
|
|
-1.7217 n k
|
||
|
|
-2.4444 n l
|
||
|
|
-1.5740 n m
|
||
|
|
-1.2408 n n
|
||
|
|
-1.5740 n o
|
||
|
|
-1.2408 n p
|
||
|
|
-1.7217 n r
|
||
|
|
-1.3032 n s
|
||
|
|
-1.1862 n t
|
||
|
|
-1.7217 n u
|
||
|
|
-2.4444 n x
|
||
|
|
-2.4444 n y
|
||
|
|
-2.4444 n z
|
||
|
|
-1.5638 o a
|
||
|
|
-1.5638 o b
|
||
|
|
-2.4342 o c
|
||
|
|
-1.1276 o d
|
||
|
|
-2.4342 o e
|
||
|
|
-1.7116 o f
|
||
|
|
-1.7116 o h
|
||
|
|
-2.4342 o k
|
||
|
|
-1.2931 o l
|
||
|
|
-1.2931 o m
|
||
|
|
-0.6811 o n
|
||
|
|
-1.0840 o o
|
||
|
|
-1.7116 o p
|
||
|
|
-0.8374 o r
|
||
|
|
-1.4538 o s
|
||
|
|
-1.9373 o t
|
||
|
|
-1.2931 o u
|
||
|
|
-1.1761 o w
|
||
|
|
-1.9373 o y
|
||
|
|
-2.4342 o z
|
||
|
|
-1.4943 p a
|
||
|
|
-2.2169 p e
|
||
|
|
-1.4943 p g
|
||
|
|
-1.1487 p h
|
||
|
|
-0.2288 p i
|
||
|
|
-1.2364 p l
|
||
|
|
-2.2169 p n
|
||
|
|
-1.3465 p o
|
||
|
|
-1.4943 p r
|
||
|
|
-1.4943 p s
|
||
|
|
-2.2169 p u
|
||
|
|
-2.2169 p y
|
||
|
|
-0.3304 q u
|
||
|
|
-1.2589 r a
|
||
|
|
-1.7388 r b
|
||
|
|
-1.8865 r c
|
||
|
|
-1.4056 r d
|
||
|
|
-0.9677 r e
|
||
|
|
-2.6092 r f
|
||
|
|
-0.6024 r g
|
||
|
|
-1.0124 r i
|
||
|
|
-1.3510 r k
|
||
|
|
-1.3510 r l
|
||
|
|
-2.1122 r m
|
||
|
|
-1.6287 r n
|
||
|
|
-0.9677 r o
|
||
|
|
-2.6092 r p
|
||
|
|
-1.4680 r r
|
||
|
|
-1.7388 r s
|
||
|
|
-1.4680 r t
|
||
|
|
-2.1122 r u
|
||
|
|
-2.6092 r w
|
||
|
|
-1.4680 r y
|
||
|
|
-1.7116 s a
|
||
|
|
-0.4367 s b
|
||
|
|
-2.4342 s c
|
||
|
|
-1.1761 s e
|
||
|
|
-1.4538 s h
|
||
|
|
-1.9373 s j
|
||
|
|
-1.4538 s k
|
||
|
|
-2.4342 s l
|
||
|
|
-1.4538 s m
|
||
|
|
-2.4342 s n
|
||
|
|
-1.0840 s o
|
||
|
|
-1.2306 s p
|
||
|
|
-1.5638 s s
|
||
|
|
-0.8872 s t
|
||
|
|
-1.9373 s u
|
||
|
|
-1.9373 s v
|
||
|
|
-1.5638 s w
|
||
|
|
-2.0665 t a
|
||
|
|
-2.5634 t b
|
||
|
|
-0.9666 t e
|
||
|
|
-2.5634 t g
|
||
|
|
-1.0726 t h
|
||
|
|
-1.5830 t i
|
||
|
|
-2.5634 t l
|
||
|
|
-2.0665 t m
|
||
|
|
-2.0665 t n
|
||
|
|
-1.4223 t o
|
||
|
|
-1.4952 t p
|
||
|
|
-1.5830 t r
|
||
|
|
-0.5753 t s
|
||
|
|
-0.4969 t t
|
||
|
|
-1.6930 t z
|
||
|
|
-2.2280 u b
|
||
|
|
-2.2280 u c
|
||
|
|
-2.2280 u d
|
||
|
|
-1.3576 u e
|
||
|
|
-1.7311 u g
|
||
|
|
-1.5054 u l
|
||
|
|
-1.7311 u m
|
||
|
|
-1.7311 u n
|
||
|
|
-2.2280 u p
|
||
|
|
-2.2280 u q
|
||
|
|
-0.1695 u r
|
||
|
|
-1.2476 u s
|
||
|
|
-1.7311 u t
|
||
|
|
-0.7807 v a
|
||
|
|
-0.2292 v e
|
||
|
|
-0.7807 v i
|
||
|
|
-1.8489 v k
|
||
|
|
-0.8327 w a
|
||
|
|
-1.9739 w b
|
||
|
|
-1.9739 w d
|
||
|
|
-1.2512 w e
|
||
|
|
-1.9739 w g
|
||
|
|
-1.4769 w h
|
||
|
|
-0.8327 w i
|
||
|
|
-1.9739 w l
|
||
|
|
-0.9934 w n
|
||
|
|
-0.7157 w o
|
||
|
|
-1.9739 w p
|
||
|
|
-1.4769 w r
|
||
|
|
-1.9739 w t
|
||
|
|
-1.9739 w w
|
||
|
|
-1.4769 w y
|
||
|
|
-0.3304 x v
|
||
|
|
-1.8745 y a
|
||
|
|
-1.3776 y b
|
||
|
|
-1.3776 y c
|
||
|
|
-1.1518 y d
|
||
|
|
-1.8745 y e
|
||
|
|
-1.8745 y g
|
||
|
|
-1.3776 y h
|
||
|
|
-1.3776 y j
|
||
|
|
-0.8940 y l
|
||
|
|
-1.3776 y m
|
||
|
|
-1.3776 y n
|
||
|
|
-1.3776 y o
|
||
|
|
-1.0041 y p
|
||
|
|
-1.3776 y s
|
||
|
|
-1.8745 y t
|
||
|
|
-1.3776 y v
|
||
|
|
-1.8745 y w
|
||
|
|
-1.2335 z b
|
||
|
|
-0.5109 z e
|
||
|
|
-1.2335 z i
|
||
|
|
-1.2335 z j
|
||
|
|
-1.2335 z l
|
||
|
|
-1.2335 z p
|
||
|
|
|
||
|
|
\end\
|