(original) (raw)
%!PS-Adobe-2.0 %%Creator: dvips 5.516 Copyright 1986, 1993 Radical Eye Software %%Title: nips94.dvi %%CreationDate: Wed Jan 18 12:12:33 1995 %%Pages: 8 %%PageOrder: Ascend %%BoundingBox: 0 0 612 792 %%EndComments %DVIPSCommandLine: dvips -o nips94.ps nips94.dvi %DVIPSSource: TeX output 1995.01.18:1212 %%BeginProcSet: tex.pro /TeXDict 250 dict def TeXDict begin /N{def}def /B{bind def}N /S{exch}N /X{S N}B /TR{translate}N /isls false N /vsize 11 72 mul N /hsize 8.5 72 mul N /landplus90{false}def /@rigin{isls{[0 landplus90{1 -1}{-1 1} ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR matrix currentmatrix dup dup 4 get round 4 exch put dup dup 5 get round 5 exch put setmatrix}N /@landscape{/isls true N}B /@manualfeed{ statusdict /manualfeed true put}B /@copies{/#copies X}B /FMat[1 0 0 -1 0 0]N /FBB[0 0 0 0]N /nn 0 N /IE 0 N /ctr 0 N /df-tail{/nn 8 dict N nn begin /FontType 3 N /FontMatrix fntrx N /FontBBox FBB N string /base X array /BitMaps X /BuildChar{CharBuilder}N /Encoding IE N end dup{/foo setfont}2 array copy cvx N load 0 nn put /ctr 0 N[}B /df{/sf 1 N /fntrx FMat N df-tail}B /dfs{div /sf X /fntrx[sf 0 0 sf neg 0 0]N df-tail}B /E{ pop nn dup definefont setfont}B /ch-width{ch-data dup length 5 sub get} B /ch-height{ch-data dup length 4 sub get}B /ch-xoff{128 ch-data dup length 3 sub get sub}B /ch-yoff{ch-data dup length 2 sub get 127 sub}B /ch-dx{ch-data dup length 1 sub get}B /ch-image{ch-data dup type /stringtype ne{ctr get /ctr ctr 1 add N}if}B /id 0 N /rw 0 N /rc 0 N /gp 0 N /cp 0 N /G 0 N /sf 0 N /CharBuilder{save 3 1 roll S dup /base get 2 index get S /BitMaps get S get /ch-data X pop /ctr 0 N ch-dx 0 ch-xoff ch-yoff ch-height sub ch-xoff ch-width add ch-yoff setcachedevice ch-width ch-height true[1 0 0 -1 -.1 ch-xoff sub ch-yoff .1 add]{ ch-image}imagemask restore}B /D{/cc X dup type /stringtype ne{]}if nn /base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{dup dup length 1 sub dup 2 index S get sf div put}if put /ctr ctr 1 add N}B /I{cc 1 add D }B /bop{userdict /bop-hook known{bop-hook}if /SI save N @rigin 0 0 moveto /V matrix currentmatrix dup 1 get dup mul exch 0 get dup mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N /eop{SI restore showpage userdict /eop-hook known{eop-hook}if}N /@start{userdict /start-hook known{start-hook}if pop /VResolution X /Resolution X 1000 div /DVImag X /IE 256 array N 0 1 255{IE S 1 string dup 0 3 index put cvn put}for 65781.76 div /vsize X 65781.76 div /hsize X}N /p{show}N /RMat[1 0 0 -1 0 0]N /BDot 260 string N /rulex 0 N /ruley 0 N /v{/ruley X /rulex X V}B /V {}B /RV statusdict begin /product where{pop product dup length 7 ge{0 7 getinterval dup(Display)eq exch 0 4 getinterval(NeXT)eq or}{pop false} ifelse}{false}ifelse end{{gsave TR -.1 -.1 TR 1 1 scale rulex ruley false RMat{BDot}imagemask grestore}}{{gsave TR -.1 -.1 TR rulex ruley scale 1 1 false RMat{BDot}imagemask grestore}}ifelse B /QV{gsave transform round exch round exch itransform moveto rulex 0 rlineto 0 ruley neg rlineto rulex neg 0 rlineto fill grestore}B /a{moveto}B /delta 0 N /tail{dup /delta X 0 rmoveto}B /M{S p delta add tail}B /b{S p tail} B /c{-4 M}B /d{-3 M}B /e{-2 M}B /f{-1 M}B /g{0 M}B /h{1 M}B /i{2 M}B /j{ 3 M}B /k{4 M}B /w{0 rmoveto}B /l{p -4 w}B /m{p -3 w}B /n{p -2 w}B /o{p -1 w}B /q{p 1 w}B /r{p 2 w}B /s{p 3 w}B /t{p 4 w}B /x{0 S rmoveto}B /y{ 3 2 roll p a}B /bos{/SS save N}B /eos{SS restore}B end %%EndProcSet TeXDict begin 40258431 52099146 1000 300 300 (/tmp_mnt/home/u1/tommi/papers/nips94/nips94.dvi) @start /Fa 1 51 df<7FFFFCFFFFFEC00006C00006C00006C00006C00006C00006C00006C00006 C00006C00006C00006C00006C00006C00006C00006C00006C00006C00006C00006FFFFFE FFFFFE17177C991F>50 D E /Fb 2 108 df<0F3060FEC0C040611E08097E880D>15 D<7800180018001800300030C03360344078007E0063006320C340C1800B0E7E8D10> 107 D E /Fc 1 50 df<18F818181818181818181818FF080D7D8C0E>49 D E /Fd 2 89 df80 D88 D E /Fe 44 122 df<0001FC000703000C03001C07001C03001800003800003800003800 00380000700007FFFC00701C00701C00701C00E03800E03800E03800E03800E07001C070 01C07001C07001C0E201C0E201C0E20380E4038064038038038000030000070000060000 C60000E40000CC00007000001825819C17>12 D<183878380808101020404080050C7D83 0D>44 DI<3078F06005047C830D>I<00020006000C001C00 7C039C0038003800380038007000700070007000E000E000E000E001C001C001C001C003 800380038003800780FFF00F1C7C9B15>49 D<003C0000C3000101800201800201C00441 C00441C00841C00841C00841C01083801083801107000E0600000C0000180000300000C0 000100000600000800001001001002002002004006007E0C00C7F80083F80080E000121D 7C9B15>I<003E0000C1800101800200C00400C00440C00841C00841C00841C008838007 0380000700000E0001F800003800000C00000C00000E00000E00000E00000E00700E00E0 1C00801C0080380080300040600021C0001F0000121D7C9B15>I<000F0000308000C080 0183800383800300000600000E00000C00001C00001CF0003B18003C0C00380C00780C00 700E00700E00700E00601C00E01C00E01C00E01C00E03800E03800E0700060600060C000 2180001E0000111D7B9B15>54 D<001E000061000081800180800300C00300C006018006 018006018007030007860003CC0003F00001F000037800063C00081E00180E00300E0060 0600600600600600C00C00C00C00C0180060100060200030C0000F0000121D7C9B15>56 D<060F0F06000000000000000000003078F06008127C910D>58 D<000018000000180000 0038000000380000007800000078000000B8000001B800000138000002380000023C0000 041C0000041C0000081C0000181C0000101C0000201C0000201C00007FFC0000401C0000 801C0001801C0001001C0002001C0002001C0004000E000C000E001C001E00FF00FFC01A 1D7E9C1F>65 D<0003F020001E0C60003002E000E003C001C001C0038001C0070000C00E 0000801E0000801C0000803C0000803C000000780000007800000078000000F0000000F0 000000F0000000F0000000F0000400F0000400F0000400F0000800700008007000100038 002000180040000C0180000706000001F800001B1E7A9C1E>67 D<01FFFE00003C078000 3801C0003801C0003800E0003800E0007000F00070007000700070007000F000E000F000 E000F000E000F000E000F001C001E001C001E001C001E001C001C0038003C00380038003 8007800380070007000E0007001C0007003800070070000E01C000FFFF00001C1C7D9B1F >I<01FFFFE0003C00E00038006000380040003800400038004000700040007000400070 20400070200000E0400000E0400000E0C00000FFC00001C0800001C0800001C0800001C0 800003810100038001000380020003800200070004000700040007000C00070018000E00 7800FFFFF0001B1C7D9B1C>I<01FFFFC0003C01C0003800C00038008000380080003800 800070008000700080007020800070200000E0400000E0400000E0C00000FFC00001C080 0001C0800001C0800001C080000381000003800000038000000380000007000000070000 0007000000070000000F000000FFF000001A1C7D9B1B>I<01FFC0003C00003800003800 00380000380000700000700000700000700000E00000E00000E00000E00001C00001C000 01C00001C0000380000380000380000380000700000700000700000700000F0000FFE000 121C7E9B10>73 D<01FFE0003C0000380000380000380000380000700000700000700000 700000E00000E00000E00000E00001C00001C00001C00001C00003800803800803800803 80100700100700300700600700E00E03C0FFFFC0151C7D9B1A>76 D<01FE0007F8003E000780002E000F00002E001700002E001700002E002700004E002E00 004E004E00004E004E00004E008E00008E011C00008E011C00008E021C00008E021C0001 070438000107043800010708380001071038000207107000020720700002072070000207 407000040740E000040780E000040700E0000C0700E0001C0601E000FF861FFC00251C7D 9B25>I<01FC03FE001C0070003C0060002E0040002E0040002E00400047008000470080 00470080004380800083810000838100008181000081C1000101C2000101C2000100E200 0100E2000200E4000200740002007400020074000400380004003800040038000C001800 1C001000FF8010001F1C7D9B1F>I<01FFFC00003C070000380380003801C0003801C000 3801C0007003C0007003C0007003C00070038000E0078000E0070000E00E0000E0380001 FFE00001C0000001C0000001C00000038000000380000003800000038000000700000007 00000007000000070000000F000000FFE000001A1C7D9B1C>80 D<0007F000001C1C0000 700E0000E0070001C0078003800380070003800E0003C01E0003C01C0003C03C0003C03C 0003C0780003C0780003C0780003C0F0000780F0000780F0000780F0000F00F0000F00F0 000E00F0001E00F0001C0070783800708070007104E0003905C0001D0780000F0E040003 FC040000040C0000060800000E1800000FF0000007F0000007E000000380001A257A9C20 >I<000F8400304C00403C00801801001803001803001806001006001006000007000007 000003E00003FC0001FF00007F800007C00001C00001C00000C00000C02000C02000C060 0180600180600300600200F00400CC180083E000161E7D9C17>83 D<1FFFFFC01C0701C0300E00C0200E0080600E0080400E0080401C0080801C0080801C00 80001C000000380000003800000038000000380000007000000070000000700000007000 0000E0000000E0000000E0000000E0000001C0000001C0000001C0000001C0000003C000 007FFE00001A1C799B1E>I<03CC063C0C3C181C3838303870387038E070E070E070E070 E0E2C0E2C0E261E462643C380F127B9115>97 D<3F00070007000E000E000E000E001C00 1C001C001C0039C03E60383038307038703870387038E070E070E070E060E0E0C0C0C1C0 618063003C000D1D7B9C13>I<01F007080C08181C3838300070007000E000E000E000E0 00E000E008E010602030C01F000E127B9113>I<001F8000038000038000070000070000 0700000700000E00000E00000E00000E0003DC00063C000C3C00181C0038380030380070 3800703800E07000E07000E07000E07000E0E200C0E200C0E20061E4006264003C380011 1D7B9C15>I<01E007100C1018083810701070607F80E000E000E000E000E000E0086010 602030C01F000D127B9113>I<0003C0000670000C70001C60001C00001C000038000038 0000380000380000380003FF8000700000700000700000700000700000E00000E00000E0 0000E00000E00001C00001C00001C00001C00001C0000380000380000380000300000300 00070000C60000E60000CC00007800001425819C0D>I<00F3018F030F06070E0E0C0E1C 0E1C0E381C381C381C381C383830383038187818F00F700070007000E000E0C0C0E1C0C3 007E00101A7D9113>I<0FC00001C00001C0000380000380000380000380000700000700 000700000700000E78000E8C000F0E000E0E001C0E001C0E001C0E001C0E00381C00381C 00381C00383800703880703880707080707100E03200601C00111D7D9C15>I<01800380 010000000000000000000000000000001C002600470047008E008E000E001C001C001C00 38003800710071007100720072003C00091C7C9B0D>I<1F800380038007000700070007 000E000E000E000E001C001C001C001C0038003800380038007000700070007000E400E4 00E400E40068003800091D7C9C0B>108 D<3C1E0780266318C04683A0E04703C0E08E03 80E08E0380E00E0380E00E0380E01C0701C01C0701C01C0701C01C070380380E0388380E 0388380E0708380E0710701C0320300C01C01D127C9122>I<3C3C002646004687004707 008E07008E07000E07000E07001C0E001C0E001C0E001C1C00381C40381C403838403838 80701900300E0012127C9117>I<01E007180C0C180C380C300E700E700EE01CE01CE01C E018E038E030E06060C031801E000F127B9115>I<07870004D98008E0C008E0C011C0E0 11C0E001C0E001C0E00381C00381C00381C00381800703800703000707000706000E8C00 0E70000E00000E00001C00001C00001C00001C00003C0000FF8000131A7F9115>I<3C3C 26C2468747078E068E000E000E001C001C001C001C003800380038003800700030001012 7C9112>114 D<01F006080C080C1C18181C001F001FC00FF007F0007800386030E030C0 30806060C01F000E127D9111>I<00C001C001C001C00380038003800380FFE007000700 07000E000E000E000E001C001C001C001C00384038403840388019000E000B1A7D990E> I<1E0300270700470700470700870E00870E000E0E000E0E001C1C001C1C001C1C001C1C 003838803838801838801839001C5900078E0011127C9116>I<1E06270E470E47068702 87020E020E021C041C041C041C0818083808181018200C4007800F127C9113>I<1E0183 2703874703874703838707018707010E07010E07011C0E021C0E021C0E021C0E04180C04 181C04181C081C1C100C263007C3C018127C911C>I<1E03270747074707870E870E0E0E 0E0E1C1C1C1C1C1C1C1C38383838183818381C7007F00070007000E0E0C0E1C081804700 3C00101A7C9114>121 D E /Ff 22 122 df<60F0F06004047D830B>46 D70 D<7FFFFF00701C0700401C0100401C0100C01C018080 1C0080801C0080801C0080001C0000001C0000001C0000001C0000001C0000001C000000 1C0000001C0000001C0000001C0000001C0000001C0000001C0000001C0000001C000000 1C0000001C000003FFE000191A7F991C>84 D<3F8070C070E020700070007007F01C7030 707070E070E071E071E0F171FB1E3C10107E8F13>97 DI< 07F80C1C381C30087000E000E000E000E000E000E0007000300438080C1807E00E107F8F 11>I<007E00000E00000E00000E00000E00000E00000E00000E00000E00000E0003CE00 0C3E00380E00300E00700E00E00E00E00E00E00E00E00E00E00E00E00E00600E00700E00 381E001C2E0007CFC0121A7F9915>I<07C01C3030187018600CE00CFFFCE000E000E000 E0006000300438080C1807E00E107F8F11>I<0FCE187330307038703870387038303018 602FC02000600070003FF03FFC1FFE600FC003C003C003C0036006381C07E010187F8F13 >103 DI<18003C003C0018000000000000000000000000 00FC001C001C001C001C001C001C001C001C001C001C001C001C001C001C00FF80091A80 990A>I108 DI< FCF8001D0C001E0E001E0E001C0E001C0E001C0E001C0E001C0E001C0E001C0E001C0E00 1C0E001C0E001C0E00FF9FC012107F8F15>I<07E01C38300C700E6006E007E007E007E0 07E007E0076006700E381C1C3807E010107F8F13>II114 D<1F2060E04020C020C020F0007F003FC01FE000F080708030C030C020F0408F800C107F 8F0F>I<0400040004000C000C001C003C00FFC01C001C001C001C001C001C001C001C00 1C201C201C201C201C200E4003800B177F960F>II119 D121 D E /Fg 1 50 df<0C003C00CC000C000C000C000C000C000C00 0C000C000C000C000C000C00FF8009107E8F0F>49 D E /Fh 5 51 df0 D<00000400000004000000020000000100FFFFFFE0FF FFFFE0000001000000020000000400000004001B0A7E8B21>33 D<040E0E1C1C1C383830 70706060C0C0070F7F8F0A>48 D<0F001E003BC061806060804040310040801A0020800E 0020800E0020800E0020800B0020401180404020C0C030C07B800F001E001B0D7E8C21> I<03FC0FFC1C003000600060006000C000C000FFFCFFFCC000C00060006000600030001C 000FFC03FC0E147D9016>I E /Fi 2 49 df0 D<181818303030606060C0C0050B7E8B09>48 D E /Fj 13 117 df<0F80180020006000C000FE00C00080008000C000C00061003E00090D7D8C0E>15 D<3FFE7FFEC440844004400CC008C008C018C018C030C030E020400F0D7E8C13>25 D<40E06020202040408003097D820A>59 D<00200060006000C000C000C0018001800180 030003000300060006000C000C000C00180018001800300030003000600060006000C000 C000C0000B1D7E9511>61 D<07E01FC000E0060001700400017004000138040001380400 021C0800021C0800020E0800020E0800040710000407100004039000040390000801E000 0801E0000800E0000800E00018004000FE0040001A147F931A>78 D<1FFFF8381C1820381820380840380840381080701000700000700000700000E00000E0 0000E00000E00001C00001C00001C00001C0000380003FF8001514809314>84 D<07B00C7010703060606060606060C0C0C0C8C0C841C862D03C700D0D7E8C12>97 D<3E0006000C000C000C000C00180019E01E30183038303030303030306060606460C460 C4C0C8C0700E147E9313>104 D<3E0006000C000C000C000C001800187018B819383230 340038003E006300631063106310C320C1C00D147E9312>107 D<30F87C00590C86004E 0D06009C0E0600980C0600180C0600180C060030180C0030180C80301818803018188060 30190060300E00190D7F8C1D>109 D<30F8590C4E0C9C0C980C180C180C301830193031 30316032601C100D7F8C15>I<0700188019C0318038001E000F0003804180E180C10082 007C000A0D7E8C10>115 D<02000600060006000C00FF800C000C001800180018001800 300031003100320032001C0009127F910D>I E /Fk 7 62 df<01020408103020606040 C0C0C0C0C0C0C0C0C0C040606020301008040201081E7E950D>40 D<80402010080C0406060203030303030303030303020606040C0810204080081E7E950D >I<006000006000006000006000006000006000006000006000006000006000FFFFF0FF FFF000600000600000600000600000600000600000600000600000600000600014167E91 19>43 D<0C001C00EC000C000C000C000C000C000C000C000C000C000C000C000C000C00 0C000C00FFC00A137D9211>49 D<1F0060C06060F070F030603000700070006000C001C0 0180020004000810101020207FE0FFE00C137E9211>I<006000E000E00160026006600C 600860106020606060C060FFFC0060006000600060006003FC0E137F9211>52 D<7FFFE0FFFFF0000000000000000000000000000000000000FFFFF07FFFE0140A7E8B19 >61 D E /Fl 9 107 df0 D<60F0F06004047C8B0C> I<000001800000078000001E00000078000001E00000078000001E00000078000001E000 00078000001E00000078000000E0000000780000001E0000000780000001E00000007800 00001E0000000780000001E0000000780000001E00000007800000018000000000000000 0000000000000000000000000000000000000000007FFFFF00FFFFFF8019227D9920>20 D<000000040000000002000000000200000000010000000000800000000040FFFFFFFFF8 FFFFFFFFF800000000400000000080000000010000000002000000000200000000040025 0E7E902A>33 D<07C001E01FF0078030780C00201C1800400E3000400760008007E00080 03C0008003C0008001E0008001E0008003F0004003700040063800600C1C0020180F0018 F007E007C001E01B127E9120>47 D<0001FC000007FF0000083F8000300F800070078000 E0078000E0060001E0040001E0000001F0000001F8000000FC0000007E0000003F800000 1FE0000007F0000001F8000000FC0008007C0038003C0070003C0070003C00F0003800F0 003800F8007000F80060007C0080007F0300003FFC00000FF00000191E7F9C19>83 D<003C00E001C00180038003800380038003800380038003800380038003800380038003 0007001C00F0001C00070003000380038003800380038003800380038003800380038003 800380018001C000E0003C0E297D9E15>102 DI106 D E /Fm 30 118 df<0001F000061800080C00100C00 200E00400E00800E00801C01001C010018010038020FF00210C0020FE002003004003004 00300400380400380800700800700800700800E01800E01800C0140180140300230E0020 F80020000020000040000040000040000040000080000080000017257F9C17>12 D<0780101FC0103FE0207FF020603040C01080801080000900000900000A00000A00000A 00000C00000C000008000008000008000018000018000018000030000030000030000030 00006000006000004000141B7F9115>I<01F807000C0018003800300070007FC0E000E0 00E000E000E00060006000300018600F800D127E9111>15 D<07800001C00000E00000E0 0000F000007000007000007000003800003800003800003C00001C00001C00001E00000E 00001E00003F0000670000C7000187800303800703800E03801C03C03801C07001C0E001 E06000E0131D7E9C18>21 D<0FFFF81FFFF83FFFF0608400408400808C00010C00010C00 030C00030C00020C00061C00061C000E1C000C1C001C0E001C0E00180C0015127E9118> 25 D<1E000423800823C00801C01001C02001E04000E08000E10000F100007200007400 007800003800003800007800007C00009C00011C00021C00041E00080E00080E00100F00 2007084003908001E0161A7F911A>31 D<60F0F06004047C830C>58 D<60F0F0701010101020204080040C7C830C>I<0000038000000F0000003C000000F000 0003C000000F0000003C000000F0000003C000000F0000003C000000F0000000F0000000 3C0000000F00000003C0000000F00000003C0000000F00000003C0000000F00000003C00 00000F000000038019187D9520>I<00010003000600060006000C000C000C0018001800 180030003000300060006000C000C000C0018001800180030003000300060006000C000C 000C00180018001800300030003000600060006000C000C00010297E9E15>II<01FFFF00003C01C000 3800E0003800F0003800700038007000700070007000F0007000F0007001E000E003C000 E0078000E01F0000FFFC0001C00F0001C0078001C003C001C003C0038003C0038003C003 8003C0038003C0070007800700070007000E0007001C000E007800FFFFC0001C1C7E9B1F >66 D<0001F808000E061800380138006000F001C0007003800070070000300F0000200E 0000201C0000203C0000203C000000780000007800000078000000F0000000F0000000F0 000000F0000000F0000100F0000100F0000100700002007000020030000400380008001C 0010000E0060000701800000FE00001D1E7E9C1E>I<01FFFFF8003C0078003800180038 001000380010003800100070001000700010007010100070100000E0200000E0200000E0 600000FFE00001C0400001C0400001C0400001C040000380804003800040038000800380 0080070001000700010007000300070006000E003E00FFFFFC001D1C7E9B1F>69 D<007FF80003C0000380000380000380000380000700000700000700000700000E00000E 00000E00000E00001C00001C00001C00001C00003800003800003800203800707000F070 00E0600080E00081C0004380003E0000151D7D9B17>74 D<01FFC07F80003C001E000038 001800003800200000380040000038008000007002000000700400000070080000007010 000000E040000000E0C0000000E1E0000000E2E0000001C470000001D070000001E03800 0001C0380000038038000003801C000003801C000003800E000007000E000007000E0000 070007000007000700000F00078000FFE03FF000211C7E9B23>I<0003F800000E0E0000 38038000E001C001C001C0038000E0070000E00F0000F01E0000F01C0000F03C0000F03C 0000F0780000F0780000F0780000F0F00001E0F00001E0F00001E0F00003C0F00003C0F0 000780F0000780F0000F0070000E0070001C00380038003C0070001C01C0000707800001 FC00001C1E7E9C20>79 D<01FFFF00003C03C0003800E0003800F0003800700038007000 7000F0007000F0007000F0007000E000E001E000E003C000E0078000E01E0001FFF00001 C0000001C0000001C0000003800000038000000380000003800000070000000700000007 000000070000000F000000FFE000001C1C7E9B1B>I<0003F800000E0E000038038000E0 01C001C001C0038000E0070000E00F0000F01E0000F01C0000F03C0000F03C0000F07800 00F0780000F0780000F0F00001E0F00001E0F00001E0F00003C0F00003C0F0000380F000 0780F0000F00703C0E0070421C0038823800388270001C83C0000787810001FF01000003 03000003020000038E000003FC000003F8000001F8000001E0001C257E9C21>I<01FFFE 00003C03C0003800E0003800F00038007000380070007000F0007000F0007000F0007001 E000E001C000E0078000E01E0000FFF00001C0300001C0180001C01C0001C01C0003801C 0003801C0003801C0003801C0007003C0007003C0807003C0807003C100F001E10FFE00E 20000007C01D1D7E9B20>I86 D<01E3000717000C0F00180F00380E00300E00700E00700E00E01C00E01C00E01C00E01C 00E03880E03880E038806078803199001E0E0011127E9116>97 D<01C003C003C0018000 00000000000000000000001C00270047004700870087000E000E001C001C001C00380038 8038807080710032001C000A1C7E9B0E>105 D<0FC00001C00001C00003800003800003 80000380000700000700000700000700000E07000E18800E21C00E23C01C47801C83001D 00001E00003F800039C00038E00038E00070E10070E10070E10070E200E06200603C0012 1D7E9C16>107 D<381F81F04E20C6184640E81C4680F01C8F00F01C8E00E01C0E00E01C 0E00E01C1C01C0381C01C0381C01C0381C01C0703803807138038071380380E1380380E2 700700643003003820127E9124>109 D<07078009C86008D03008E03011C03011C03801 C03801C0380380700380700380700380600700E00700C00701800783000E86000E78000E 00000E00001C00001C00001C00001C00003C0000FF8000151A819115>112 D<383C4E424687470F8E1E8E0C0E000E001C001C001C001C003800380038003800700030 0010127E9113>114 D<01F0060C04040C0E180C1C001F000FE00FF003F80038201C7018 F018F010803060601F800F127E9113>I<00C001C001C001C00380038003800380FFF007 00070007000E000E000E000E001C001C001C001C00382038203840384018800F000C1A80 990F>I<1C00C02701C04701C04701C08703808703800E03800E03801C07001C07001C07 001C0700180E20180E20180E201C1E200C264007C38013127E9118>I E /Fn 32 117 df45 D<00180000780001F800FFF800 FFF80001F80001F80001F80001F80001F80001F80001F80001F80001F80001F80001F800 01F80001F80001F80001F80001F80001F80001F80001F80001F80001F80001F80001F800 01F80001F8007FFFE07FFFE013207C9F1C>49 D<03FC000FFF003C1FC07007E07C07F0FE 03F0FE03F8FE03F8FE01F87C01F83803F80003F80003F00003F00007E00007C0000F8000 1F00003E0000380000700000E01801C0180380180700180E00380FFFF01FFFF03FFFF07F FFF0FFFFF0FFFFF015207D9F1C>I<00FE0007FFC00F07E01E03F03F03F03F81F83F81F8 3F81F81F03F81F03F00003F00003E00007C0001F8001FE0001FF000007C00001F00001F8 0000FC0000FC3C00FE7E00FEFF00FEFF00FEFF00FEFF00FC7E01FC7801F81E07F00FFFC0 01FE0017207E9F1C>I<0000E00001E00003E00003E00007E0000FE0001FE0001FE00037 E00077E000E7E001C7E00187E00307E00707E00E07E00C07E01807E03807E07007E0E007 E0FFFFFEFFFFFE0007E00007E00007E00007E00007E00007E00007E000FFFE00FFFE1720 7E9F1C>I<1000201E01E01FFFC01FFF801FFF001FFE001FF8001BC00018000018000018 000018000019FC001FFF001E0FC01807E01803E00003F00003F00003F80003F83803F87C 03F8FE03F8FE03F8FC03F0FC03F07007E03007C01C1F800FFF0003F80015207D9F1C>I< 001F8000FFE003F07007C0F00F01F81F01F83E01F83E01F87E00F07C00007C0000FC0800 FC7FC0FCFFE0FD80F0FF00F8FE007CFE007CFC007EFC007EFC007EFC007E7C007E7C007E 7C007E3C007C3E007C1E00F80F00F00783E003FFC000FF0017207E9F1C>I<0000700000 00007000000000F800000000F800000000F800000001FC00000001FC00000003FE000000 03FE00000003FE00000006FF000000067F0000000E7F8000000C3F8000000C3F80000018 3FC00000181FC00000381FE00000300FE00000300FE00000600FF000006007F00000E007 F80000FFFFF80000FFFFF800018001FC00018001FC00038001FE00030000FE00030000FE 000600007F000600007F00FFE00FFFF8FFE00FFFF825227EA12A>65 DI<0003 FE0080001FFF818000FF01E38001F8003F8003E0001F8007C0000F800F800007801F8000 07803F000003803F000003807F000001807E000001807E00000180FE00000000FE000000 00FE00000000FE00000000FE00000000FE00000000FE00000000FE000000007E00000000 7E000001807F000001803F000001803F000003801F800003000F8000030007C000060003 F0000C0001F800380000FF00F000001FFFC0000003FE000021227DA128>III<0003FE0040001FFFC0 C0007F00F1C001F8003FC003F0000FC007C00007C00FC00003C01F800003C03F000001C0 3F000001C07F000000C07E000000C07E000000C0FE00000000FE00000000FE00000000FE 00000000FE00000000FE00000000FE00000000FE000FFFFC7E000FFFFC7F00001FC07F00 001FC03F00001FC03F00001FC01F80001FC00FC0001FC007E0001FC003F0001FC001FC00 3FC0007F80E7C0001FFFC3C00003FF00C026227DA12C>71 DII76 DII<0007FC0000003FFF800000FC07E00003F001F80007 E000FC000FC0007E001F80003F001F80003F003F00001F803F00001F807F00001FC07E00 000FC07E00000FC0FE00000FE0FE00000FE0FE00000FE0FE00000FE0FE00000FE0FE0000 0FE0FE00000FE0FE00000FE0FE00000FE07E00000FC07F00001FC07F00001FC03F00001F 803F80003F801F80003F000FC0007E0007E000FC0003F001F80000FC07E000003FFF8000 0007FC000023227DA12A>II82 D<01FC0407FF8C1F03FC3C007C7C003C78001C78001CF8000CF8000C FC000CFC0000FF0000FFE0007FFF007FFFC03FFFF01FFFF80FFFFC03FFFE003FFE0003FF 00007F00003F00003FC0001FC0001FC0001FE0001EE0001EF0003CFC003CFF00F8C7FFE0 80FF8018227DA11F>I<7FFFFFFF807FFFFFFF807E03F80F807803F807807003F8038060 03F80180E003F801C0E003F801C0C003F800C0C003F800C0C003F800C0C003F800C00003 F800000003F800000003F800000003F800000003F800000003F800000003F800000003F8 00000003F800000003F800000003F800000003F800000003F800000003F800000003F800 000003F800000003F800000003F800000003F800000003F8000003FFFFF80003FFFFF800 22227EA127>III89 D<07FC001FFF803F07C03F03E03F01E03F01F01E01F00001F00001F0003FF003FDF01FC1 F03F01F07E01F0FC01F0FC01F0FC01F0FC01F07E02F07E0CF81FF87F07E03F18167E951B >97 DI<00FF8007FFE00F83F01F03F03E03F07E03F07C01E07C0000FC0000FC0000 FC0000FC0000FC0000FC00007C00007E00007E00003E00301F00600FC0E007FF8000FE00 14167E9519>I114 D<0FF3003FFF00781F00600700E00300E00300F00300FC00007FE0 007FF8003FFE000FFF0001FF00000F80C00780C00380E00380E00380F00700FC0E00EFFC 00C7F00011167E9516>I<0180000180000180000180000380000380000780000780000F 80003F8000FFFF00FFFF000F80000F80000F80000F80000F80000F80000F80000F80000F 80000F80000F80000F81800F81800F81800F81800F81800F830007C30003FE0000F80011 207F9F16>I E /Fo 83 125 df0 D<00030000000300000007800000078000000FC000000BC0000013E0000011E0000021F0 000020F0000040F8000040780000807C0000803C0001003E0001001E0002001F0002000F 0004000F8004000780080007C0080003C0100003E0100001E0200000F0200000F07FFFFF F8FFFFFFFCFFFFFFFC1E1D7E9C23>I<007E1F0001C1B1800303E3C00703C3C00E03C180 0E01C0000E01C0000E01C0000E01C0000E01C0000E01C000FFFFFC000E01C0000E01C000 0E01C0000E01C0000E01C0000E01C0000E01C0000E01C0000E01C0000E01C0000E01C000 0E01C0000E01C0000E01C0000E01C0000E01C0007F87FC001A1D809C18>11 D<007E0001C1800301800703C00E03C00E01800E00000E00000E00000E00000E0000FFFF C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01 C00E01C00E01C00E01C00E01C07F87F8151D809C17>I<007FC001C1C00303C00703C00E 01C00E01C00E01C00E01C00E01C00E01C00E01C0FFFFC00E01C00E01C00E01C00E01C00E 01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C00E01C07F CFF8151D809C17>I<003F07E00001C09C18000380F018000701F03C000E01E03C000E00 E018000E00E000000E00E000000E00E000000E00E000000E00E00000FFFFFFFC000E00E0 1C000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C 000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C000E00E01C00 0E00E01C007FC7FCFF80211D809C23>I22 D<6060F0F0F8F8686808 0808080808101010102020404080800D0C7F9C15>34 D<00E00000019000000308000003 080000070800000708000007080000070800000710000007100000072000000740000003 C03FE003800F00038006000380040005C0040009C0080010E0100030E010006070200060 702000E0384000E03C4000E01C8000E00F0020E0070020700780403009C0401830E18007 C03E001B1F7E9D20>38 D<60F0F8680808081010204080050C7C9C0C>I<004000800100 020006000C000C0018001800300030007000600060006000E000E000E000E000E000E000 E000E000E000E000E000E000600060006000700030003000180018000C000C0006000200 0100008000400A2A7D9E10>I<800040002000100018000C000C00060006000300030003 8001800180018001C001C001C001C001C001C001C001C001C001C001C001C00180018001 80038003000300060006000C000C00180010002000400080000A2A7E9E10>I<00060000 000600000006000000060000000600000006000000060000000600000006000000060000 000600000006000000060000FFFFFFE0FFFFFFE000060000000600000006000000060000 000600000006000000060000000600000006000000060000000600000006000000060000 1B1C7E9720>43 D<60F0F0701010101020204080040C7C830C>II<60F0F06004047C830C>I<03C00C301818300C300C700E60066006E007E007E007E007 E007E007E007E007E007E007E007E007E00760066006700E300C300C18180C3007E0101D 7E9B15>48 D<030007003F00C70007000700070007000700070007000700070007000700 070007000700070007000700070007000700070007000F80FFF80D1C7C9B15>I<07C018 30201C400C400EF00FF80FF807F8077007000F000E000E001C001C00380070006000C001 80030006010C01180110023FFE7FFEFFFE101C7E9B15>I<07E01830201C201C781E780E 781E381E001C001C00180030006007E00030001C001C000E000F000F700FF80FF80FF80F F00E401C201C183007E0101D7E9B15>I<000C00000C00001C00003C00003C00005C0000 DC00009C00011C00031C00021C00041C000C1C00081C00101C00301C00201C00401C00C0 1C00FFFFC0001C00001C00001C00001C00001C00001C00001C0001FFC0121C7F9B15>I< 300C3FF83FF03FC020002000200020002000200023E024302818301C200E000E000F000F 000F600FF00FF00FF00F800E401E401C2038187007C0101D7E9B15>I<00F0030C06040C 0E181E301E300C700070006000E3E0E430E818F00CF00EE006E007E007E007E007E00760 0760077006300E300C18180C3003E0101D7E9B15>I<4000007FFF807FFF007FFF004002 0080040080040080080000100000100000200000600000400000C00000C00001C0000180 000180000380000380000380000380000780000780000780000780000780000780000300 00111D7E9B15>I<03E00C301008200C20066006600660067006780C3E083FB01FE007F0 07F818FC307E601E600FC007C003C003C003C00360026004300C1C1007E0101D7E9B15> I<03C00C301818300C700C600EE006E006E007E007E007E007E0076007700F300F18170C 2707C700060006000E300C780C78187010203030C00F80101D7E9B15>I<60F0F0600000 000000000000000060F0F06004127C910C>I<60F0F0600000000000000000000060F0F0 701010101020204080041A7C910C>I<7FFFFFC0FFFFFFE0000000000000000000000000 0000000000000000000000000000000000000000FFFFFFE07FFFFFC01B0C7E8F20>61 D<003F800000C0600003001800040004000800020010000100201F00802070808040E040 4040C0384041C03840818038208380382083803820838038208380382083803820818038 2041C0382040C0384040E0784020709880201F0F00100000000800000004000000030001 E000C01F80003FF0001B1D7E9C20>64 D<000600000006000000060000000F0000000F00 00000F00000017800000178000001780000023C0000023C0000023C0000041E0000041E0 000041E0000080F0000080F0000180F8000100780001FFF80003007C0002003C0002003C 0006003E0004001E0004001E000C001F001E001F00FF80FFF01C1D7F9C1F>II<001F808000E0618001801980070007800E0003801C 0003801C00018038000180780000807800008070000080F0000000F0000000F0000000F0 000000F0000000F0000000F0000000F0000000700000807800008078000080380000801C 0001001C0001000E000200070004000180080000E03000001FC000191E7E9C1E>IIII<001F808000E0618001801980070007800E0003801C0003801C 00018038000180780000807800008070000080F0000000F0000000F0000000F0000000F0 000000F0000000F000FFF0F0000F80700007807800078078000780380007801C0007801C 0007800E00078007000B800180118000E06080001F80001C1E7E9C21>III<1FFF00F8 007800780078007800780078007800780078007800780078007800780078007800780078 00787078F878F878F878F0F040E021C01F00101D7F9B15>II< FFF8000F80000F00000F00000F00000F00000F00000F00000F00000F00000F00000F0000 0F00000F00000F00000F00000F00000F00000F00080F00080F00080F00180F00180F0010 0F00300F00700F01F0FFFFF0151C7E9B1A>III<003F80 0000E0E0000380380007001C000E000E001C0007003C00078038000380780003C0780003 C0700001C0F00001E0F00001E0F00001E0F00001E0F00001E0F00001E0F00001E0F00001 E0700001C0780003C0780003C0380003803C0007801C0007000E000E0007001C00038038 0000E0E000003F80001B1E7E9C20>II<00 3F800000E0E0000380380007001C000E000E001C0007003C00078038000380780003C078 0003C0700001C0F00001E0F00001E0F00001E0F00001E0F00001E0F00001E0F00001E0F0 0001E0700001C0780003C0780003C0380003803C0E07801C1107000E208E0007205C0003 A0780000F0E020003FE0200000602000003060000038E000003FC000003FC000001F8000 000F001B257E9C20>II<07E0801C19803005807003 80600180E00180E00080E00080E00080F00000F800007C00007FC0003FF8001FFE0007FF 0000FF80000F800007C00003C00001C08001C08001C08001C0C00180C00180E00300D002 00CC0C0083F800121E7E9C17>I<7FFFFFC0700F01C0600F00C0400F0040400F0040C00F 0020800F0020800F0020800F0020000F0000000F0000000F0000000F0000000F0000000F 0000000F0000000F0000000F0000000F0000000F0000000F0000000F0000000F0000000F 0000000F0000000F0000001F800003FFFC001B1C7F9B1E>II87 D89 D91 D<08081010202040404040808080808080B0B0F8F8787830300D0C7A9C15>II<1FC000307000783800781C00301C00001C00001C0001FC000F1C00 381C00701C00601C00E01C40E01C40E01C40603C40304E801F870012127E9115>97 DI<07E00C301878307870306000E0 00E000E000E000E000E00060007004300418080C3007C00E127E9112>I<003F00000700 00070000070000070000070000070000070000070000070000070003E7000C1700180F00 300700700700600700E00700E00700E00700E00700E00700E00700600700700700300700 180F000C370007C7E0131D7E9C17>I<03E00C301818300C700E6006E006FFFEE000E000 E000E00060007002300218040C1803E00F127F9112>I<00F8018C071E061E0E0C0E000E 000E000E000E000E00FFE00E000E000E000E000E000E000E000E000E000E000E000E000E 000E000E000E007FE00F1D809C0D>I<00038003C4C00C38C01C3880181800381C00381C 00381C00381C001818001C38000C300013C0001000003000001800001FF8001FFF001FFF 803003806001C0C000C0C000C0C000C06001803003001C0E0007F800121C7F9215>II<18003C003C00180000000000000000 00000000000000FC001C001C001C001C001C001C001C001C001C001C001C001C001C001C 001C001C00FF80091D7F9C0C>I<00C001E001E000C00000000000000000000000000000 0FE000E000E000E000E000E000E000E000E000E000E000E000E000E000E000E000E000E0 00E000E000E060E0F0C0F1C061803E000B25839C0D>IIIII<03F0000E1C00180600300300700380600180E001 C0E001C0E001C0E001C0E001C0E001C06001807003803003001806000E1C0003F0001212 7F9115>II<03C1000C3300180B00300F00700700700700 E00700E00700E00700E00700E00700E00700600700700700300F00180F000C370007C700 000700000700000700000700000700000700000700003FE0131A7E9116>II<1F9030704030C010C010E010F8007F803FE00FF000F880388018C018C018E010D060 8FC00D127F9110>I<04000400040004000C000C001C003C00FFE01C001C001C001C001C 001C001C001C001C001C101C101C101C101C100C100E2003C00C1A7F9910>IIII<7F8FF00F03800F030007020003840001C80001 D80000F00000700000780000F800009C00010E00020E000607000403801E07C0FF0FF815 12809116>II<7FFC70386038407040F040E041C003C003 8007000F040E041C043C0C380870087038FFF80E127F9112>I124 D E /Fp 31 120 df<78FCFCFCFC7806067D850D>46 D<00600001E0000FE000 FFE000F3E00003E00003E00003E00003E00003E00003E00003E00003E00003E00003E000 03E00003E00003E00003E00003E00003E00003E00003E00003E00003E0007FFF807FFF80 111B7D9A18>49 D<00038000000380000007C0000007C0000007C000000FE000000FE000 001FF000001BF000001BF0000031F8000031F8000061FC000060FC0000E0FE0000C07E00 00C07E0001803F0001FFFF0003FFFF8003001F8003001F8006000FC006000FC00E000FE0 0C0007E0FFC07FFEFFC07FFE1F1C7E9B24>65 D68 DI73 D<07FFF007FFF0001F80001F80001F80001F80001F80001F80001F80001F80001F 80001F80001F80001F80001F80001F80001F80001F80001F80001F80301F80781F80FC1F 80FC1F80FC1F00783E00387C000FF000141C7F9B19>I77 DI80 D82 D<07F8201FFEE03C07E07801E07000E0F000E0 F00060F00060F80000FE0000FFE0007FFE003FFF003FFF800FFFC007FFE0007FE00003F0 0001F00000F0C000F0C000F0C000E0E000E0F001C0FC03C0EFFF0083FC00141C7D9B1B> I<7FFFFFE07FFFFFE0781F81E0701F80E0601F8060E01F8070C01F8030C01F8030C01F80 30C01F8030001F8000001F8000001F8000001F8000001F8000001F8000001F8000001F80 00001F8000001F8000001F8000001F8000001F8000001F8000001F8000001F800007FFFE 0007FFFE001C1C7E9B21>I<7FFE1FFE007FFE1FFE0007F001800003F803800001FC0700 0000FC06000000FE0C0000007F1C0000003F380000003FB00000001FE00000000FE00000 000FE000000007F000000003F800000007F80000000FFC0000000CFE000000187E000000 387F000000703F800000601F800000C01FC00001C00FE000018007F000030007F000FFF0 3FFF80FFF03FFF80211C7F9B24>88 D<0FF8001C1E003E0F803E07803E07C01C07C00007 C0007FC007E7C01F07C03C07C07C07C0F807C0F807C0F807C0780BC03E13F80FE1F81512 7F9117>97 D<03FC000E0E001C1F003C1F00781F00780E00F80000F80000F80000F80000 F80000F800007800007801803C01801C03000E0E0003F80011127E9115>99 D<000FF0000FF00001F00001F00001F00001F00001F00001F00001F00001F00001F001F9 F00F07F01C03F03C01F07801F07801F0F801F0F801F0F801F0F801F0F801F0F801F07801 F07801F03C01F01C03F00F0FFE03F9FE171D7E9C1B>I<01FC000F07001C03803C01C078 01C07801E0F801E0F801E0FFFFE0F80000F80000F800007800007C00603C00601E00C00F 038001FC0013127F9116>I<007F0001E38003C7C00787C00F87C00F83800F80000F8000 0F80000F80000F8000FFF800FFF8000F80000F80000F80000F80000F80000F80000F8000 0F80000F80000F80000F80000F80000F80000F80007FF8007FF800121D809C0F>I<03F8 F00E0F381E0F381C07303C07803C07803C07803C07801C07001E0F000E0E001BF8001000 001800001800001FFF001FFFC00FFFE01FFFF07801F8F00078F00078F000787000707800 F01E03C007FF00151B7F9118>II< 1E003F003F003F003F001E00000000000000000000000000FF00FF001F001F001F001F00 1F001F001F001F001F001F001F001F001F001F00FFE0FFE00B1E7F9D0E>I107 DIII<01FC000F07801C01C03C 01E07800F07800F0F800F8F800F8F800F8F800F8F800F8F800F87800F07800F03C01E01E 03C00F078001FC0015127F9118>I114 D<1FD830786018E018E018F000FF807FE07FF01FF807FC007CC01CC01CE01C E018F830CFC00E127E9113>I<0300030003000300070007000F000F003FFCFFFC1F001F 001F001F001F001F001F001F001F001F0C1F0C1F0C1F0C0F08079803F00E1A7F9913>I< FF8FF8FEFF8FF8FE1F03E0301F03E0301F83E0700F83F0600F86F06007C6F0C007CEF8C0 07EC79C003EC7D8003F83D8001F83F0001F83F0001F01F0000F01E0000E00E0000E00E00 1F127F9122>119 D E /Fq 25 122 df<0000007C0000000000007C000000000000FE00 0000000000FE000000000000FE000000000001FF000000000001FF000000000003FF8000 00000003FF800000000007FFC00000000007FFC00000000007FFC0000000000FFFE00000 00000F7FE0000000001F7FF0000000001E3FF0000000001E3FF0000000003E3FF8000000 003C1FF8000000007C1FFC00000000780FFC00000000780FFC00000000F80FFE00000000 F007FE00000001F007FF00000001E003FF00000001E003FF00000003E003FF80000003C0 01FF80000007C001FFC00000078000FFC00000078000FFC000000FFFFFFFE000000FFFFF FFE000001FFFFFFFF000001E00003FF000001E00003FF000003C00003FF800003C00001F F800007C00001FFC00007800000FFC00007800000FFC0000F0000007FE0000F0000007FE 0001F0000007FF0003F8000003FF00FFFFC001FFFFFEFFFFC001FFFFFEFFFFC001FFFFFE 37317DB03E>65 D68 D76 DI<00000FFE0000000000FFFFE000000007FFFFFC0000001FFC07FF0000 003FE000FF800000FF80003FE00001FF00001FF00003FE00000FF80007FC000007FC0007 FC000007FC000FF8000003FE001FF8000003FF001FF0000001FF003FF0000001FF803FF0 000001FF803FF0000001FF807FE0000000FFC07FE0000000FFC07FE0000000FFC0FFE000 0000FFE0FFE0000000FFE0FFE0000000FFE0FFE0000000FFE0FFE0000000FFE0FFE00000 00FFE0FFE0000000FFE0FFE0000000FFE0FFE0000000FFE0FFE0000000FFE0FFE0000000 FFE07FE0000000FFC07FE0000000FFC07FF0000001FFC07FF0000001FFC03FF0000001FF 803FF0000001FF801FF8000003FF001FF8000003FF000FFC000007FE000FFC000007FE00 07FE00000FFC0003FF00001FF80001FF80003FF00000FFC0007FE000003FE000FF800000 1FFC07FF00000007FFFFFC00000000FFFFE0000000000FFE00000033317BB03E>79 DI82 D<007FF8000003FFFF000007FFFFC0000FE01F E0001FF007F0001FF003F8001FF003FC001FF001FE000FE001FE0007C001FE00010001FE 00000001FE00000001FE000001FFFE00003FFFFE0001FFF1FE0007FE01FE000FF001FE00 1FC001FE003F8001FE007F8001FE00FF0001FE00FF0001FE00FF0001FE00FF0001FE00FF 0003FE007F8003FE007FC00EFE003FF03CFF000FFFF87FF807FFF03FF800FF800FF82520 7E9F28>97 D<01F8000000FFF8000000FFF8000000FFF80000000FF800000007F8000000 07F800000007F800000007F800000007F800000007F800000007F800000007F800000007 F800000007F800000007F800000007F800000007F800000007F80FF00007F87FFE0007F9 FFFF8007FFE03FC007FF000FE007FE0007F007F80003F807F80003FC07F80003FC07F800 01FE07F80001FE07F80001FE07F80001FF07F80001FF07F80001FF07F80001FF07F80001 FF07F80001FF07F80001FF07F80001FF07F80001FE07F80001FE07F80001FE07F80003FC 07F80003FC07FC0007F807FE0007F007F7001FE007E3E07FC007C1FFFF0007807FFE0007 001FE00028327EB12E>I<0007FF00007FFFE000FFFFF003FC03F807F007FC0FE007FC1F E007FC3FC007FC3FC003F87FC001F07F8000407F800000FF800000FF800000FF800000FF 800000FF800000FF800000FF800000FF8000007F8000007FC000007FC000003FC0000E3F E0000E1FE0001C0FF0001C07F8007803FF01F000FFFFE0007FFF800007FC001F207D9F25 >I<0007FC0000003FFF800000FFFFE00003FC07F00007F801F8000FE000FC001FE0007E 003FC0007E003FC0003F007FC0003F007F80003F007F80003F80FF80003F80FF80003F80 FFFFFFFF80FFFFFFFF80FFFFFFFF80FF80000000FF80000000FF800000007F800000007F 800000003FC00000003FC00003801FC00003801FE00007800FF0000F0007F8001E0003FE 00FC0000FFFFF800003FFFE0000003FF000021207E9F26>101 D<0000FF000007FFC000 1FFFE0003FC7F0007F0FF800FE0FF801FE0FF801FC0FF803FC07F003FC03E003FC01C003 FC000003FC000003FC000003FC000003FC000003FC000003FC0000FFFFF800FFFFF800FF FFF80003FC000003FC000003FC000003FC000003FC000003FC000003FC000003FC000003 FC000003FC000003FC000003FC000003FC000003FC000003FC000003FC000003FC000003 FC000003FC000003FC000003FC000003FC000003FC000003FC000003FC000003FC00007F FFF0007FFFF0007FFFF0001D327EB119>I<001FF007E000FFFE3FF001FFFF7FF807F83F F1F80FE00FE1F80FE00FE0F01FC007F0601FC007F0003FC007F8003FC007F8003FC007F8 003FC007F8003FC007F8001FC007F0001FC007F0000FE00FE0000FE00FE00007F83FC000 07FFFF000006FFFE00000E1FF000000E000000001E000000001E000000001F000000001F 800000001FFFFFC0000FFFFFF8000FFFFFFE0007FFFFFF0003FFFFFF8007FFFFFFC01FFF FFFFE03F00007FE07E00000FF0FC000007F0FC000003F0FC000003F0FC000003F0FC0000 03F07E000007E03F00000FC01FC0003F800FF801FF0007FFFFFE0000FFFFF000001FFF80 00252F7E9F29>I<01F800000000FFF800000000FFF800000000FFF8000000000FF80000 000007F80000000007F80000000007F80000000007F80000000007F80000000007F80000 000007F80000000007F80000000007F80000000007F80000000007F80000000007F80000 000007F80000000007F807F8000007F83FFF000007F87FFF800007F8F03FC00007F9C01F E00007FB000FE00007FE000FF00007FE000FF00007FC000FF00007FC000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000F F000FFFFC1FFFF80FFFFC1FFFF80FFFFC1FFFF8029327DB12E>I<03C0000FF0000FF000 1FF8001FF8001FFC001FF8001FF8000FF0000FF00003C000000000000000000000000000 00000000000000000000000001F800FFF800FFF800FFF8000FF80007F80007F80007F800 07F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F800 07F80007F80007F80007F80007F80007F80007F80007F80007F800FFFF80FFFF80FFFF80 11337DB217>I<01F8000000FFF8000000FFF8000000FFF80000000FF800000007F80000 0007F800000007F800000007F800000007F800000007F800000007F800000007F8000000 07F800000007F800000007F800000007F800000007F800000007F8007FFC07F8007FFC07 F8007FFC07F8001FC007F8001F0007F8003E0007F800780007F801F00007F803E00007F8 07800007F81F000007F83E000007F87C000007F9FE000007FBFF000007FFFF800007FF7F C00007FE3FE00007F81FE00007F01FF00007F00FF80007F007FC0007F003FE0007F001FF 0007F000FF0007F000FF8007F0007FC007F0003FE007F0003FF0FFFF80FFFFFFFF80FFFF FFFF80FFFF28327EB12C>107 D<01F800FFF800FFF800FFF8000FF80007F80007F80007 F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007 F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007 F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007F80007 F80007F80007F80007F800FFFFC0FFFFC0FFFFC012327DB117>I<03F007F8000FF000FF F03FFF007FFE00FFF07FFF80FFFF00FFF0F03FC1E07F800FF1C01FE3803FC007F3000FE6 001FC007F6000FFC001FE007FE000FFC001FE007FC000FF8001FE007FC000FF8001FE007 F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0 001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007 F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0 001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007F8000FF0001FE007 F8000FF0001FE0FFFFC1FFFF83FFFFFFFFC1FFFF83FFFFFFFFC1FFFF83FFFF40207D9F45 >I<03F007F80000FFF03FFF0000FFF07FFF8000FFF0F03FC0000FF1C01FE00007F3000F E00007F6000FF00007FE000FF00007FC000FF00007FC000FF00007F8000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000F F00007F8000FF00007F8000FF00007F8000FF00007F8000FF00007F8000FF000FFFFC1FF FF80FFFFC1FFFF80FFFFC1FFFF8029207D9F2E>I<0007FE0000003FFFC00000FFFFF000 03FC03FC0007F000FE000FE0007F001FC0003F803FC0003FC03FC0003FC07F80001FE07F 80001FE07F80001FE0FF80001FF0FF80001FF0FF80001FF0FF80001FF0FF80001FF0FF80 001FF0FF80001FF0FF80001FF07F80001FE07F80001FE07F80001FE03FC0003FC03FC000 3FC01FE0007F800FE0007F0007F801FE0003FE07FC0001FFFFF800003FFFC0000007FE00 0024207E9F29>I<03F03F00FFF07FC0FFF1FFE0FFF3C7F00FF38FF807F70FF807F60FF8 07FE0FF807FC07F007FC03E007FC008007F8000007F8000007F8000007F8000007F80000 07F8000007F8000007F8000007F8000007F8000007F8000007F8000007F8000007F80000 07F8000007F8000007F8000007F80000FFFFE000FFFFE000FFFFE0001D207E9F22>114 D<00FF870007FFEF001FFFFF003F007F003C001F0078000F00F8000700F8000700F80007 00FC000700FF000000FFF800007FFFC0003FFFF0003FFFFC000FFFFE0007FFFF0001FFFF 80001FFF800000FFC000001FC060000FC0E00007C0E00007C0F00007C0F8000780F8000F 80FE000F00FF803E00FFFFFC00F3FFF800C07FC0001A207D9F21>I<0038000038000038 0000380000380000780000780000780000F80000F80001F80003F80007F8001FF800FFFF FEFFFFFEFFFFFE07F80007F80007F80007F80007F80007F80007F80007F80007F80007F8 0007F80007F80007F80007F80007F80007F80007F80707F80707F80707F80707F80707F8 0707F80703F80E03FC0E01FE1C00FFF8007FF0000FE0182E7EAD20>I118 D121 D E end %%EndProlog %%BeginSetup %%Feature: *Resolution 300dpi TeXDict begin %%EndSetup %%Page: 1 1 1 0 bop 225 358 1509 17 v 281 483 a Fq(Reinforcemen)n(t)26 b(Learning)h(Algorithm)h(for)293 566 y(P)n(artially)g(Observ)-5 b(able)26 b(Mark)n(o)n(v)g(Decision)810 649 y(Problems)p 225 715 1509 5 v 320 867 a Fp(T)l(ommi)14 b(Jaakk)o(ola)281 912 y Fo(tommi)o(@psyc)o(he.mit.)o(edu)797 867 y Fp(Satinder)f(P)l(.)j (Singh)781 912 y Fo(singh@psyc)o(he.mit.edu)1283 867 y Fp(Mic)o(hael)f(I.)h(Jordan)1259 912 y Fo(jordan@psyc)o(he.mit.edu) 481 1016 y(Departmen)o(t)d(of)g(Brain)h(and)g(Cognitiv)o(e)f(Sciences,) i(Bld.)j(E10)633 1062 y(Massac)o(h)o(usetts)e(Institute)f(of)e(T)m(ec)o (hnology)775 1108 y(Cam)o(bridge,)e(MA)j(02139)872 1280 y Fn(Abstract)374 1470 y Fo(Increasing)i(atten)o(tion)f(has)h(b)q(een)g (paid)f(to)g(reinforcemen)o(t)g(learning)g(algo-)374 1516 y(rithms)j(in)g(recen)o(t)i(y)o(ears,)g(partly)e(due)h(to)g (successes)j(in)c(the)h(theoretical)374 1561 y(analysis)c(of)g(their)h (b)q(eha)o(vior)f(in)g(Mark)o(o)o(v)f(en)o(vironmen)o(ts.)22 b(If)15 b(the)h(Mark)o(o)o(v)374 1607 y(assumption)g(is)g(remo)o(v)o (ed,)g(ho)o(w)o(ev)o(er,)i(neither)f(generally)g(the)g(algorithms)374 1653 y(nor)h(the)g(analyses)g(con)o(tin)o(ue)g(to)f(b)q(e)h(usable.)30 b(W)m(e)17 b(prop)q(ose)h(and)g(analyze)374 1698 y(a)h(new)h(learning)f (algorithm)d(to)j(solv)o(e)g(a)g(certain)h(class)g(of)f(non-Mark)o(o)o (v)374 1744 y(decision)i(problems.)36 b(Our)21 b(algorithm)c(applies)j (to)h(problems)e(in)h(whic)o(h)374 1790 y(the)f(en)o(vironmen)o(t)f(is) g(Mark)o(o)o(v,)g(but)h(the)g(learner)g(has)f(restricted)j(access)374 1835 y(to)d(state)h(information.)26 b(The)19 b(algorithm)c(in)o(v)o (olv)o(es)i(a)g(Mon)o(te-Carlo)g(p)q(ol-)374 1881 y(icy)c(ev)n (aluation)e(com)o(bined)h(with)g(a)h(p)q(olicy)f(impro)o(v)o(emen)o(t)e (metho)q(d)i(that)h(is)374 1927 y(similar)i(to)h(that)h(of)f(Mark)o(o)o (v)f(decision)i(problems)f(and)g(is)h(guaran)o(teed)g(to)374 1972 y(con)o(v)o(erge)d(to)f(a)f(lo)q(cal)g(maxim)o(um)n(.)j(The)e (algorithm)d(op)q(erates)15 b(in)d(the)h(space)374 2018 y(of)18 b(sto)q(c)o(hastic)i(p)q(olicies,)f(a)f(space)i(whic)o(h)e(can) h(yield)f(a)g(p)q(olicy)g(that)h(p)q(er-)374 2064 y(forms)13 b(considerably)i(b)q(etter)h(than)e(an)o(y)g(deterministic)g(p)q(olicy) m(.)k(Although)374 2109 y(the)h(space)f(of)f(sto)q(c)o(hastic)i(p)q (olicies)e(is)h(con)o(tin)o(uous|ev)o(en)f(for)h(a)f(discrete)374 2155 y(action)d(space|our)g(algorithm)e(is)h(computationally)e (tractable.)p eop %%Page: 2 2 2 1 bop 225 83 a Fn(1)56 b(INTR)n(ODUCTION)225 181 y Fo(Reinforcemen)o(t)18 b(learning)g(pro)o(vides)h(a)f(sound)h(framew)o (ork)e(for)i(credit)g(assignmen)o(t)f(in)g(un-)225 226 y(kno)o(wn)h(sto)q(c)o(hastic)h(dynamic)e(en)o(vironmen)o(ts.)34 b(F)m(or)19 b(Mark)o(o)o(v)g(en)o(vironmen)o(ts)g(a)g(v)n(ariet)o(y)g (of)225 272 y(di\013eren)o(t)13 b(reinforcemen)o(t)f(learning)f (algorithms)f(ha)o(v)o(e)i(b)q(een)h(devised)g(to)e(predict)i(and)f (con)o(trol)225 318 y(the)19 b(en)o(vironmen)o(t)f(\(e.g.,)g(the)i (TD\()p Fm(\025)p Fo(\))e(algorithm)e(of)i(Sutton,)i(1988,)e(and)g(the) h(Q-learning)225 363 y(algorithm)10 b(of)h(W)m(atkins,)g(1989\).)17 b(Ties)c(to)f(the)h(theory)g(of)e(dynamic)g(programming)e(\(DP\))j(and) 225 409 y(the)19 b(theory)g(of)f(sto)q(c)o(hastic)h(appro)o(ximation)c (ha)o(v)o(e)j(b)q(een)i(exploited,)f(pro)o(viding)e(to)q(ols)h(that)225 455 y(ha)o(v)o(e)13 b(allo)o(w)o(ed)f(these)j(algorithms)c(to)i(b)q(e)h (analyzed)g(theoretically)f(\(Da)o(y)o(an,)f(1992;)g(Tsitsiklis,)225 500 y(1994;)g(Jaakk)o(ola,)g(Jordan,)i(&)g(Singh,)f(1994;)f(W)m(atkins) h(&)h(Da)o(y)o(an,)e(1992\).)225 571 y(Although)17 b(curren)o(t)j (reinforcemen)o(t)e(learning)f(algorithms)e(are)k(based)f(on)g(the)g (assumption)225 617 y(that)f(the)g(learning)f(problem)f(can)i(b)q(e)g (cast)h(as)e(Mark)o(o)o(v)g(decision)h(problem)e(\(MDP\),)i(man)o(y)225 662 y(practical)h(problems)f(resist)h(b)q(eing)g(treated)h(as)f(an)g (MDP)m(.)f(Unfortunately)m(,)g(if)g(the)i(Mark)o(o)o(v)225 708 y(assumption)d(is)i(remo)o(v)o(ed)f(examples)f(can)i(b)q(e)g(found) g(where)h(curren)o(t)g(algorithms)c(cease)k(to)225 754 y(p)q(erform)12 b(w)o(ell)h(\(Singh,)g(Jaakk)o(ola,)e(&)j(Jordan,)f (1994\).)k(Moreo)o(v)o(er,)c(the)h(theoretical)g(analyses)225 799 y(rely)g(hea)o(vily)f(on)g(the)i(Mark)o(o)o(v)e(assumption.)225 870 y(The)e(non-Mark)o(o)o(v)f(nature)h(of)f(the)h(en)o(vironmen)o(t)f (can)h(arise)g(in)f(man)o(y)f(w)o(a)o(ys.)17 b(The)11 b(most)e(direct)225 916 y(extension)15 b(of)e(MDP's)h(is)g(to)g(depriv) o(e)g(the)h(learner)g(of)e(p)q(erfect)j(information)11 b(ab)q(out)j(the)g(state)225 961 y(of)h(the)h(en)o(vironmen)o(t.)22 b(Muc)o(h)16 b(as)f(in)h(the)g(case)g(of)f(Hidden)h(Mark)o(o)o(v)f(Mo)q (dels)h(\(HMM's\),)f(the)225 1007 y(underlying)d(en)o(vironmen)o(t)g (is)g(assumed)g(to)g(b)q(e)i(Mark)o(o)o(v,)d(but)i(the)g(data)f(do)h (not)f(app)q(ear)h(to)f(b)q(e)225 1052 y(Mark)o(o)o(vian)g(to)h(the)h (learner.)19 b(This)13 b(extension)h(not)f(only)g(allo)o(ws)f(for)h(a)g (tractable)g(theoretical)225 1098 y(analysis,)18 b(but)h(is)g(also)f (app)q(ealing)f(for)h(practical)h(purp)q(oses.)33 b(The)19 b(decision)g(problems)f(w)o(e)225 1144 y(consider)d(here)g(are)f(of)f (this)h(t)o(yp)q(e.)225 1214 y(The)19 b(analog)d(of)i(the)h(HMM)f(for)g (con)o(trol)g(problems)f(is)h(the)h(partially)d(observ)n(able)j(Mark)o (o)o(v)225 1260 y(decision)i(pro)q(cess)h(\(POMDP;)f(see)h(e.g.,)g (Monahan,)f(1982\).)38 b(Unlik)o(e)20 b(HMM's,)i(ho)o(w)o(ev)o(er,)225 1306 y(there)13 b(is)f(no)g(kno)o(wn)g(computationally)d(tractable)j (pro)q(cedure)i(for)e(POMDP's.)18 b(The)12 b(problem)225 1351 y(is)20 b(that)g(once)h(the)g(state)g(estimates)f(ha)o(v)o(e)h(b)q (een)g(obtained,)g(DP)f(m)o(ust)g(b)q(e)g(p)q(erformed)g(in)225 1397 y(the)e(con)o(tin)o(uous)e(space)i(of)f(probabilities)f(of)g (state)i(o)q(ccupancies,)h(and)d(this)h(DP)g(pro)q(cess)i(is)225 1443 y(computationally)12 b(infeasible)i(except)j(for)e(small)d(state)k (spaces.)23 b(In)15 b(this)g(pap)q(er)h(w)o(e)f(describ)q(e)225 1488 y(an)d(alternativ)o(e)g(approac)o(h)g(for)g(POMDP's)g(that)g(a)o (v)o(oids)f(the)i(state)g(estimation)d(problem)h(and)225 1534 y(w)o(orks)j(directly)h(in)f(the)h(space)g(of)f(\(sto)q(c)o (hastic\))h(con)o(trol)f(p)q(olicies.)19 b(\(See)d(Singh,)d(et)i(al.,)d (1994,)225 1580 y(for)i(additional)e(material)f(on)j(sto)q(c)o(hastic)h (p)q(olicies.\))225 1699 y Fn(2)56 b(P)-5 b(AR)g(TIAL)20 b(OBSER)-6 b(V)g(ABILITY)225 1797 y Fo(A)12 b(Mark)o(o)o(v)f(decision)h (problem)f(can)h(b)q(e)g(generalized)h(to)e(a)h(POMDP)g(b)o(y)g (restricting)h(the)f(state)225 1842 y(information)e(a)o(v)n(ailable)g (to)j(the)g(learner.)18 b(Accordingly)m(,)12 b(w)o(e)h(de\014ne)g(the)h (learning)e(problem)f(as)225 1888 y(follo)o(ws.)k(There)e(is)d(an)h (underlying)g(MDP)g(with)g(states)h Fl(S)i Fo(=)e Fl(f)p Fm(s)1230 1894 y Fk(1)1249 1888 y Fm(;)7 b(s)1287 1894 y Fk(2)1306 1888 y Fm(;)g(:)g(:)g(:)t(;)g(s)1417 1894 y Fj(N)1449 1888 y Fl(g)k Fo(and)f(transition)225 1934 y(probabilit)o(y)h Fm(p)457 1919 y Fj(a)457 1951 y(ss)489 1937 y Fi(0)504 1934 y Fo(,)h(the)i(probabilit)o(y)d(of)h(jumping)e (from)h(state)j Fm(s)f Fo(to)f(state)i Fm(s)1416 1919 y Fh(0)1441 1934 y Fo(when)f(action)f Fm(a)h Fo(is)225 1987 y(tak)o(en)g(in)g(state)h Fm(s)p Fo(.)k(F)m(or)13 b(ev)o(ery)h(state)g(and)f(ev)o(ery)h(action)f(a)f(\(random\))g(rew)o (ard)i(is)f(pro)o(vided)g(to)225 2032 y(the)i(learner.)20 b(In)15 b(the)g(POMDP)f(setting,)h(the)g(learner)g(is)f(not)g(allo)o(w) o(ed)g(to)g(observ)o(e)h(the)g(state)225 2078 y(directly)d(but)g(only)f (via)h(messages)g(con)o(taining)e(information)f(ab)q(out)j(the)h (state.)18 b(A)o(t)12 b(eac)o(h)g(time)225 2124 y(step)i Fm(t)f Fo(an)g(observ)n(able)h(message)f Fm(m)794 2130 y Fj(t)822 2124 y Fo(is)g(dra)o(wn)g(from)f(a)g(\014nite)i(set)g(of)f (messages)g(according)g(to)225 2169 y(an)j(unkno)o(wn)g(probabilit)o(y) f(distribution)g Fm(P)6 b Fo(\()p Fm(m)p Fl(j)p Fm(s)1024 2175 y Fj(t)1039 2169 y Fo(\))1071 2154 y Fk(1)1090 2169 y Fo(.)25 b(W)m(e)16 b(assume)g(that)g(the)h(learner)g(do)q(es)p 225 2207 598 2 v 277 2234 a Fg(1)294 2250 y Ff(F)m(or)11 b(simplicit)o(y)k(w)o(e)10 b(assume)i(that)g(this)g(distribution)i(dep) q(ends)f(only)g(on)e(the)h(curren)o(t)f(state.)17 b(The)225 2292 y(analyses)e(go)e(through)h(also)g(with)g(distributions)i(dep)q (enden)o(t)f(on)e(the)g(past)h(states)f(and)h(actions)p eop %%Page: 3 3 3 2 bop 225 83 a Fo(not)16 b(p)q(ossess)i(an)o(y)e(prior)g(information) d(ab)q(out)j(the)h(underlying)f(MDP)g(b)q(ey)o(ond)h(the)g(n)o(um)o(b)q (er)225 129 y(of)f(messages)g(and)h(actions.)25 b(The)17 b(goal)e(for)h(the)h(learner)g(is)g(to)f(come)g(up)g(with)g(a)g(p)q (olicy|a)225 174 y(mapping)c(from)g(messages)i(to)f(actions|that)h(giv) o(es)g(the)g(highest)g(exp)q(ected)i(rew)o(ard.)225 245 y(As)d(discussed)h(in)e(Singh)g(et)h(al.)k(\(1994\),)12 b(sto)q(c)o(hastic)h(p)q(olicies)g(can)f(yield)g(considerably)h(higher) 225 291 y(exp)q(ected)h(rew)o(ards)f(than)g(deterministic)e(p)q (olicies)h(in)g(the)h(case)g(of)f(POMDP's.)18 b(T)m(o)11 b(mak)o(e)g(this)225 336 y(statemen)o(t)h(precise)i(requires)g(an)e (appropriate)g(tec)o(hnical)h(de\014nition)f(of)g(\\exp)q(ected)i(rew)o (ard,")225 382 y(b)q(ecause)g(in)d(general)h(it)g(is)g(imp)q(ossible)e (to)i(\014nd)g(a)f(p)q(olicy)m(,)g(sto)q(c)o(hastic)i(or)f(not,)g(that) g(maxim)o(izes)225 428 y(the)20 b(exp)q(ected)h(rew)o(ard)f(for)f(eac)o (h)h(observ)n(able)g(message)f(separately)m(.)34 b(W)m(e)19 b(tak)o(e)h(the)f(time-)225 473 y(a)o(v)o(erage)c(rew)o(ard)h(as)f(a)g (measure)g(of)f(p)q(erformance,)h(that)g(is,)g(the)h(total)e(accrued)i (rew)o(ard)g(p)q(er)225 519 y(n)o(um)o(b)q(er)10 b(of)g(steps)h(tak)o (en)g(\(Bertsek)n(as,)h(1987;)f(Sc)o(h)o(w)o(artz,)g(1993\).)16 b(This)10 b(approac)o(h)h(requires)g(the)225 565 y(assumption)d(that)i (ev)o(ery)h(state)f(of)f(the)i(underlying)e(con)o(trollable)g(Mark)o(o) o(v)g(c)o(hain)g(is)h(reac)o(hable.)225 635 y(In)k(this)f(pap)q(er)i(w) o(e)f(fo)q(cus)g(on)f(a)g Fe(dir)n(e)n(ct)g Fo(approac)o(h)h(to)f (solving)g(the)h(learning)f(problem.)j(Direct)225 681 y(approac)o(hes)j(are)g(to)f(b)q(e)h(compared)f(to)g Fe(indir)n(e)n(ct)g Fo(approac)o(hes,)i(in)e(whic)o(h)g(the)h(learner)g (\014rst)225 726 y(iden)o(ti\014es)12 b(the)h(parameters)f(of)g(the)g (underlying)g(MDP)m(,)f(and)h(then)g(utilizes)g(DP)g(to)g(obtain)f(the) 225 772 y(p)q(olicy)m(.)17 b(As)c(w)o(e)g(noted)g(earlier,)g(indirect)g (approac)o(hes)h(lead)e(to)h(computationally)d(in)o(tractable)225 818 y(algorithms.)16 b(Our)e(approac)o(h)f(can)h(b)q(e)g(view)o(ed)g (as)g(pro)o(viding)e(a)h(generalization)g(of)g(the)h(direct)225 863 y(approac)o(h)g(to)g(MDP's)f(to)h(the)h(case)f(of)g(POMDP's.)225 986 y Fn(3)56 b(A)19 b(MONTE-CARLO)g(POLICY)g(EV)-6 b(ALUA)h(TION)225 1085 y Fo(Adv)n(an)o(tages)15 b(of)g(Mon)o(te-Carlo)f(metho)q(ds)h(for) f(p)q(olicy)h(ev)n(aluation)f(in)g(MDP's)h(ha)o(v)o(e)g(b)q(een)h(re-) 225 1131 y(view)o(ed)f(recen)o(tly)i(\(Barto)e(and)h(Du\013,)f(1994\).) 21 b(Here)16 b(w)o(e)g(presen)o(t)h(a)e(metho)q(d)f(for)h(calculating) 225 1176 y(the)g(v)n(alue)f(of)f(a)i(sto)q(c)o(hastic)g(p)q(olicy)f (that)g(has)h(the)g(\015a)o(v)o(or)e(of)h(a)g(Mon)o(te-Carlo)g (algorithm.)j(T)m(o)225 1222 y(motiv)n(ate)12 b(suc)o(h)j(an)f(approac) o(h)h(let)f(us)h(\014rst)g(consider)g(a)f(simple)f(case)i(where)g(the)g (a)o(v)o(erage)f(re-)225 1268 y(w)o(ard)g(is)f(kno)o(wn)h(and)g (generalize)g(the)h(w)o(ell-de\014ned)f(MDP)f(v)n(alue)h(function)f(to) h(the)g(POMDP)225 1313 y(setting.)30 b(In)17 b(the)h(Mark)o(o)o(v)f (case)i(the)f(v)n(alue)f(function)g(can)h(b)q(e)g(written)h(as)e(\(cf.) 30 b(Bertsek)n(as,)225 1359 y(1987\):)606 1454 y Fm(V)10 b Fo(\()p Fm(s)p Fo(\))i(=)31 b(lim)747 1481 y Fj(N)s Fh(!1)865 1402 y Fj(N)849 1415 y Fd(X)852 1502 y Fj(t)p Fk(=1)916 1454 y Fm(E)r Fl(f)p Fm(R)p Fo(\()p Fm(s)1037 1460 y Fj(t)1052 1454 y Fm(;)7 b(u)1095 1460 y Fj(t)1109 1454 y Fo(\))i Fl(\000)h Fm(R)p Fl(j)p Fm(s)1239 1460 y Fk(1)1269 1454 y Fo(=)i Fm(s)p Fl(g)328 b Fo(\(1\))225 1567 y(where)15 b Fm(s)364 1573 y Fj(t)394 1567 y Fo(and)f Fm(a)497 1573 y Fj(t)525 1567 y Fo(refer)i(to)e(the)h(state)g(and)f (the)h(action)f(tak)o(en)g(at)g(the)h(t)1377 1552 y Fj(th)1425 1567 y Fo(step)g(resp)q(ectiv)o(ely)m(.)225 1613 y(This)f(form)e (generalizes)j(easily)e(to)h(the)g(lev)o(el)g(of)f(messages)h(b)o(y)f (taking)g(an)h(additional)e(exp)q(ec-)225 1659 y(tation:)751 1709 y Fm(V)e Fo(\()p Fm(m)p Fo(\))i(=)g Fm(E)d Fl(f)p Fm(V)g Fo(\()p Fm(s)p Fo(\))p Fl(j)p Fm(s)j Fl(!)f Fm(m)p Fl(g)474 b Fo(\(2\))225 1776 y(where)16 b Fm(s)e Fl(!)e Fm(m)k Fo(refers)g(to)f(all)e(the)j(instances)g(where)g Fm(m)f Fo(is)g(observ)o(ed)h(in)f Fm(s)g Fo(and)g Fm(E)r Fl(f\001j)p Fm(s)d Fl(!)h Fm(m)p Fl(g)225 1822 y Fo(is)i(a)f(Mon)o (te-Carlo)g(exp)q(ectation.)22 b(This)15 b(generalization)f(yields)g(a) h(POMDP)g(v)n(alue)f(function)225 1867 y(giv)o(en)f(b)o(y)755 1928 y Fm(V)c Fo(\()p Fm(m)p Fo(\))k(=)916 1888 y Fd(X)912 1975 y Fj(s)p Fh(2)p Fj(m)987 1928 y Fm(P)6 b Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\))p Fm(V)j Fo(\()p Fm(s)p Fo(\))478 b(\(3\))225 2038 y(in)13 b(whic)o(h)g Fm(P)6 b Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\))13 b(de\014ne)i(the)f(limit)c(o)q(ccupancy)k (probabilities)f(o)o(v)o(er)g(the)h(underlying)f(states)225 2084 y(for)e(eac)o(h)g(message)g Fm(m)p Fo(.)18 b(As)11 b(is)g(seen)h(in)f(the)h(next)f(section)h(v)n(alue)f(functions)g(of)f (this)h(t)o(yp)q(e)h(can)f(b)q(e)225 2130 y(used)i(to)g(re\014ne)h(the) f(curren)o(tly)g(follo)o(w)o(ed)e(con)o(trol)h(p)q(olicy)g(to)h(yield)f (a)g(higher)g(a)o(v)o(erage)h(rew)o(ard.)225 2200 y(Let)18 b(us)h(no)o(w)e(consider)i(ho)o(w)f(the)g(generalized)h(v)n(alue)e (functions)h(can)g(b)q(e)h(computed)e(based)225 2246 y(on)f(the)h(observ)n(ations.)26 b(W)m(e)16 b(prop)q(ose)i(a)e (recursiv)o(e)i(Mon)o(te-Carlo)e(algorithm)e(to)i(e\013ectiv)o(ely)225 2292 y(compute)d(the)h(a)o(v)o(erages)f(in)o(v)o(olv)o(ed)f(in)h(the)h (de\014nition)f(of)g(the)h(v)n(alue)f(function.)k(In)d(the)f(simple)p eop %%Page: 4 4 4 3 bop 225 83 a Fo(case)15 b(when)f(the)h(a)o(v)o(erage)f(pa)o(y)o (o\013)f(is)h(kno)o(wn)f(this)h(algorithm)d(is)j(giv)o(en)f(b)o(y)469 174 y Fm(\014)492 180 y Fj(t)507 174 y Fo(\()p Fm(m)p Fo(\))43 b(=)f(\(1)9 b Fl(\000)789 145 y Fm(\037)815 151 y Fj(t)829 145 y Fo(\()p Fm(m)p Fo(\))p 784 164 119 2 v 784 202 a Fm(K)819 208 y Fj(t)834 202 y Fo(\()p Fm(m)p Fo(\))908 174 y(\))p Fm(\015)945 180 y Fj(t)960 174 y Fm(\014)983 180 y Fj(t)p Fh(\000)p Fk(1)1041 174 y Fo(\()p Fm(m)p Fo(\))h(+)1170 145 y Fm(\037)1196 151 y Fj(t)1210 145 y Fo(\()p Fm(m)p Fo(\))p 1165 164 V 1165 202 a Fm(K)1200 208 y Fj(t)1215 202 y Fo(\()p Fm(m)p Fo(\))1681 174 y(\(4\))469 288 y Fm(V)493 294 y Fj(t)507 288 y Fo(\()p Fm(m)p Fo(\))43 b(=)f(\(1)9 b Fl(\000)789 260 y Fm(\037)815 266 y Fj(t)829 260 y Fo(\()p Fm(m)p Fo(\))p 784 279 V 784 317 a Fm(K)819 323 y Fj(t)834 317 y Fo(\()p Fm(m)p Fo(\))908 288 y(\))p Fm(V)948 294 y Fj(t)p Fh(\000)p Fk(1)1005 288 y Fo(\()p Fm(m)p Fo(\))h(+)g Fm(\014)1148 294 y Fj(t)1163 288 y Fo(\()p Fm(m)p Fo(\)[)p Fm(R)p Fo(\()p Fm(s)1310 294 y Fj(t)1325 288 y Fm(;)d(a)1366 294 y Fj(t)1380 288 y Fo(\))i Fl(\000)h Fm(R)p Fo(])190 b(\(5\))225 381 y(where)14 b Fm(\037)370 387 y Fj(t)385 381 y Fo(\()p Fm(m)p Fo(\))g(is)e(the)i (indicator)f(function)f(for)h(message)g Fm(m)p Fo(,)g Fm(K)1232 387 y Fj(t)1247 381 y Fo(\()p Fm(m)p Fo(\))h(is)e(the)i(n)o (um)o(b)q(er)e(of)h(times)225 427 y Fm(m)18 b Fo(has)g(o)q(ccurred,)h (and)f Fm(\015)648 433 y Fj(t)680 427 y Fo(is)g(a)f(discoun)o(t)h (factor)f(con)o(v)o(erging)g(to)h(one)g(in)f(the)h(limit.)26 b(This)225 473 y(algorithm)11 b(can)i(b)q(e)h(view)o(ed)g(as)f (recursiv)o(e)i(a)o(v)o(eraging)d(of)h(\(discoun)o(ted\))i(sample)d (sequences)k(of)225 518 y(di\013eren)o(t)d(lengths)g(eac)o(h)g(of)f (whic)o(h)g(has)h(b)q(een)g(started)h(at)e(a)g(di\013eren)o(t)h(o)q (ccurrence)i(of)d(message)225 564 y Fm(m)p Fo(.)21 b(This)15 b(can)g(b)q(e)g(seen)h(b)o(y)f(unfolding)e(the)i(recursion,)h(yielding) e(an)g(explicit)g(expression)i(for)225 610 y Fm(V)249 616 y Fj(t)264 610 y Fo(\()p Fm(m)p Fo(\).)i(T)m(o)13 b(this)g(end,)g(let)g Fm(t)666 616 y Fj(k)700 610 y Fo(denote)h(the)g (time)d(step)j(corresp)q(onding)g(to)f(the)h Fm(k)1494 595 y Fj(th)1541 610 y Fo(o)q(ccurrence)225 655 y(of)j(message)g Fm(m)g Fo(and)g(for)g(clarit)o(y)g(let)g Fm(R)871 661 y Fj(t)903 655 y Fo(=)g Fm(R)p Fo(\()p Fm(s)1019 661 y Fj(t)1034 655 y Fm(;)7 b(u)1077 661 y Fj(t)1091 655 y Fo(\))12 b Fl(\000)f Fm(R)17 b Fo(for)g(ev)o(ery)h Fm(t)p Fo(.)29 b(Using)17 b(these)h(the)225 701 y(recursion)d(yields:) 477 787 y Fm(V)501 793 y Fj(t)516 787 y Fo(\()p Fm(m)p Fo(\))d(=)694 759 y(1)p 645 778 V 645 816 a Fm(K)680 822 y Fj(t)695 816 y Fo(\()p Fm(m)p Fo(\))769 787 y([)23 b Fm(R)836 793 y Fj(t)849 797 y Fc(1)876 787 y Fo(+)9 b(\000)943 793 y Fk(1)p Fj(;)p Fk(1)997 787 y Fm(R)1029 793 y Fj(t)1042 797 y Fc(1)1058 793 y Fk(+1)1111 787 y Fo(+)g Fm(:)e(:)g(:)h Fo(+)i(\000)1278 793 y Fk(1)p Fj(;t)p Fh(\000)p Fj(t)1357 797 y Fc(1)1382 787 y Fm(R)1414 793 y Fj(t)804 861 y Fl(\001)d(\001)g(\001)804 919 y Fo(+)p Fm(R)868 925 y Fj(t)881 929 y Fb(k)910 919 y Fo(+)i(\000)977 925 y Fj(k)q(;)p Fk(1)1033 919 y Fm(R)1065 925 y Fj(t)1078 929 y Fb(k)1096 925 y Fk(+1)1149 919 y Fo(+)g Fm(:)e(:)g(:)h Fo(+)i(\000)1316 925 y Fj(k)q(;t)p Fh(\000)p Fj(t)1397 929 y Fb(k)1424 919 y Fm(R)1456 925 y Fj(t)1470 919 y Fo(])199 b(\(6\))225 990 y(where)16 b(w)o(e)g(ha)o(v)o(e)e(for)h (simplicit)o(y)e(used)j(\000)883 996 y Fj(k)q(;T)952 990 y Fo(to)f(indicate)g(the)h(discoun)o(ting)e(at)h(the)h(T)1611 975 y Fj(th)1660 990 y Fo(step)225 1041 y(in)f(the)h(k)370 1026 y Fj(th)420 1041 y Fo(sequence.)25 b(Comparing)13 b(the)j(ab)q(o)o(v)o(e)g(expression)g(to)g(equation)f(1)g(indicates)h (that)225 1087 y(the)e(discoun)o(t)f(factor)h(has)f(to)g(con)o(v)o (erge)h(to)f(one)h(in)f(the)h(limit)c(since)k(the)g(a)o(v)o(erages)g (in)f Fm(V)c Fo(\()p Fm(s)p Fo(\))14 b(or)225 1132 y Fm(V)9 b Fo(\()p Fm(m)p Fo(\))15 b(in)o(v)o(olv)o(e)e(no)g(discoun)o (ting.)225 1203 y(T)m(o)c(address)h(the)h(question)e(of)g(con)o(v)o (ergence)i(of)e(this)h(algorithm)d(let)i(us)h(\014rst)g(assume)f(a)g (constan)o(t)225 1248 y(discoun)o(ting)15 b(\(that)h(is,)g Fm(\015)632 1254 y Fj(t)662 1248 y Fo(=)f Fm(\015)i(<)f Fo(1\).)23 b(In)16 b(this)g(case,)h(the)f(algorithm)d(pro)q(duces)18 b(at)d(b)q(est)i(an)225 1294 y(appro)o(ximation)9 b(to)j(the)g(v)n (alue)g(function.)17 b(F)m(or)11 b(large)h Fm(K)s Fo(\()p Fm(m)p Fo(\))h(the)f(con)o(v)o(ergence)i(rate)e(b)o(y)g(whic)o(h)225 1340 y(this)18 b(appro)o(ximate)e(solution)h(is)h(found)f(can)h(b)q(e)h (c)o(haracterized)h(in)d(terms)h(of)f(the)i(bias)e(and)225 1385 y(v)n(ariance.)37 b(This)21 b(giv)o(es)f Fm(B)r(ias)p Fl(f)p Fm(V)10 b Fo(\()p Fm(m)p Fo(\))p Fl(g)23 b(/)g Fo(\(1)14 b Fl(\000)h Fo(\026)-23 b Fm(\015)s Fo(\))1079 1370 y Fh(\000)p Fk(1)1124 1385 y Fm(=K)s Fo(\()p Fm(m)p Fo(\))21 b(and)f Fm(V)10 b(ar)q Fl(f)p Fm(V)f Fo(\()p Fm(m)p Fo(\))p Fl(g)21 b(/)f Fo(\(1)14 b Fl(\000)227 1431 y Fo(\026)-23 b Fm(\015)s Fo(\))265 1416 y Fh(\000)p Fk(2)310 1431 y Fm(=K)s Fo(\()p Fm(m)p Fo(\))17 b(where)i(\026)-22 b Fm(\015)19 b Fo(=)d Fm(E)r Fl(f)p Fm(\015)742 1416 y Fj(t)755 1420 y Fb(k)774 1416 y Fh(\000)p Fj(t)813 1420 y Fb(k)p Fi(\000)p Fc(1)869 1431 y Fl(g)g Fo(is)h(the)g(exp)q (ected)i(e\013ectiv)o(e)f(discoun)o(ting)f(b)q(et)o(w)o(een)225 1477 y(observ)n(ations.)g(No)o(w,)10 b(in)f(order)i(to)f(\014nd)g(the)h (correct)h(v)n(alue)d(function)h(w)o(e)g(need)h(an)f(appropriate)225 1522 y(w)o(a)o(y)g(of)g(letting)g Fm(\015)499 1528 y Fj(t)526 1522 y Fl(!)h Fo(1)f(in)h(the)g(limit.)j(Ho)o(w)o(ev)o(er,)d (not)g(all)e(suc)o(h)j(sc)o(hedules)g(lead)f(to)f(con)o(v)o(ergen)o(t) 225 1568 y(algorithms;)j(setting)j Fm(\015)603 1574 y Fj(t)632 1568 y Fo(=)f(1)g(for)g(all)f Fm(t)p Fo(,)h(for)g(example,)g (w)o(ould)f(not.)23 b(By)15 b(making)e(use)k(of)d(the)225 1614 y(ab)q(o)o(v)o(e)f(b)q(ounds)g(a)g(feasible)g(sc)o(hedule)h (guaran)o(teeing)f(a)f(v)n(anishing)g(bias)h(and)f(v)n(ariance)h(can)g (b)q(e)225 1659 y(found.)19 b(F)m(or)14 b(instance,)h(since)g Fm(\015)h(>)e Fo(\026)-23 b Fm(\015)18 b Fo(w)o(e)c(can)h(c)o(ho)q(ose) g Fm(\015)1121 1666 y Fj(k)q Fk(\()p Fj(m)p Fk(\))1210 1659 y Fo(=)e(1)c Fl(\000)h Fm(K)s Fo(\()p Fm(m)p Fo(\))1433 1644 y Fk(1)p Fj(=)p Fk(4)1486 1659 y Fo(.)19 b(Muc)o(h)c(faster)225 1705 y(sc)o(hedules)g(are)g(p)q(ossible)f(to)g(obtain)f(b)o(y)g (estimating)h(\026)-22 b Fm(\015)r Fo(.)225 1776 y(Let)12 b(us)h(no)o(w)e(revise)i(the)g(algorithm)c(to)j(tak)o(e)g(in)o(to)f (accoun)o(t)i(the)f(fact)g(that)g(the)h(learner)f(in)g(fact)225 1821 y(has)i(no)g(prior)g(kno)o(wledge)f(of)h(the)g(a)o(v)o(erage)g (rew)o(ard.)19 b(In)14 b(this)g(case)h(the)g(true)f(a)o(v)o(erage)g (rew)o(ard)225 1867 y(app)q(earing)9 b(in)g(the)h(ab)q(o)o(v)o(e)f (algorithm)e(needs)k(to)e(b)q(e)h(replaced)g(with)f(an)g(incremen)o (tally)f(up)q(dated)225 1913 y(estimate)15 b Fm(R)425 1919 y Fj(t)p Fh(\000)p Fk(1)482 1913 y Fo(.)23 b(T)m(o)15 b(impro)o(v)o(e)f(the)j(e\013ect)g(this)f(c)o(hanging)f(estimate)g(has) h(on)g(the)g(v)n(alues)g(w)o(e)225 1958 y(transform)10 b(the)h(v)n(alue)g(function)f(whenev)o(er)j(the)e(estimate)f(is)h(up)q (dated.)18 b(This)11 b(transformation)225 2004 y(is)j(giv)o(en)f(b)o(y) 588 2095 y Fm(C)618 2101 y Fj(t)632 2095 y Fo(\()p Fm(m)p Fo(\))47 b(=)g(\(1)9 b Fl(\000)923 2066 y Fm(\037)949 2072 y Fj(t)963 2066 y Fo(\()p Fm(m)p Fo(\))p 918 2085 V 918 2123 a Fm(K)953 2129 y Fj(t)968 2123 y Fo(\()p Fm(m)p Fo(\))1042 2095 y(\))p Fm(C)1088 2101 y Fj(t)p Fh(\000)p Fk(1)1145 2095 y Fo(\()p Fm(m)p Fo(\))h(+)f Fm(\014)1287 2101 y Fj(t)1302 2095 y Fo(\()p Fm(m)p Fo(\))311 b(\(7\))593 2181 y Fm(V)617 2187 y Fj(t)632 2181 y Fo(\()p Fm(m)p Fo(\))43 b Fl(!)e Fm(V)850 2187 y Fj(t)864 2181 y Fo(\()p Fm(m)p Fo(\))10 b Fl(\000)g Fm(C)1014 2187 y Fj(t)1028 2181 y Fo(\()p Fm(m)p Fo(\)\()p Fm(R)1144 2187 y Fj(t)1168 2181 y Fl(\000)g Fm(R)1242 2187 y Fj(t)p Fh(\000)p Fk(1)1299 2181 y Fo(\))366 b(\(8\))225 2246 y(and,)13 b(as)g(a)g(result,)h(the)g(new)g(v)n(alues)f(are)g(as)h(if)e (they)i(had)f(b)q(een)i(computed)e(using)g(the)g(curren)o(t)225 2292 y(estimate)g(of)h(the)g(a)o(v)o(erage)g(rew)o(ard.)p eop %%Page: 5 5 5 4 bop 225 83 a Fo(T)m(o)11 b(carry)h(these)h(results)f(to)g(the)g (con)o(trol)f(setting)h(and)f(assign)g(a)g(\014gure)h(of)f(merit)g(to)g (sto)q(c)o(hastic)225 129 y(p)q(olicies)17 b(w)o(e)h(need)g(a)f(quan)o (tit)o(y)f(related)i(to)f(the)h(actions)f(for)g(eac)o(h)h(observ)o(ed)h (message.)27 b(As)225 174 y(in)20 b(the)h(case)h(of)e(MDP's,)h(this)g (is)f(readily)g(ac)o(hiev)o(ed)h(b)o(y)f(replacing)h Fm(m)f Fo(in)g(the)i(algorithm)225 220 y(just)15 b(describ)q(ed)j(b)o (y)d(\()p Fm(m;)7 b(a)p Fo(\).)22 b(In)15 b(terms)g(of)g(equation)g(6,) g(for)f(example,)g(this)i(means)e(that)h(the)225 266 y(sequences)k(started)f(from)e Fm(m)h Fo(are)g(classi\014ed)g (according)g(to)g(the)h(actions)e(tak)o(en)h(when)h Fm(m)f Fo(is)225 311 y(observ)o(ed.)23 b(The)15 b(ab)q(o)o(v)o(e)g(analysis)f (go)q(es)i(through)f(when)g Fm(m)h Fo(is)e(replaced)i(b)o(y)f(\()p Fm(m;)7 b(a)p Fo(\),)15 b(yielding)225 357 y(\\Q-v)n(alues")e(on)h(the) g(lev)o(el)g(of)f(messages:)685 429 y Fm(Q)718 412 y Fj(\031)740 429 y Fo(\()p Fm(m;)7 b(a)p Fo(\))12 b(=)905 390 y Fd(X)927 477 y Fj(s)972 429 y Fm(P)1005 412 y Fj(\031)1027 429 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\))p Fm(Q)1159 412 y Fj(\031)1182 429 y Fo(\()p Fm(s;)7 b(a)p Fo(\))407 b(\(9\))225 531 y(In)14 b(the)g(next)g(section)h(w)o(e)f(sho)o(w)g(ho)o (w)f(these)i(v)n(alues)f(can)g(b)q(e)g(used)h(to)e(searc)o(h)i (e\016cien)o(tly)f(for)f(a)225 577 y(b)q(etter)i(p)q(olicy)m(.)225 693 y Fn(4)56 b(POLICY)19 b(IMPR)n(O)n(VEMENT)h(THEOREM)225 788 y Fo(Here)e(w)o(e)e(presen)o(t)i(a)e(p)q(olicy)g(impro)o(v)o(emen)o (t)d(theorem)j(that)h(enables)g(the)g(learner)g(to)f(searc)o(h)225 834 y(e\016cien)o(tly)h(for)g(a)g(b)q(etter)i(p)q(olicy)d(in)h(the)h (con)o(tin)o(uous)f(p)q(olicy)g(space)h(using)f(the)h(\\Q-v)n(alues") 225 880 y Fm(Q)p Fo(\()p Fm(m;)7 b(a)p Fo(\))k(describ)q(ed)j(in)d(the) h(previous)g(section.)17 b(The)12 b(theorem)g(allo)o(ws)e(the)i(p)q (olicy)f(re\014nemen)o(t)225 925 y(to)j(b)q(e)g(done)g(in)g(a)f(w)o(a)o (y)h(that)g(is)f(similar)f(to)h(p)q(olicy)h(impro)o(v)o(em)o(en)o(t)e (in)h(a)h(MDP)g(setting.)225 1011 y Fp(Theorem)h(1)20 b Fe(L)n(et)15 b(the)f(curr)n(ent)h(sto)n(chastic)f(p)n(olicy)h Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))g Fe(le)n(ad)f(to)h (Q-values)f Fm(Q)1543 996 y Fj(\031)1566 1011 y Fo(\()p Fm(m;)7 b(a)p Fo(\))14 b Fe(on)225 1056 y(the)h(level)f(of)h(messages.) k(F)m(or)c(any)g(p)n(olicy)g Fm(\031)931 1041 y Fk(1)950 1056 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))g Fe(de\014ne)597 1131 y Fm(J)624 1114 y Fj(\031)644 1101 y Fc(1)662 1131 y Fo(\()p Fm(m)p Fo(\))e(=)786 1092 y Fd(X)807 1179 y Fj(a)853 1131 y Fm(\031)878 1114 y Fk(1)897 1131 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1044 1114 y Fj(\031)1066 1131 y Fo(\()p Fm(m;)7 b(a)p Fo(\))j Fl(\000)f Fm(V)1260 1114 y Fj(\031)1282 1131 y Fo(\()p Fm(m)p Fo(\)])225 1232 y Fe(The)k(change)i(in)e(the)h(aver)n(age)f(r)n(ewar)n(d)g(r)n (esulting)f(fr)n(om)h(changing)i(the)e(curr)n(ent)g(p)n(olicy)g(ac)n(c) n(or)n(d-)225 1278 y(ing)i(to)g Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))d Fl(!)f Fo(\(1)e Fl(\000)g Fm(\017)p Fo(\))p Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))h(+)g Fm(\017\031)878 1263 y Fk(1)896 1278 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))15 b Fe(is)g(given)g(by)657 1353 y Fo(\001)p Fm(R)724 1336 y Fj(\031)758 1353 y Fo(=)d Fm(\017)826 1313 y Fd(X)841 1401 y Fj(m)892 1353 y Fm(P)925 1336 y Fj(\031)947 1353 y Fo(\()p Fm(m)p Fo(\))p Fm(J)1042 1336 y Fj(\031)1062 1323 y Fc(1)1082 1353 y Fo(\()p Fm(m)p Fo(\))e(+)f Fm(O)q Fo(\()p Fm(\017)1267 1336 y Fk(2)1286 1353 y Fo(\))225 1457 y Fe(wher)n(e)i Fm(P)372 1442 y Fj(\031)394 1457 y Fo(\()p Fm(m)p Fo(\))h Fe(ar)n(e)f(the)g(o)n(c)n (cup)n(ancy)i(pr)n(ob)n(abilities)d(for)h(messages)h(asso)n(ciate)n(d)f (with)g(the)h(curr)n(ent)225 1502 y(p)n(olicy.)225 1587 y Fo(The)k(pro)q(of)g(is)f(giv)o(en)h(in)f(App)q(endix.)24 b(In)16 b(terms)f(of)h(p)q(olicy)f(impro)o(v)o(emen)o(t)e(the)j (theorem)g(can)225 1633 y(b)q(e)f(in)o(terpreted)g(as)f(follo)o(ws.)i (Cho)q(ose)f(the)f(p)q(olicy)g Fm(\031)1062 1618 y Fk(1)1080 1633 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))g(suc)o(h)h(that)664 1707 y Fm(J)691 1690 y Fj(\031)711 1678 y Fc(1)730 1707 y Fo(\()p Fm(m)p Fo(\))d(=)g(max)884 1732 y Fj(a)932 1707 y Fo([)p Fm(Q)977 1690 y Fj(\031)998 1707 y Fo(\()p Fm(m;)7 b(a)p Fo(\))j Fl(\000)f Fm(V)1192 1690 y Fj(\031)1214 1707 y Fo(\()p Fm(m)p Fo(\)])366 b(\(10\))225 1798 y(If)19 b(no)o(w)g Fm(J)391 1783 y Fj(\031)411 1771 y Fc(1)430 1798 y Fo(\()p Fm(m)p Fo(\))i Fm(>)g Fo(0)f(for)f(some)f Fm(m)i Fo(then)g(w)o(e)g(can)f(c)o(hange)h(the)g(curren)o(t)h(p)q (olicy)e(to)o(w)o(ards)225 1844 y Fm(\031)250 1829 y Fk(1)287 1844 y Fo(and)e(exp)q(ect)j(an)d(increase)i(in)f(the)g(a)o(v)o (erage)g(rew)o(ard)g(as)g(sho)o(wn)g(b)o(y)g(the)g(theorem.)30 b(The)225 1889 y Fm(\017)18 b Fo(factor)h(suggests)g(lo)q(cal)f(c)o (hanges)h(in)f(the)h(p)q(olicy)f(space)h(and)f(the)h(p)q(olicy)f(can)h (b)q(e)g(re\014ned)225 1943 y(un)o(til)12 b(max)399 1950 y Fj(\031)419 1942 y Fc(1)445 1943 y Fm(J)472 1928 y Fj(\031)492 1915 y Fc(1)510 1943 y Fo(\()p Fm(m)p Fo(\))h(=)e(0)i(for)f (all)f Fm(m)i Fo(whic)o(h)g(constitutes)h(a)e(lo)q(cal)f(maxim)o(um)d (for)k(this)h(p)q(olicy)225 1988 y(impro)o(v)o(emen)o(t)e(metho)q(d.)17 b(Note)d(that)g(the)g(new)g(direction)g Fm(\031)1185 1973 y Fk(1)1204 1988 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))g(in)f(the)h(p)q(olicy)f(space)i(can)225 2034 y(b)q(e)g(c)o (hosen)f(separately)h(for)e(eac)o(h)i Fm(m)p Fo(.)225 2151 y Fn(5)56 b(THE)18 b(ALGORITHM)225 2246 y Fo(Based)11 b(on)e(the)i(theoretical)f(analysis)f(presen)o(ted)j(ab)q(o)o(v)o(e)d (w)o(e)h(can)g(construct)h(an)f(algorithm)d(that)225 2292 y(p)q(erforms)13 b(w)o(ell)f(in)g(a)h(POMDP)h(setting.)k(The)13 b(algorithm)e(is)h(comp)q(osed)h(of)f(t)o(w)o(o)h(parts:)18 b(First,)p eop %%Page: 6 6 6 5 bop 225 83 a Fm(Q)p Fo(\()p Fm(m;)7 b(a)p Fo(\))17 b(v)n(alues|analogous)e(to)i(the)h(Q-v)n(alues)f(in)g(MDP|are)g (calculated)h(via)e(a)h(Mon)o(te-)225 129 y(Carlo)12 b(approac)o(h.)17 b(This)12 b(is)h(follo)o(w)o(ed)d(b)o(y)i(a)g(p)q (olicy)g(impro)o(v)o(emen)o(t)e(step)j(whic)o(h)f(is)h(ac)o(hiev)o(ed)f (b)o(y)225 174 y(increasing)h(the)h(probabilit)o(y)e(of)g(taking)g(the) i(b)q(est)g(action)f(as)g(de\014ned)h(b)o(y)f Fm(Q)p Fo(\()p Fm(m;)7 b(a)p Fo(\).)18 b(The)c(new)225 220 y(p)q(olicy)e(is)h (guaran)o(teed)g(to)f(yield)g(a)h(higher)f(a)o(v)o(erage)h(rew)o(ard)g (\(see)h(Theorem)f(1\))f(as)h(long)f(as)g(for)225 266 y(some)h Fm(m)747 311 y Fo(max)777 336 y Fj(a)825 311 y Fo([)p Fm(Q)p Fo(\()p Fm(m;)7 b(a)p Fo(\))h Fl(\000)i Fm(V)f Fo(\()p Fm(m)p Fo(\)])j Fm(>)g Fo(0)441 b(\(11\))225 382 y(This)11 b(condition)g(b)q(eing)g(false)h(constitutes)g(a)g(lo)q (cal)e(maxim)n(um)d(for)k(the)i(algorithm.)h(Examples)225 428 y(illustrating)f(that)g(this)h(indeed)h(is)f(a)f(lo)q(cal)g(maxim)n (um)d(can)k(b)q(e)h(found)e(fairly)f(easily)m(.)225 498 y(In)d(practice,)i(it)e(is)g(not)h(feasible)f(to)g(w)o(ait)g(for)g(the) h(Mon)o(te-Carlo)e(p)q(olicy)h(ev)n(aluation)f(to)h(con)o(v)o(erge)225 544 y(but)15 b(to)f(try)g(to)g(impro)o(v)o(e)f(the)i(p)q(olicy)f(b)q (efore)h(the)g(con)o(v)o(ergence.)21 b(The)14 b(p)q(olicy)g(can)h(b)q (e)g(re\014ned)225 589 y(concurren)o(tly)g(with)f(the)g(Mon)o(te-Carlo) f(metho)q(d)g(according)h(to)564 651 y Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)675 657 y Fj(n)698 651 y Fo(\))d Fl(!)g Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)889 657 y Fj(n)912 651 y Fo(\))f(+)f Fm(\017)p Fo([)p Fm(Q)1041 657 y Fj(n)1063 651 y Fo(\()p Fm(m)1115 657 y Fj(n)1138 651 y Fm(;)e(a)p Fo(\))i Fl(\000)g Fm(V)1269 657 y Fj(n)1292 651 y Fo(\()p Fm(m)1344 657 y Fj(n)1367 651 y Fo(\)])265 b(\(12\))225 712 y(with)10 b(normalization.)j(Other)f(async)o(hronous)e(or)g(sync)o (hronous)h(on-online)e(up)q(dating)g(sc)o(hemes)225 758 y(can)20 b(also)f(b)q(e)h(used.)37 b(Note)20 b(that)g(if)f Fm(Q)866 764 y Fj(n)888 758 y Fo(\()p Fm(m;)7 b(a)p Fo(\))22 b(=)f Fm(Q)p Fo(\()p Fm(m;)7 b(a)p Fo(\))20 b(then)g(this)g(c)o(hange)g (w)o(ould)f(b)q(e)225 803 y(statistically)11 b(equiv)n(alen)o(t)g(to)h (that)g(of)g(the)g(batc)o(h)g(v)o(ersion)g(with)g(the)h(concomitan)o(t) d(guaran)o(tees)225 849 y(of)j(giving)g(a)g(higher)h(a)o(v)o(erage)g (rew)o(ard.)225 965 y Fn(6)56 b(CONCLUSIONS)225 1060 y Fo(In)13 b(this)h(pap)q(er)g(w)o(e)f(ha)o(v)o(e)h(prop)q(osed)g(and)f (theoretically)g(analyzed)h(an)f(algorithm)d(that)k(solv)o(es)225 1106 y(a)j(reinforcemen)o(t)f(learning)h(problem)e(in)i(a)f(POMDP)i (setting,)f(where)h(the)g(learner)f(has)g(re-)225 1152 y(stricted)k(access)g(to)f(the)g(state)g(of)f(the)i(en)o(vironmen)o(t.) 34 b(As)20 b(the)g(underlying)f(MDP)h(is)f(not)225 1197 y(kno)o(wn)13 b(the)h(problem)e(app)q(ears)j(to)e(the)h(learner)g(to)g (ha)o(v)o(e)f(a)g(non-Mark)o(o)o(v)g(nature.)18 b(The)c(a)o(v)o(er-)225 1243 y(age)e(rew)o(ard)h(w)o(as)f(c)o(hosen)h(as)f(the)g(\014gure)h(of) f(merit)f(for)g(the)i(learning)e(problem)g(and)h(sto)q(c)o(hastic)225 1289 y(p)q(olicies)h(w)o(ere)i(used)g(to)e(pro)o(vide)h(higher)f(a)o(v) o(erage)h(rew)o(ards)h(than)e(can)h(b)q(e)g(ac)o(hiev)o(ed)g(with)g (de-)225 1334 y(terministic)f(p)q(olicies.)18 b(This)c(extension)g (from)e(MDP's)i(to)g(POMDP's)g(greatly)g(increases)h(the)225 1380 y(domain)d(of)h(p)q(oten)o(tial)g(applications)g(of)g (reinforcemen)o(t)h(learning)f(metho)q(ds.)225 1451 y(The)d(simplicit)o (y)e(of)h(the)i(algorithm)c(stems)j(partly)g(from)e(a)i(Mon)o(te-Carlo) f(approac)o(h)h(to)g(obtain-)225 1496 y(ing)i(action-dep)q(enden)o(t)h (v)n(alues)f(for)g(eac)o(h)h(message.)k(These)c(new)g(\\Q-v)n(alues")f (w)o(ere)h(sho)o(wn)f(to)225 1542 y(giv)o(e)g(rise)h(to)f(a)g(simple)f (p)q(olicy)g(impro)o(v)o(emen)o(t)f(result)j(that)f(enables)h(the)g (learner)g(to)f(gradually)225 1588 y(impro)o(v)o(e)g(the)j(p)q(olicy)e (in)g(the)i(con)o(tin)o(uous)f(space)g(of)g(probabilistic)f(p)q (olicies.)225 1658 y(The)g(batc)o(h)g(v)o(ersion)g(of)f(the)h (algorithm)d(w)o(as)j(sho)o(wn)f(to)h(con)o(v)o(erge)g(to)g(a)f(lo)q (cal)g(maxim)n(um)o(.)i(W)m(e)225 1704 y(also)k(prop)q(osed)h(an)f (on-line)f(v)o(ersion)i(of)e(the)i(algorithm)d(in)i(whic)o(h)g(the)h(p) q(olicy)e(is)i(c)o(hanged)225 1750 y(concurren)o(tly)14 b(with)e(the)h(calculation)e(of)h(the)h(\\Q-v)n(alues.")k(The)c(p)q (olicy)f(impro)o(v)o(emen)o(t)e(of)h(the)225 1795 y(on-line)i(v)o (ersion)h(resem)o(bles)g(that)g(of)g(learning)f(automata.)225 1896 y Fp(APPENDIX)225 1980 y Fo(Let)j(us)f(denote)i(the)e(p)q(olicy)g (after)g(the)h(c)o(hange)g(b)o(y)f Fm(\031)1082 1965 y Fj(\017)1098 1980 y Fo(.)22 b(Assume)15 b(\014rst)h(that)g(w)o(e)f (ha)o(v)o(e)g(access)225 2026 y(to)e Fm(Q)308 2011 y Fj(\031)331 2026 y Fo(\()p Fm(s;)7 b(a)p Fo(\),)13 b(the)h(Q-v)n(alues) f(for)h(the)g(underlying)f(MDP)m(,)f(and)i(to)f Fm(P)1312 2011 y Fj(\031)1332 1999 y Fb(\017)1349 2026 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\),)h(the)g(o)q(ccupancy)225 2072 y(probabilities)f(after)h(the)h(p)q(olicy)e(re\014nemen)o(t.)18 b(De\014ne)428 2143 y Fm(J)t Fo(\()p Fm(m;)7 b(\031)551 2126 y Fj(\017)567 2143 y Fm(;)g(\031)611 2126 y Fj(\017)626 2143 y Fm(;)g(\031)q Fo(\))12 b(=)741 2104 y Fd(X)762 2191 y Fj(a)808 2143 y Fm(\031)833 2126 y Fj(\017)849 2143 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))962 2104 y Fd(X)958 2191 y Fj(s)p Fh(2)p Fj(m)1033 2143 y Fm(P)1066 2126 y Fj(\031)1086 2114 y Fb(\017)1103 2143 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1247 2126 y Fj(\031)1269 2143 y Fo(\()p Fm(s;)7 b(a)p Fo(\))j Fl(\000)f Fm(V)1446 2126 y Fj(\031)1468 2143 y Fo(\()p Fm(s)p Fo(\)])129 b(\(13\))225 2246 y(where)15 b(w)o(e)g(ha)o(v)o(e)f(used)h(the)g (notation)e(that)i(the)g(p)q(olicies)f(on)g(the)h(left)f(hand)g(side)g (corresp)q(ond)225 2292 y(to)h(the)g(p)q(olicies)g(on)g(the)h(righ)o(t) e(resp)q(ectiv)o(ely)m(.)22 b(T)m(o)15 b(sho)o(w)g(ho)o(w)f(the)i(a)o (v)o(erage)f(rew)o(ard)g(dep)q(ends)p eop %%Page: 7 7 7 6 bop 225 83 a Fo(on)15 b(this)h(quan)o(tit)o(y)f(w)o(e)h(need)h(to)e (mak)o(e)f(use)j(of)e(the)h(follo)o(wing)d(facts.)24 b(The)16 b(Q-v)n(alues)g(for)f(the)225 129 y(underlying)f(MDP)f (satisfy)h(\(Bellman's)e(equation\))610 207 y Fm(Q)643 190 y Fj(\031)666 207 y Fo(\()p Fm(s;)7 b(a)p Fo(\))k(=)h Fm(R)p Fo(\()p Fm(s;)7 b(a)p Fo(\))i Fl(\000)h Fm(R)1020 190 y Fj(\031)1052 207 y Fo(+)1093 168 y Fd(X)1109 263 y Fj(s)1125 249 y Fi(0)1160 207 y Fm(p)1181 190 y Fj(a)1181 223 y(ss)1213 208 y Fi(0)1228 207 y Fm(V)1261 190 y Fj(\031)1284 207 y Fo(\()p Fm(s)1319 178 y Fi(0)1332 207 y Fo(\))312 b(\(14\))225 323 y(In)12 b(addition,)449 292 y Fd(P)492 335 y Fj(a)519 323 y Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))p Fm(Q)679 308 y Fj(\031)702 323 y Fo(\()p Fm(s;)7 b(a)p Fo(\))12 b(=)g Fm(V)883 308 y Fj(\031)905 323 y Fo(\()p Fm(s)p Fo(\),)h(implying)c(that)j Fm(J)t Fo(\()p Fm(m;)7 b(\031)1362 308 y Fj(\017)1378 323 y Fm(;)g(\031)1422 308 y Fj(\017)1437 323 y Fm(;)g(\031)1481 308 y Fj(\017)1497 323 y Fo(\))k(=)h(0)g(\(see)h(eq.)225 369 y(13\).)18 b(These)d(facts)f(allo)o(w)e(us)j(to)e(write)225 435 y Fm(J)t Fo(\()p Fm(m;)7 b(\031)348 418 y Fj(\017)364 435 y Fm(;)g(\031)408 418 y Fj(\017)424 435 y Fm(;)g(\031)q Fo(\))41 b(=)h Fm(J)t Fo(\()p Fm(m;)7 b(\031)722 418 y Fj(\017)738 435 y Fm(;)g(\031)782 418 y Fj(\017)797 435 y Fm(;)g(\031)q Fo(\))i Fl(\000)h Fm(J)t Fo(\()p Fm(m;)d(\031)1031 418 y Fj(\017)1047 435 y Fm(;)g(\031)1091 418 y Fj(\017)1106 435 y Fm(;)g(\031)1150 418 y Fj(\017)1166 435 y Fo(\))525 505 y(=)599 466 y Fd(X)620 553 y Fj(a)666 505 y Fm(\031)691 488 y Fj(\017)707 505 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))816 466 y Fd(X)838 553 y Fj(s)883 505 y Fm(P)916 488 y Fj(\031)936 476 y Fb(\017)952 505 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1096 488 y Fj(\031)1119 505 y Fo(\()p Fm(s;)g(a)p Fo(\))i Fl(\000)h Fm(V)1295 488 y Fj(\031)1318 505 y Fo(\()p Fm(s)p Fo(\))g Fl(\000)f Fm(Q)1453 488 y Fj(\031)1473 476 y Fb(\017)1490 505 y Fo(\()p Fm(s;)e(a)p Fo(\))j(+)f Fm(V)1667 488 y Fj(\031)1687 476 y Fb(\017)1704 505 y Fo(\()p Fm(s)p Fo(\)])525 618 y(=)42 b Fm(R)631 600 y Fj(\031)651 588 y Fb(\017)677 618 y Fl(\000)10 b Fm(R)751 600 y Fj(\031)782 618 y Fo(+)824 578 y Fd(X)846 665 y Fj(s)891 618 y Fm(P)924 600 y Fj(\031)944 588 y Fb(\017)960 618 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\))1066 578 y Fd(X)1082 673 y Fj(s)1098 659 y Fi(0)1134 618 y Fm(p)1155 600 y Fj(\031)1175 588 y Fb(\017)1155 633 y Fj(ss)1187 619 y Fi(0)1201 618 y Fo([)p Fm(V)1246 600 y Fj(\031)1269 618 y Fo(\()p Fm(s)1304 588 y Fi(0)1318 618 y Fo(\))f Fl(\000)h Fm(V)1418 600 y Fj(\031)1438 588 y Fb(\017)1456 618 y Fo(\()p Fm(s)1491 588 y Fi(0)1504 618 y Fo(\)])599 738 y Fl(\000)638 698 y Fd(X)660 785 y Fj(s)705 738 y Fm(P)738 720 y Fj(\031)758 708 y Fb(\017)775 738 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(V)919 720 y Fj(\031)942 738 y Fo(\()p Fm(s)p Fo(\))g Fl(\000)f Fm(V)1078 720 y Fj(\031)1098 708 y Fb(\017)1115 738 y Fo(\()p Fm(s)p Fo(\)])516 b(\(15\))225 851 y(By)15 b(w)o(eigh)o(ting)f(this)h(result)h(for)f(eac)o(h)g(class)h (b)o(y)f Fm(P)1028 836 y Fj(\031)1048 823 y Fb(\017)1065 851 y Fo(\()p Fm(m)p Fo(\))g(and)g(summing)d(o)o(v)o(er)j(the)h (messages)225 896 y(the)c(probabilit)o(y)e(w)o(eigh)o(tings)g(for)h (the)h(last)f(t)o(w)o(o)g(terms)g(b)q(ecome)g(equal)g(and)g(the)h (terms)f(cancel.)225 942 y(This)j(pro)q(cedure)i(giv)o(es)d(us)633 1018 y Fm(R)665 1001 y Fj(\031)685 989 y Fb(\017)711 1018 y Fl(\000)d Fm(R)785 1001 y Fj(\031)819 1018 y Fo(=)862 979 y Fd(X)878 1066 y Fj(m)929 1018 y Fm(P)962 1001 y Fj(\031)982 989 y Fb(\017)999 1018 y Fo(\()p Fm(m)p Fo(\))p Fm(J)t Fo(\()p Fm(m;)d(\031)1190 1001 y Fj(\017)1207 1018 y Fm(;)g(\031)1251 1001 y Fj(\017)1266 1018 y Fm(;)g(\031)q Fo(\))334 b(\(16\))225 1149 y(This)16 b(result)h(do)q(es)g(not)f(allo)o (w)f(the)h(learner)h(to)f(assess)i(the)f(e\013ect)h(of)d(the)i(p)q (olicy)f(re\014nemen)o(t)225 1194 y(on)e(the)h(a)o(v)o(erage)g(rew)o (ard)g(since)g(the)g Fm(J)t Fo(\(\))g(term)f(con)o(tains)g(information) d(not)k(a)o(v)n(ailable)d(to)i(the)225 1240 y(learner.)31 b(Ho)o(w)o(ev)o(er,)19 b(making)d(use)j(of)e(the)i(fact)f(that)g(the)h (p)q(olicy)e(has)h(b)q(een)h(c)o(hanged)g(only)225 1286 y(sligh)o(tly)13 b(this)g(problem)g(can)h(b)q(e)h(a)o(v)o(oided.)225 1356 y(As)d Fm(\031)309 1341 y Fj(\017)337 1356 y Fo(is)f(a)g(p)q (olicy)g(satisfying)g(max)788 1362 y Fj(ma)844 1356 y Fl(j)p Fm(\031)881 1341 y Fj(\017)897 1356 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))5 b Fl(\000)g Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))p Fl(j)11 b(\024)h Fm(\017)p Fo(,)f(it)g(can)h(then)g(b)q(e)g(sho)o(wn)g(that)225 1402 y(there)j(exists)f(a)f(constan)o(t)h Fm(C)j Fo(suc)o(h)d(that)f (the)i(maxim)n(um)9 b(c)o(hange)14 b(in)f Fm(P)6 b Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\),)13 b Fm(P)6 b Fo(\()p Fm(s)p Fo(\),)13 b Fm(P)6 b Fo(\()p Fm(m)p Fo(\))14 b(is)225 1448 y(b)q(ounded)i(b)o(y)f Fm(C)s(\017)p Fo(.)22 b(Using)15 b(these)i(b)q(ounds)f(and)f(indicating)g(the)h(di\013erence)h(b)q(et)o (w)o(een)g Fm(\031)1636 1432 y Fj(\017)1667 1448 y Fo(and)225 1493 y Fm(\031)e Fo(dep)q(enden)o(t)h(quan)o(tities)d(b)o(y)h(\001)f(w) o(e)h(get)449 1530 y Fd(X)470 1617 y Fj(a)509 1570 y Fo([)p Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))9 b(+)h(\001)p Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\)])880 1530 y Fd(X)901 1617 y Fj(s)939 1570 y Fo([)p Fm(P)984 1552 y Fj(\031)1006 1570 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\))g(+)f(\001)p Fm(P)1224 1552 y Fj(\031)1246 1570 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)][)p Fm(Q)1402 1552 y Fj(\031)1424 1570 y Fo(\()p Fm(s;)e(a)p Fo(\))i Fl(\000)h Fm(V)1600 1552 y Fj(\031)1623 1570 y Fo(\()p Fm(s)p Fo(\)])491 1682 y(=)564 1642 y Fd(X)585 1729 y Fj(a)631 1682 y Fo(\001)p Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))804 1642 y Fd(X)800 1729 y Fj(s)p Fh(2)p Fj(m)875 1682 y Fm(P)908 1664 y Fj(\031)930 1682 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1074 1664 y Fj(\031)1096 1682 y Fo(\()p Fm(s;)d(a)p Fo(\))j Fl(\000)f Fm(V)1272 1664 y Fj(\031)1295 1682 y Fo(\()p Fm(s)p Fo(\)])h(+)564 1796 y(+)603 1756 y Fd(X)625 1844 y Fj(a)671 1796 y Fo(\001)p Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))840 1756 y Fd(X)862 1844 y Fj(s)906 1796 y Fo(\001)p Fm(P)974 1779 y Fj(\031)996 1796 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1140 1779 y Fj(\031)1162 1796 y Fo(\()p Fm(s;)d(a)p Fo(\))j Fl(\000)f Fm(V)1339 1779 y Fj(\031)1361 1796 y Fo(\()p Fm(s)p Fo(\)])491 1908 y(=)41 b Fm(\017)588 1869 y Fd(X)609 1956 y Fj(a)655 1908 y Fm(\031)680 1891 y Fk(1)699 1908 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\))808 1869 y Fd(X)830 1956 y Fj(s)875 1908 y Fm(P)908 1891 y Fj(\031)930 1908 y Fo(\()p Fm(s)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1074 1891 y Fj(\031)1096 1908 y Fo(\()p Fm(s;)7 b(a)p Fo(\))j Fl(\000)f Fm(V)1273 1891 y Fj(\031)1295 1908 y Fo(\()p Fm(s)p Fo(\)])h(+)f Fm(O)q Fo(\()p Fm(\017)1475 1891 y Fk(2)1494 1908 y Fo(\))150 b(\(17\))225 2016 y(where)18 b(the)f(second)h(equalit)o(y)e(follo)o(ws) f(from)960 1985 y Fd(P)1004 2028 y Fj(a)1031 2016 y Fm(\031)q Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1203 2001 y Fj(\031)1225 2016 y Fo(\()p Fm(s;)7 b(a)p Fo(\))k Fl(\000)g Fm(V)1405 2001 y Fj(\031)1427 2016 y Fo(\()p Fm(s)p Fo(\)])17 b(=)f(0)h(and)f(the)225 2061 y(third)e(from)e(the)j(b)q(ounds)f(stated) h(earlier.)225 2132 y(The)20 b(equation)e(c)o(haracterizing)i(the)g(c)o (hange)g(in)e(the)i(a)o(v)o(erage)f(rew)o(ard)h(due)g(to)f(the)h(p)q (olicy)225 2178 y(c)o(hange)14 b(\(eq.)19 b(16\))13 b(can)h(b)q(e)h(no) o(w)e(rewritten)i(as)f(follo)o(ws:)374 2256 y Fm(R)406 2239 y Fj(\031)426 2227 y Fb(\017)452 2256 y Fl(\000)c Fm(R)526 2239 y Fj(\031)590 2256 y Fo(=)663 2217 y Fd(X)679 2304 y Fj(m)730 2256 y Fm(P)763 2239 y Fj(\031)783 2227 y Fb(\017)800 2256 y Fo(\()p Fm(m)p Fo(\))p Fm(J)t Fo(\()p Fm(m;)d(\031)991 2239 y Fj(\017)1008 2256 y Fm(;)g(\031)q(;)g(\031)q Fo(\))h(+)i Fm(O)q Fo(\()p Fm(\017)1228 2239 y Fk(2)1246 2256 y Fo(\))p eop %%Page: 8 8 8 7 bop 590 85 a Fo(=)663 46 y Fd(X)679 133 y Fj(m)730 85 y Fm(P)763 68 y Fj(\031)785 85 y Fo(\()p Fm(m)p Fo(\))860 46 y Fd(X)882 133 y Fj(a)928 85 y Fm(\031)953 68 y Fj(\017)969 85 y Fo(\()p Fm(a)p Fl(j)p Fm(m)p Fo(\)[)p Fm(Q)1116 68 y Fj(\031)1138 85 y Fo(\()p Fm(m;)7 b(a)p Fo(\))i Fl(\000)h Fm(V)1331 68 y Fj(\031)1354 85 y Fo(\()p Fm(m)p Fo(\)])f(+)h Fm(O)q Fo(\()p Fm(\017)1551 68 y Fk(2)1569 85 y Fo(\))75 b(\(18\))225 207 y(where)14 b(the)g(b)q(ounds)g(\(see)g (ab)q(o)o(v)o(e\))f(ha)o(v)o(e)g(b)q(een)h(used)g(for)f Fm(P)1156 192 y Fj(\031)1176 179 y Fb(\017)1193 207 y Fo(\()p Fm(m)p Fo(\))8 b Fl(\000)g Fm(P)1342 192 y Fj(\031)1364 207 y Fo(\()p Fm(m)p Fo(\).)18 b(This)13 b(completes)225 252 y(the)h(pro)q(of.)1286 b Fa(2)225 350 y Fp(Ac)o(kno)o(wledgmen)o (ts)225 429 y Fo(The)14 b(authors)g(thank)g(Ric)o(h)f(Sutton)h(for)g(p) q(oin)o(ting)f(out)g(errors)j(at)d(early)h(stages)h(of)e(this)h(w)o (ork.)225 475 y(This)d(pro)r(ject)i(w)o(as)e(supp)q(orted)i(in)e(part)g (b)o(y)g(a)h(gran)o(t)f(from)e(the)j(McDonnell-P)o(ew)g(F)m(oundation,) 225 521 y(b)o(y)j(a)h(gran)o(t)f(from)f(A)m(TR)h(Human)f(Information)f (Pro)q(cessing)k(Researc)o(h)g(Lab)q(oratories,)f(b)o(y)f(a)225 566 y(gran)o(t)f(from)e(Siemens)i(Corp)q(oration)g(and)g(b)o(y)g(gran)o (t)g(N00014-94-1-0777)c(from)j(the)i(O\016ce)g(of)225 612 y(Na)o(v)n(al)d(Researc)o(h.)20 b(Mic)o(hael)13 b(I.)h(Jordan)g(is) f(a)h(NSF)g(Presiden)o(tial)g(Y)m(oung)f(In)o(v)o(estigator.)225 709 y Fp(References)225 789 y Fo(Barto,)18 b(A.,)e(and)h(Du\013,)g(M.)g (\(1994\).)26 b(Mon)o(te-Carlo)17 b(matrix)e(in)o(v)o(ersion)h(and)h (reinforcemen)o(t)225 834 y(learning.)g(In)12 b Fe(A)n(dvanc)n(es)i(of) f(Neur)n(al)g(Information)g(Pr)n(o)n(c)n(essing)g(Systems)h(6)p Fo(,)e(San)g(Mateo,)g(CA,)225 880 y(1994.)17 b(Morgan)c(Kaufmann.)225 951 y(Bertsek)n(as,)g(D.)d(P)m(.)h(\(1987\).)16 b Fe(Dynamic)d(Pr)n(o)n (gr)n(amming:)k(Deterministic)12 b(and)h(Sto)n(chastic)g(Mo)n(d-)225 996 y(els)p Fo(.)18 b(Englew)o(o)q(o)q(d)13 b(Cli\013s,)g(NJ:)h(Pren)o (tice-Hall.)225 1067 y(Da)o(y)o(an,)h(P)m(.)g(\(1992\).)22 b(The)17 b(con)o(v)o(ergence)g(of)e(TD\()p Fm(\025)p Fo(\))h(for)f(general)h Fm(\025)p Fo(.)24 b Fe(Machine)17 b(L)n(e)n(arning,)g(8)p Fo(,)225 1113 y(341-362.)225 1183 y(Jaakk)o(ola,)9 b(T.,)h(Jordan)h(M.)f(I.,)g(and)g(Singh,)g(S.)g (P)m(.)g(\(1994\).)16 b(On)11 b(the)g(con)o(v)o(ergence)h(of)e(sto)q(c) o(hastic)225 1229 y(iterativ)o(e)k(Dynamic)e(Programmi)o(ng)f (algorithms.)16 b Fe(Neur)n(al)e(Computation)h(6)p Fo(,)f(1185-1201.) 225 1299 y(Monahan,)i(G.)g(\(1982\).)25 b(A)17 b(surv)o(ey)g(of)f (partially)f(observ)n(able)i(Mark)o(o)o(v)f(decision)h(pro)q(cesses.) 225 1345 y Fe(Management)f(Scienc)n(e,)f(28)p Fo(,)f(1-16.)225 1416 y(Singh,)c(S.)f(P)m(.,)g(Jaakk)o(ola,)g(T.,)h(Jordan,)g(M.)f(I.)h (\(1994\).)16 b(Learning)9 b(without)h(state)g(estimation)e(in)225 1461 y(partially)i(observ)n(able)i(en)o(vironmen)o(ts.)k(In)c Fe(Pr)n(o)n(c)n(e)n(e)n(dings)g(of)h(the)f(Eleventh)h(Machine)h(L)n(e)n (arning)225 1507 y(Confer)n(enc)n(e)p Fo(.)225 1578 y(Sutton,)i(R.)f (S.)g(\(1988\).)23 b(Learning)15 b(to)h(predict)h(b)o(y)e(the)h(metho)q (ds)g(of)f(temp)q(oral)f(di\013erences.)225 1623 y Fe(Machine)i(L)n(e)n (arning,)e(3)p Fo(,)g(9-44.)225 1694 y(Sc)o(h)o(w)o(artz,)e(A.)f (\(1993\).)16 b(A)11 b(reinforcemen)o(t)g(learning)g(metho)q(d)f(for)h (maximi)o(zing)d(undiscoun)o(ted)225 1739 y(rew)o(ards.)19 b(In)14 b Fe(Pr)n(o)n(c)n(e)n(e)n(dings)h(of)f(the)h(T)m(enth)g (Machine)h(L)n(e)n(arning)f(Confer)n(enc)n(e)p Fo(.)225 1810 y(Tsitsiklis)20 b(J.)g(N.)g(\(1994\).)36 b(Async)o(hronous)21 b(sto)q(c)o(hastic)h(appro)o(ximation)17 b(and)j(Q-learning.)225 1856 y Fe(Machine)c(L)n(e)n(arning)f(16)p Fo(,)f(185-202.)225 1926 y(W)m(atkins,)f(C.J.C.H.)g(\(1989\).)20 b Fe(L)n(e)n(arning)c(fr)n (om)e(delaye)n(d)i(r)n(ewar)n(ds)p Fo(.)j(PhD)c(Thesis,)g(Univ)o(ersit) o(y)225 1972 y(of)e(Cam)o(bridge,)f(England.)225 2042 y(W)m(atkins,)g(C.J.C.H,)h(&)h(Da)o(y)o(an,)e(P)m(.)h(\(1992\).)18 b(Q-learning.)f Fe(Machine)f(L)n(e)n(arning,)f(8)p Fo(,)e(279-292.)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF