gusucode.com > 马尔科夫决策过程包括一些例程源码程序 > value_determination.m
function V = value_determination(p, T, R, discount_factor)
% VALUE_DETERMINATION Solve Bellman's equation for a fixed policy
%   V = value_determination(p, T, R, discount_factor)
%
%   Inputs:
%     p               - policy vector with one entry per state; p(s) is the
%                       index (1..A) of the action taken in state s.
%                       Row and column vectors are both accepted.
%     T               - transition array indexed as T(s, a, s'), where
%                       S = size(T,1) states and A = size(T,2) actions.
%     R               - reward array indexed as R(s, a).
%     discount_factor - scalar g multiplying the future value term.
%
%   Output:
%     V - S-by-1 vector satisfying V = Rp + g*Tp*V, where
%         Tp(s,s') = T(s, p(s), s') and Rp(s) = R(s, p(s)).
%
%   Errors:
%     value_determination:policySize   - p does not have S elements.
%     value_determination:policyAction - p has an entry that is not an
%                                        integer in 1..A.
%
%   Note: the linear system is solved with the backslash operator. If
%   (I - g*Tp) is singular (e.g. g = 1 with a row-stochastic Tp), MATLAB
%   issues its usual singular-matrix warning.

S = size(T, 1);
A = size(T, 2);

% Normalise the policy to a column and validate it. Without these checks an
% entry outside 1..A would be skipped by the loop below, leaving a zero row
% in Tp and a zero in Rp, and hence a silently wrong value for that state.
p = p(:);
if numel(p) ~= S
  error('value_determination:policySize', ...
        'Policy must have one entry per state (expected %d, got %d).', ...
        S, numel(p));
end
if any(p < 1 | p > A | p ~= round(p))
  error('value_determination:policyAction', ...
        'Policy entries must be integer action indices in 1..%d.', A);
end

% Extract the part of T and R which is specific to this policy
Tp = zeros(S, S);  % Tp(s,s') = T(s, p(s), s')
Rp = zeros(S, 1);  % Rp(s)    = R(s, p(s))
for a = 1:A  % loop over actions to avoid looping over states
  ind = find(p == a);  % the states (rows) that use action a
  if ~isempty(ind)
    % T(ind,a,:) is numel(ind)-by-1-by-S; flatten it to numel(ind)-by-S
    Tp(ind, :) = reshape(T(ind, a, :), length(ind), S);
    Rp(ind) = R(ind, a);
  end
end

% V = R + gTV  =>  (I - gT)V = R  =>  V = inv(I - gT)*R
V = (eye(S) - discount_factor * Tp) \ Rp;
% Alternative (pseudo-inverse), kept for reference:
% V = pinv(eye(S) - discount_factor*Tp) * Rp;