gusucode.com > datatypes 工具箱matlab源码程序 > datatypes/splitapply.m

    function varargout = splitapply(fun,varargin)
% SPLITAPPLY Split data into groups and apply function
%   Y = SPLITAPPLY(FUN,X,G) splits the variable X into groups specified by G 
%   and applies the function FUN to each group. SPLITAPPLY returns Y as a 
%   column vector where each row contains the output from FUN for each group. 
%   Specify G as a vector of positive integers. You can use FINDGROUPS 
%   to create G. If G contains NaN values, SPLITAPPLY discards the 
%   corresponding values in X.
%
%   Y = SPLITAPPLY(FUN,X1,X2,...,G) splits variables X1,X2,... into groups 
%   specified by G and applies FUN to each group. SPLITAPPLY calls FUN once per 
%   group, with X1,X2,... as the input arguments to FUN.
%
%   [Y1,Y2,...] = SPLITAPPLY(FUN,...) splits variables into groups and applies FUN 
%   to each group. FUN returns multiple output arguments. FUN can return output 
%   arguments that belong to different classes, but the class of each output must 
%   be the same each time FUN is called. You can use this syntax with any of the 
%   input arguments of the previous syntaxes. The number of output arguments need 
%   not equal the number of input data variables.
%
%   Examples:
%      % Load patients data.
%      % List Height, Weight, Gender, and Smoker variables for patients.
%      load patients;
%      whos Height Weight Gender Smoker
%      
%      % Find groups of patients by gender and status as a smoker.
%      % Make a table that lists the four group identifiers.
%      [G,gender,smoker] = findgroups(Gender,Smoker);
%      results = table(gender,smoker)
%
%      % Split Weight into groups. Calculate mean weights for the groups
%      % of patients.
%      results.meanWeight = splitapply(@mean,Weight,G)
% 
%      % Find the average BMI by gender and status as a smoker.
%      meanBMIFcn = @(h,w)mean((w ./ (h.^2)) * 703);
%      results.meanBMI = splitapply(meanBMIFcn,Height,Weight,G)
%
%   See also FINDGROUPS, UNIQUE, VARFUN, ROWFUN

% Copyright 2015-2016 MathWorks, Inc.

% Check number of inputs
narginchk(3,inf);

gnums = varargin{end};
varargin(end) = [];

% Check Function handle
if ~isa(fun,'function_handle')
    error(message('MATLAB:splitapply:InvalidFunction'));
end

% Check indices
if isempty(gnums) || ~isnumeric(gnums) || ~isvector(gnums) || ...
        any(gnums <= 0) || issparse(gnums)
    error(message('MATLAB:splitapply:InvalidGroupNums'));
end

% Drop leading singleton dimensions to find dimension to split on
[gnums, shiftby] = shiftdim(gnums);
gsize = length( gnums );
gdim = shiftby + 1;

% Ensure that indices are sorted (for transparent accumarray behavior)
[gnums, sgnums] = sort( gnums );

% Account for NaN Groups
ngroups = max(gnums);
if isnan(ngroups) %for the case of gnums being all NaN
    emptyGroup = 1;
else
    emptyGroup = ngroups+1;
end

% Filter out empty group numbers
emptyIdx = emptyGroups(gnums);
sgnums(emptyIdx,:) = emptyGroup;
gnums(emptyIdx,:) = emptyGroup;

% Check for non-integer group numbers (after filtering out the data) 
if any(floor(gnums) ~= gnums) || ~isreal(gnums)
    error(message('MATLAB:splitapply:InvalidGroupNums'));
end

% Check data
for argnumber = 1:length(varargin)
    argsize = size(varargin{argnumber},gdim);
    if isscalar(gnums) || isequal( gsize, argsize )
        continue; % Sizes match
    end
    
    % Different error messages depending on grouping vector orientation
    if gdim == 1 %column vector gnums
        error(message('MATLAB:splitapply:RowMismatch', gsize, argnumber, argsize));
    elseif gdim == 2 %row vector gnums
        error(message('MATLAB:splitapply:ColumnMismatch', gsize, argnumber,argsize));
    end
end


% Check for non-continuous group numbers
% When sorted, valid group number vector will start at 1, and the numbers
% will not differ by more than 1
gdiffed = diff(gnums);
if ~isempty(gnums) && ((gnums(1) ~= 1) || ~all(gdiffed== 1 | gdiffed==0))
    error(message('MATLAB:splitapply:MissingGroupNums'));
end 

dataVars = {};
for argnumber = 1:length(varargin)
    expandedVars = expandVariables(varargin{argnumber});
    dataVars(end+1:end+size(expandedVars,2)) = expandedVars;
end

if isscalar(gnums)
   % Vector of group numbers is a scalar,  Use the first non-singleton
   % dimension as the dimension to split data on.
   sz = size(dataVars{1});
   gdim = find(sz == 1,1,'first');
   if isempty(gdim)
       gdim = 1;
   end
end

splitData = localsplit(dataVars,gnums,sgnums,gdim);

varargout = localapply(fun,splitData,gdim,nargout);

% Clean up NaN Groups
if any(emptyIdx)
    for ii = 1:length(varargout)
        % Preserve shape along non-grouping dimension when cleaning out NaN groups
        emptyGroupIdx = repmat({':'}, 1, ndims(varargout{ii}));
        emptyGroupIdx{gdim} = emptyGroup;
        varargout{ii}(emptyGroupIdx{:}) = [];
    end
end

end


%-------------------------------------------------------------------------------
function varRows = getVarRows(datavar,i,gdim)
if isscalar(datavar)
    varRows = datavar;
elseif matlab.internal.datatypes.istabular(datavar)
    varRows = datavar(i,:);
elseif ismatrix(datavar) && gdim == 1
    varRows = datavar(i,:);
elseif ismatrix(datavar) && gdim == 2
    varRows = datavar(:,i);
else
    % Each var could have any number of dims, no way of knowing,
    % except how many rows they have.  So just treat them as 2D to get
    % the necessary rows, and then reshape to their original dims.
    indexVar = repmat({':'}, 1, ndims(datavar));
    indexVar{gdim} = i;
    varRows = datavar(indexVar{:});
end
end


%-------------------------------------------------------------------------------
function out = localsplit(datavars,gnums,sgnums,gdim)
if isscalar(gnums)
    out = datavars; % all datvariables are the observations if gnums is scalar
else
    gmax = gnums(end);
    for i = 1:length(datavars)
        groupNums = accumarray(gnums,sgnums,[gmax,1],@(ii){ii});
        if i==1
            out = cell(length(groupNums),length(datavars));
        end
        
        for j=1:length(groupNums)
            out{j,i} = getVarRows(datavars{i},groupNums{j},gdim);
        end
        
    end
end
end


%-------------------------------------------------------------------------------
function finalOut = localapply(fun,dataVars,gdim,nout)
    import matlab.internal.tableUtils.ordinalString;

    % Call function passing parameters
    [numGroups,numVars] = size(dataVars);
    funOut = cell(numGroups,nout);
    if (gdim > 1)
        funOut = funOut';
    end
    
    for curGroup = 1:numGroups
        try
            % Invoke the function based on the number of output arguments
            if nout > 0
                if gdim == 1
                    [funOut{curGroup,:}] = fun(dataVars{curGroup,:});
                else
                    [funOut{:,curGroup}] = fun(dataVars{curGroup,:});
                end
            else
                clear ans;
                fun(dataVars{curGroup,:});
                
                % did the call to 'fun' above output to ans?  
                % If so pass it through.
                if exist('ans','var')
                    funOut{1} = ans; %#ok<NOANS>
                    nout = 1;
                end
            end
        catch ME
            funStr = func2str(fun);
            throwAsCaller(MException(message('MATLAB:splitapply:FunFailed', funStr, ordinalString(curGroup), ME.message)));
        end

        if nout > 0
            for curVar=1:nout
                if gdim == 1
                    var = funOut{curGroup,curVar};
                else
                    var = funOut{curVar,curGroup};
                end

                if isscalar(var) || (size(var,gdim) == 1) 
                    % Output is Uniform
                    continue;
                end
                
                % Construct a suggested correction to be included in the
                % error message
                funStr = func2str(fun);
                if strcmp(funStr(1), '@') % anonymous function
                    funTokens = regexp(funStr, '(@\([^\(\)]*\))(.*)', 'tokens', 'once');
                    funSuggest = [funTokens{1}, '{',funTokens{2},'}'];
                else % simple function handle
                    funArgs = strjoin( strcat('x', strsplit(int2str(1:numVars)) ), ',');
                    funSuggest = ['@(',funArgs,'){',funStr,'(',funArgs,')}'];
                end
                
                throwAsCaller(MException(message('MATLAB:splitapply:OutputNotUniform', funStr, ordinalString(curGroup), funSuggest)));
            end
        end
    end
    
    finalOut = cell(1,nout);
    for curVar = 1:nout 
        if gdim == 1
            finalOut{curVar} = vertcat(funOut{:,curVar}); 
        else
            finalOut{curVar} = horzcat(funOut{curVar,:}); 
        end
    end
end


%-------------------------------------------------------------------------------
function emptyIdx = emptyGroups(gnums)
    emptyIdx = isnan(gnums);
end


%-------------------------------------------------------------------------------
function out = expandVariables(inVar)
    if matlab.internal.datatypes.istabular(inVar)
        out = table2cell(varfun(@(x){x}, inVar));
    else
        out = {inVar};
    end
end