% Source: gusucode.com > bigdata toolbox MATLAB source code > bigdata/@tall/findgroups.m

    function [tx, varargout] = findgroups(varargin) %#ok<STOUT> for error messages
%FINDGROUPS Find groups and return group numbers
%   Supported syntaxes for tall arrays:
%   G = FINDGROUPS(A)
%   G = FINDGROUPS(A1,A2,...)
%
%   Limitations:
%   1) Only a single output is supported; requesting more is an error.
%   2) Tall table input is not supported.
%   3) The group numbers in G may be in a different order from those
%      produced by the non-tall implementation.
%
%   See also FINDGROUPS.

%   Copyright 2016 The MathWorks, Inc.

% At least one grouping variable must be supplied.
narginchk(1,inf);

% varargout exists only so that a multi-output call reaches this point and
% receives a dedicated error message; it is never assigned.
extraOutputsRequested = nargout >= 2;
if extraOutputsRequested
    error(message('MATLAB:bigdata:array:FindgroupsSingleOutput'));
end

% All grouping variables are forwarded unchanged to the implementation.
tx = findgroupsViaCategorical(varargin{:});
end

function tx = findgroupsViaCategorical(varargin)
% Compute group numbers by routing the grouping variables through a tall
% categorical array.

% Step 1: within each chunk, merge all inputs into one categorical array.
% The result is not yet a proper tall categorical array, because the set of
% categories can differ from one chunk to the next.
perChunkCats = elementfun(@localCategorical,varargin{:});
perChunkCats.Adaptor = matlab.bigdata.internal.adaptors.CategoricalAdaptor();

% Step 2: build a tall categorical array from it, which makes every chunk
% share the same categories.
unifiedCats = categorical(perChunkCats);

% Step 3: the double group ids of the tall categorical array are the group
% numbers.
tx = double(unifiedCats);
end

function tc = localCategorical(varargin)
% Combine every input of one chunk into a single categorical array in which
% each category corresponds to one group.

% Seed the result from the first grouping variable. Unused categories are
% dropped in case that input was already a categorical carrying categories
% with no elements. Categories dropped here that are used in other chunks
% are re-added later by the tall categorical constructor.
tc = removecats(categorical(varargin{1}));
refSize = size(tc);

% Every grouping variable must be a vector ...
notVector = ~cellfun(@isvector, varargin);
if any(notVector)
    error(message('MATLAB:findgroups:GroupingVarNotVector'));
end

% ... and must have the same size as the first one after conversion.
sameSize = cellfun(@(v) isequal(size(v), refSize), varargin);
if any(~sameSize)
    error(message('MATLAB:findgroups:InputSizeMismatch'));
end

% Fold the remaining grouping variables in one at a time with the
% categorical element-wise product, pruning categories that ended up
% unused after each step.
for k = 2:numel(varargin)
    nextVar = categorical(varargin{k});
    tc = removecats(tc .* nextVar);
end
end