gusucode.com > datatypes 工具箱matlab源码程序 > datatypes/@tabular/table2gidx.m

    function [group,glabels,glocs] = table2gidx(a,avars,reduce)
% TABLE2GIDX Create group indices from table grouping variables.
%   [GROUP,GLABELS,GLOCS] = TABLE2GIDX(A,AVARS,REDUCE) builds a single group
%   index for the rows of A from the grouping variables listed in AVARS.
%
%   Inputs:
%     A      - the tabular object whose rows are being grouped.
%     AVARS  - vector of variable indices into A's data. An index of 0
%              denotes A's row labels rather than a data variable. An empty
%              AVARS puts every row into one group labelled 'All'.
%     REDUCE - (optional, default true) forwarded to GRP2IDX. When true,
%              categories of a categorical grouping variable that do not
%              occur in the data are left out.
%
%   Outputs:
%     GROUP   - column of group numbers, one per row of A. Rows where any
%               grouping variable is missing get NaN.
%     GLABELS - cell array of group labels. With several grouping
%               variables, each label is the per-variable labels joined
%               with '_'.
%     GLOCS   - for each group, the index of a representative row of A (the
%               occurrence reported by UNIQUE).
%
%   Any error raised while grouping is rethrown with THROWASCALLER so that it
%   appears to originate from the calling function.

%   Copyright 2012-2016 The MathWorks, Inc.

% Default behavior is to leave out categories that are not actually present in
% the data of a categorical variable. Non-categorical variables _always_, in
% effect, do that.
try
    if nargin < 3, reduce = true; end

    a_data = a.data;
    a_varnames = a.varDim.labels;
    nrows = a.rowDim.length;
    ngroupVars = length(avars);
    
    if ngroupVars == 0 % if no grouping vars, create one group containing all observations
        group = ones(nrows,1);
        glocs = ones(min(nrows,1),1); % 1 if there are rows, 0x1 if not
        glabels = {'All'};

    elseif ngroupVars == 1
        % Create an index vector based on the unique values of the grouping variable
        if avars == 0 % the row labels
            [group,glabels,glocs] = grp2idx(a.rowDim.labels,a.metaDim.labels{1},reduce);
        else
            [group,glabels,glocs] = grp2idx(a_data{avars},a_varnames{avars},reduce);
        end
        
    else % ngroupVars > 1
        % Get integer group codes and names for each grouping variable
        groups = zeros(nrows,ngroupVars);
        names = cell(1,ngroupVars);
        for j = 1:ngroupVars
            if avars(j) == 0
                % Index 0 stands for the row labels, exactly as in the
                % single-variable case; it is not a valid subscript into
                % a_data, so it has to be routed explicitly.
                [groups(:,j),names{j}] = grp2idx(a.rowDim.labels,a.metaDim.labels{1},reduce);
            else
                [groups(:,j),names{j}] = grp2idx(a_data{avars(j)},a_varnames{avars(j)},reduce);
            end
        end

        % Create an index vector based on the unique combinations of individual grouping variables.
        % A row with a missing value in any grouping variable belongs to no group.
        wasnan = any(isnan(groups),2);
        group = NaN(size(wasnan));
        groups(wasnan,:) = [];
        [urows,glocs,gidx] = unique(groups,'rows','sorted');
        ngroups = size(urows,1);
        group(~wasnan) = gidx;

        % Translate the NaN-reduced row indices back to the original rows 
        tmp = find(~wasnan); glocs = tmp(glocs);

        % Look up, for each unique combination, the label of each variable's code
        gnames = cell(ngroups,ngroupVars);
        for j = 1:ngroupVars
            gnames(:,j) = names{j}(urows(:,j));
        end

        % One label per group: the per-variable labels joined by underscores
        glabels = cell(ngroups,1);
        for j = 1:ngroups
            glabels{j} = strjoin(gnames(j,:),'_');
        end
    end
catch ME
    throwAsCaller(ME)
end

%-------------------------------------------------------------------------------
function [gidx,gnames,gloc] = grp2idx(var,varName,reduce)
% GRP2IDX  Create index vector from a grouping variable.
%   [G,GN,GL] = GRP2IDX(S,NAME,REDUCE) maps each element of the grouping
%   variable S to a group number. S may be a categorical, numeric, logical,
%   datetime, duration, or string column; a cell column of character
%   vectors; or a character matrix in which each row is one group label.
%   NAME is the variable's name and is only used in error messages.
%
%   G holds integers from 1 up to the number of distinct groups. GN is a
%   cell array of character vectors with one label per group, and GL gives,
%   for each group, an index into S of an element belonging to that group.
%
%   Missing values in S -- NaN (numeric, logical, duration), NaT (datetime),
%   empty text (string, char, or cell array of character vectors), and
%   <undefined> (categorical) -- produce NaN in G and have no entry in GN
%   or GL.
%
%   REDUCE only matters for categorical S: when true, only categories that
%   occur in S become groups; when false, every category of S becomes a
%   group, and GL is 0 for categories that never occur.

% A char matrix is one label per row; an empty char array has no labels at all
if ischar(var)
    if isempty(var)
        var = cell(0,1);
    else
        var = cellstr(var);
    end
end

if ~iscolumn(var)
    error(message('MATLAB:table:GroupingVarNotColumn'));
end

% Categorical variables are handled separately and return early
if isa(var,'categorical')
    if reduce
        [levels,gloc,gidx] = unique(var);
        % <undefined> sorts last; strip it and send its indices to NaN
        if ~isempty(levels) && isundefined(levels(end))
            keep = ~isundefined(levels);
            levels = levels(keep);
            gloc = gloc(keep);
            gidx(gidx > length(levels)) = NaN; % indices of kept levels are unchanged
        end
        gnames = cellstr(levels);
    else
        % The category codes are the group numbers; <undefined> becomes NaN
        gidx = double(var);
        gnames = categories(var)';
        [~,gloc] = ismember(1:length(gnames),gidx);
    end
    return
end

% Everything else goes through the type's own UNIQUE method
try
    [levels,gloc,gidx] = unique(var,'sorted');
catch cause
    msg = message('MATLAB:table:VarUniqueMethodFailed',varName);
    throwAsCaller(addCause(MException(msg.Identifier,'%s',getString(msg)), cause));
end
% UNIQUE must hand back one index per element of the grouping variable
if length(gidx) ~= length(var)
    msg = message('MATLAB:table:VarUniqueMethodFailedNumRows',varName);
    throwAsCaller(MException(msg.Identifier,'%s',getString(msg)));
end

if isnumeric(var) || islogical(var)
    % NaNs sort last; strip them and send their indices to NaN
    if ~isempty(levels) && isnan(levels(end))
        keep = ~isnan(levels);
        levels = levels(keep);
        gloc = gloc(keep);
        gidx(gidx > length(levels)) = NaN; % indices of kept levels are unchanged
    end
    gnames = numericLabels(levels);
elseif isdatetime(var)
    % NaTs sort last; strip them and send their indices to NaN
    if ~isempty(levels) && isnat(levels(end))
        keep = ~isnat(levels);
        levels = levels(keep);
        gloc = gloc(keep);
        gidx(gidx > length(levels)) = NaN; % indices of kept levels are unchanged
    end
    gnames = cellstr(levels);
elseif isduration(var)
    % NaNs sort last; strip them and send their indices to NaN
    if ~isempty(levels) && isnan(levels(end))
        keep = ~isnan(levels);
        levels = levels(keep);
        gloc = gloc(keep);
        gidx(gidx > length(levels)) = NaN; % indices of kept levels are unchanged
    end
    gnames = cellstr(levels);
elseif isa(var,'string')
    % Empty strings are checked for at the front of the sorted levels, so
    % every surviving index has to shift down by the number removed
    if ~isempty(levels) && (levels(1) == '')
        blank = (levels == '');
        nBlank = nnz(blank);
        levels = levels(~blank,1);
        gloc = gloc(~blank);
        remap = [NaN(nBlank,1); (1:length(levels))'];
        gidx = remap(gidx);
    end
    gnames = cellstr(levels);
elseif iscell(var) % iscellstr enforced by unique above
    % Empty character vectors sort first; remove them and renumber the rest
    if ~isempty(levels) && isempty(levels{1})
        blank = cellfun('isempty',levels);
        % All empties are treated as '', but count them defensively anyway
        nBlank = nnz(blank);
        levels = levels(~blank);
        gloc = gloc(~blank);
        remap = [NaN(nBlank,1); (1:length(levels))'];
        gidx = remap(gidx);
    end
    gnames = levels;
else
    error(message('MATLAB:table:GroupTypeIncorrect'));
end


%-------------------------------------------------------------------------------
function gnames = numericLabels(glevels)
% NUMERICLABELS  Build text labels for numeric group levels.
%   GNAMES = NUMERICLABELS(GLEVELS) returns a cell array with one label per
%   element of GLEVELS. Whole numbers are printed as integers. Everything
%   else is printed with '%g', and if that produces duplicate labels,
%   progressively more precise formats are tried until the labels are
%   distinct or the formats run out.
vals = full(glevels); % sparse input is expanded before formatting
isWhole = (fix(vals) == vals);

% Start with signed integer formatting for every value
gnames = sprintfc('%d',vals); % a little less than 19 significant digits

% Whole numbers above the int64 range are reprinted as unsigned
aboveInt64 = isWhole & (vals > intmax('int64'));
gnames(aboveInt64) = sprintfc('%u',vals(aboveInt64)); % a little more than 19 significant digits

% Non-integers, and values outside both 64-bit integer ranges, use '%g'
useG = ~isWhole | (vals < intmin('int64')) | (vals > intmax('uint64'));
if any(useG)
    gnames(useG) = sprintfc('%g',vals(useG)); % six significant digits
    % If some values in the grouping variable differ by less than (about)
    % 1e-6 (relative), add more digits to make the names unique.
    fallbackFmts = {'%.16g' '%.17g' '0x%bx'};
    k = 0;
    while k < length(fallbackFmts) && length(unique(gnames)) < length(gnames)
        k = k + 1;
        gnames(useG) = sprintfc(fallbackFmts{k},vals(useG));
    end
end