gusucode.com > bigdata 工具箱 matlab源码程序 > bigdata/+matlab/+bigdata/+internal/+adaptors/TableAdaptor.m

    %TableAdaptor Adaptor class for tabular tall arrays.

% Copyright 2016 The MathWorks, Inc.

classdef TableAdaptor < matlab.bigdata.internal.adaptors.AbstractAdaptor
    % Adaptor for tall tables. It records the table's variable names together
    % with one adaptor per variable, and implements the dot, brace and paren
    % indexing operations (reference, assignment and deletion) for tall tables.

    properties (SetAccess = immutable)
        % Names of the table variables; assigned once in the constructor.
        VariableNames
    end
    properties (SetAccess = private)
        % Cell array holding one adaptor per table variable.
        VariableAdaptors
    end
    methods (Access = private)
        function [varNames, varIdxs] = resolveVarNameSubscript(obj, subscript)
        % Translate a variable subscript (EndMarker, logical, string, numeric,
        % cellstr, colon or char) into a row of variable names plus their
        % positions within obj.VariableNames. Errors if a name is unknown.

            knownNames = obj.VariableNames;
            numVars    = numel(knownNames);

            % Normalise some subscript types before the main dispatch.
            if isa(subscript, 'matlab.bigdata.internal.util.EndMarker')
                % EndMarkers used as the variable subscript have to be resolved
                % immediately, on the client, against the number of variables.
                subscript = resolve(subscript, [0, numVars], 2);
            elseif islogical(subscript)
                % Logical masks become numeric positions.
                subscript = find(subscript);
            elseif isstring(subscript)
                % Strings become a cellstr so that ismember etc. can be used.
                subscript = cellstr(subscript);
            end

            if isnumeric(subscript)
                if max(subscript(:)) > numVars
                    error(message('MATLAB:table:VarIndexOutOfRange'));
                end
                % Zero / negative / NaN values are not checked explicitly here;
                % the indexing expression below is relied upon to reject them.
                % (See g1368852 - table itself allows t.(0) and t.(-1).)
                varNames = knownNames(subscript);
            elseif iscellstr(subscript)
                varNames = subscript;
            elseif matlab.bigdata.internal.util.isColonSubscript(subscript)
                varNames = knownNames;
            elseif ischar(subscript)
                varNames = {subscript};
            else
                error(message('MATLAB:table:InvalidVarSubscript'));
            end

            % Always hand back the names as a row.
            varNames = reshape(varNames, 1, []);

            % Report the first requested name that is not a known variable.
            [isKnown, varIdxs] = ismember(varNames, knownNames);
            if ~all(isKnown)
                firstMissing = find(~isKnown, 1);
                error(message('MATLAB:table:UnrecognizedVarName', varNames{firstMissing}));
            end
        end

        function varName = resolveDotSubscript(obj, subscript, allowMissing)
        % Resolve the subscript of a single dot-indexing level (tt.Foo or
        % tt.('Foo') or tt.(n)) to one variable name. Accepted subscripts are a
        % scalar string, a char array, or an integer-valued numeric scalar.
        % When allowMissing is true the name need not exist yet (assignment).

            % Scalar strings are treated as char.
            if isstring(subscript) && isscalar(subscript)
                subscript = char(subscript);
            end

            isCharSub   = ischar(subscript);
            isIntScalar = isnumeric(subscript) && isscalar(subscript) ...
                && round(subscript) == subscript;
            if ~(isCharSub || isIntScalar)
                error(message('MATLAB:table:IllegalVarSubscript'));
            end

            if allowMissing && isCharSub
                % The variable may be new, but its name must still be valid.
                if ~isvarname(subscript)
                    error(message('MATLAB:table:InvalidVariableName', subscript));
                end
                varName = subscript;
                return
            end

            if allowMissing && isnumeric(subscript)
                % Integer position: an existing variable, or exactly one past
                % the end to append a new variable.
                numVars = numel(obj.VariableNames);
                if subscript <= numVars
                    varName = obj.VariableNames{subscript};
                elseif subscript == numVars + 1
                    % Generate a name for the appended variable, adding a
                    % numeric suffix until it does not clash with existing names.
                    varName = sprintf('Var%d', subscript);
                    suffix  = 1;
                    while ismember(varName, obj.VariableNames)
                        varName = sprintf('Var%d_%d', subscript, suffix);
                        suffix  = suffix + 1;
                    end
                else
                    % More than one past the end.
                    error(message('MATLAB:table:DiscontiguousVars'));
                end
                return
            end

            if matlab.bigdata.internal.util.isColonSubscript(subscript)
                error(message('MATLAB:table:UnrecognizedVarName', subscript));
            end

            % Otherwise resolve against the known variable names using the
            % general-purpose resolver; exactly one name must come back.
            resolved = obj.resolveVarNameSubscript(subscript);
            assert(isscalar(resolved), ...
                   'Unexpectedly resolved dot-subscript to multiple variables.');
            varName = resolved{1};
        end
    end
    methods
        function obj = TableAdaptor(varargin)
        % TableAdaptor(previewData) derives names and adaptors from a table.
        % TableAdaptor(names, adaptors) uses the supplied cell arrays.
            narginchk(1,2);
            obj@matlab.bigdata.internal.adaptors.AbstractAdaptor('table');
            if nargin == 2
                % Explicit names and adaptors; both are stored as rows.
                [names, adaptors] = deal(varargin{:});
                assert(iscellstr(names) && ...
                       numel(names) == numel(adaptors));
                obj.VariableNames    = reshape(names, 1, []);
                obj.VariableAdaptors = reshape(adaptors, 1, []);
            else
                % Preview data: build each adaptor from a zero-row slice of
                % the corresponding variable.
                previewData = varargin{1};
                names       = previewData.Properties.VariableNames;
                adaptors    = cell(size(names));
                for k = 1:numel(names)
                    adaptors{k} = matlab.bigdata.internal.adaptors.getAdaptor( ...
                        previewData{[], names{k}});
                end
                obj.VariableNames    = names;
                obj.VariableAdaptors = adaptors;
            end

            obj = setSmallSizes(obj, length(obj.VariableNames));
            % Give every variable adaptor the tall size of this adaptor.
            for k = 1:numel(obj.VariableAdaptors)
                obj.VariableAdaptors{k} = copyTallSize(obj.VariableAdaptors{k}, obj);
            end
        end

        function names = getProperties(obj)
        % Names reachable via dot-indexing: the variables plus 'Properties'.
            names = horzcat(obj.VariableNames, 'Properties');
        end

        function idxs = resolveVarNamesToIdxs(obj, namesOrIdxs)
        % Positions within VariableNames of the requested variables.
            [~, idxs] = obj.resolveVarNameSubscript(namesOrIdxs);
        end

        function obj = resetSizeInformation(obj)
        % Overload for TableAdaptor: NDims and the number of variables do not
        % change, so only the tall size is reset.
            obj = resetTallSize(obj);
        end

        function obj = resetTallSize(obj, varargin)
        % Overload for TableAdaptor: after resetting via the superclass, copy
        % the new tall size onto each contained variable adaptor.
            obj = resetTallSize@matlab.bigdata.internal.adaptors.AbstractAdaptor(...
                obj, varargin{:});
            for k = 1:numel(obj.VariableAdaptors)
                obj.VariableAdaptors{k} = copyTallSize(obj.VariableAdaptors{k}, obj);
            end
        end

        function displayImpl(obj, context, ~)
        % Display using the real preview if there is one, otherwise using a
        % fabricated table of '?' placeholders with the right variable names.
            if context.IsPreviewAvailable
                doDisplay(context);
                return
            end

            placeholder = repmat('?', 3, 1);
            fakeColumns = repmat({placeholder}, 1, numel(obj.VariableNames));
            fakePreview = table(fakeColumns{:}, 'VariableNames', obj.VariableNames);
            doDisplayWithFabricatedPreview(context, fakePreview, obj.NDims, obj.Size);
        end

        function varargout = subsrefDot(obj, pa, szPa, s)
        % Dot reference. tt.Properties yields a struct of table properties;
        % any other subscript extracts the named variable. Further indexing
        % levels in s(2:end) are applied to the result.
            dotSub = s(1).subs;
            if ~isequal(dotSub, 'Properties')
                % Extract the variable by re-using brace indexing over all rows.
                varName   = obj.resolveDotSubscript(dotSub, false);
                braceSubs = struct('type', '{}', 'subs', {{':', varName}});
                out = subsrefBraces(obj, pa, szPa, braceSubs);
            else
                dimNames = { ...
                    getString(message('MATLAB:table:uistrings:DfltRowDimName')), ...
                    getString(message('MATLAB:table:uistrings:DfltVarDimName'))};
                out = struct('Description', {''}, ...
                             'UserData', {[]}, ...
                             'DimensionNames', {dimNames}, ...
                             'VariableNames', {obj.VariableNames}, ...
                             'VariableDescriptions', {{}}, ...
                             'VariableUnits', {{}}, ...
                             'RowNames', {{}});
            end
            [varargout{1:nargout}] = iRecurseSubsref(out, s(2:end));
        end

        function varargout = subsrefBraces(obj, pa, ~, s)
        % Brace reference tt{rows, vars}: extract the contents of the selected
        % variables, then select rows, then apply any remaining indexing.
            braceSubs = s(1).subs;
            if numel(braceSubs) ~= 2
                error(message('MATLAB:table:NDSubscript'));
            end
            rowSub = braceSubs{1};
            varSub = braceSubs{2};

            [~, varIdxs] = obj.resolveVarNameSubscript(varSub);
            extractFcn   = iExtractVariableFunctor(varIdxs, true);
            outValue     = slicefun(extractFcn, pa);

            if isempty(varIdxs)
                % Brace-indexing an empty list of variables gives Nx0 double.
                adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType('double');
            else
                % Combine the selected variables' adaptors along dimension 2.
                adaptor = matlab.bigdata.internal.adaptors.combineAdaptors(...
                    2, obj.VariableAdaptors(varIdxs));
            end
            out = tall(outValue, adaptor);

            % Row selection goes through tall subsref. This is imperfect: it
            % presumes at most 3 non-tall dimensions.
            if ~matlab.bigdata.internal.util.isColonSubscript(rowSub)
                rowSelector = substruct('()', {rowSub, ':', ':', ':'});
                out = subsref(out, rowSelector);
            end

            [varargout{1:nargout}] = iRecurseSubsref(out, s(2:end));
        end

        function obj = subsasgnBraces(~, ~, ~, ~, ~) %#ok<STOUT>
        % Brace assignment is not supported; always errors.
            error(message('MATLAB:bigdata:table:SubsasgnBracesNotSupported'))
        end

        function out = subsrefParens(obj, pa, szPa, s)
        % Paren reference tt(rows, vars): returns a tall table holding the
        % selected variables and rows, then applies any remaining indexing.
            parenSubs = s(1).subs;
            if numel(parenSubs) ~= 2
                error(message('MATLAB:table:NDSubscript'));
            end
            rowSub = parenSubs{1};
            varSub = parenSubs{2};

            % Column selection first; skipped when every variable is selected
            % in its existing order.
            [varNames, varIdxs] = obj.resolveVarNameSubscript(varSub);
            if isequal(varNames, obj.VariableNames)
                columnsPa = pa;
            else
                columnsPa = slicefun(iExtractVariableFunctor(varIdxs, false), pa);
            end
            newAdaptor = matlab.bigdata.internal.adaptors.TableAdaptor( ...
                varNames, obj.VariableAdaptors(varIdxs));

            % Then row selection.
            rowsAndColumnsPa = subsrefParensImpl(columnsPa, szPa, ...
                                                 substruct('()', {rowSub, ':'}));

            % Wrap as a tall table and apply whatever indexing is left.
            out = iRecurseSubsref(tall(rowsAndColumnsPa, newAdaptor), s(2:end));
        end

        function pa = subsasgnParens(obj, pa, szPa, s, b) %#ok<INUSD>
        % Paren assignment is not supported; always errors.
            error(message('MATLAB:bigdata:table:SubsasgnParensNotSupported'));
        end

        function out = subsasgnParensDeleting(obj, pa, szPa, s)
        % Paren deletion tt(rows, vars) = []. Supports deleting rows chosen by
        % a tall logical (with ':' for vars) or deleting whole variables (with
        % ':' for rows); both are implemented by selecting what is kept.
            import matlab.bigdata.internal.util.isColonSubscript

            % The language front-end should not allow any indexing to follow
            % the parens in a deletion expression.
            assert(numel(s) == 1);
            if numel(s(1).subs) ~= 2
                error(message('MATLAB:table:NDSubscript'));
            end

            [firstSub, secondSub] = deal(s(1).subs{:});
            deletingAllVars = isColonSubscript(secondSub);
            deletingAllRows = isColonSubscript(firstSub);

            if deletingAllVars && deletingAllRows
                error(message('MATLAB:bigdata:table:DeleteWholeTableUsingIndexing'));
            elseif deletingAllVars
                % Deleting rows: the row subscript must be tall and logical, and
                % the rows to keep are its negation.
                if ~istall(firstSub)
                    error(message('MATLAB:bigdata:table:FirstSubscriptColonOrTallVariable'));
                end
                firstSub = tall.validateType(firstSub, 'subsasgn', {'logical'}, 1);
                out = obj.subsrefParens(pa, szPa, substruct('()', {~firstSub, secondSub}));
            elseif deletingAllRows
                % Deleting whole variables: keep the remaining names, in order.
                deleteNames = obj.resolveVarNameSubscript(secondSub);
                keepNames   = setdiff(obj.VariableNames, deleteNames, 'stable');
                out = obj.subsrefParens(pa, szPa, substruct('()', {firstSub, keepNames}));
            else
                error(message('MATLAB:table:InvalidEmptyAssignment'));
            end

        end
        function out = subsasgnDot(obj, pa, szPa, s, b)
        % Dot assignment. tt.Name = b adds or replaces a whole variable (b must
        % be tall); tt.Name(...) = b updates part of an existing variable by
        % extracting it, assigning into it, and storing it back.
            if isequal(s(1).subs, 'Properties')
                error(message('MATLAB:bigdata:table:SetPropertiesUnsupported'));
            end
            varName = obj.resolveDotSubscript(s(1).subs, true);

            if numel(s) ~= 1
                % Partial update: extract, update, then replace as a whole.
                currentVar = obj.subsrefDot(pa, szPa, s(1));
                updatedVar = subsasgn(currentVar, s(2:end), b);
                out = obj.subsasgnDot(pa, szPa, substruct('.', varName), updatedVar);
                return
            end

            % Whole-variable add or update. There is no scalar expansion for
            % "t.x = b", so b has to be tall.
            if ~istall(b)
                error(message('MATLAB:bigdata:table:AssignVariableMustBeTall'));
            end

            % When both tall sizes are known they must agree.
            bAdaptor   = matlab.bigdata.internal.adaptors.getAdaptor(b);
            sizesKnown = ~isnan(obj.Size(1)) && ~isnan(bAdaptor.Size(1));
            if sizesKnown && obj.Size(1) ~= bAdaptor.Size(1)
                error(message('MATLAB:bigdata:array:IncompatibleTallStrictSize'));
            end

            % Build the adaptor for the result: append the name if it is new,
            % then store b's adaptor at that variable's position.
            newNames    = obj.VariableNames;
            newAdaptors = obj.VariableAdaptors;
            if ~ismember(varName, newNames)
                newNames{end+1} = varName;
            end
            position = find(strcmp(varName, newNames));
            assert(isscalar(position));
            newAdaptors{position} = bAdaptor;
            newAdaptor = matlab.bigdata.internal.adaptors.TableAdaptor(...
                newNames, newAdaptors);

            updateFcn = @(t, v) iUpdateWholeVariable(t, varName, v);
            outPa = strictslicefun(updateFcn, pa, hGetValueImpl(b));
            out   = tall(outPa, newAdaptor);
        end
        function out = subsasgnDotDeleting(obj, pa, ~, S)
        % Dot deletion tt.Name = []: removes one existing variable. Only a
        % single level of indexing is permitted.
            if numel(S) > 1
                error(message('MATLAB:bigdata:table:DotDeletingSingleLevelIndexing'));
            end
            deletingName = obj.resolveDotSubscript(S(1).subs, false);

            % Mask of the variables that survive the deletion.
            keepTF = ~strcmp(deletingName, obj.VariableNames);
            outPa  = slicefun(@(x) iRemoveVariable(x, deletingName), pa);

            newAdaptor = matlab.bigdata.internal.adaptors.TableAdaptor(...
                obj.VariableNames(keepTF), ...
                obj.VariableAdaptors(keepTF));

            out = tall(outPa, newAdaptor);
        end
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function functor = iExtractVariableFunctor(vars, shouldDereference)
% Build a function handle that, given a table t, selects the variables
% identified by vars. With shouldDereference false the handle returns the
% sub-table t(:, vars); with it true the handle returns that sub-table's
% contents via {:,:}.
    if shouldDereference
        functor = @extractContents;
    else
        functor = @extractSubTable;
    end

    function subTable = extractSubTable(t)
        subTable = t(:, vars);
    end

    function contents = extractContents(t)
        selected = extractSubTable(t);
        contents = selected{:,:};
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function t = iRemoveVariable(t, varName)
% Delete the variable named varName from t by assigning [] to t.(varName),
% and return the modified t.
    t.(varName) = [];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simply apply a new variable into the table, ensuring the new data is the
% correct size. The check here is only needed in rare cases (i.e. where the
% table is completely empty) - otherwise the table assignment itself actually
% throws this error. See g1367363.
function t = iUpdateWholeVariable(t, varName, v)
% Store v as variable varName of t. Errors with
% MATLAB:table:RowDimensionMismatch when t and v differ in their first
% dimension.
    tableHeight = size(t, 1);
    valueHeight = size(v, 1);
    if tableHeight ~= valueHeight
        error(message('MATLAB:table:RowDimensionMismatch'));
    end
    t.(varName) = v;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Apply remaining indexing expressions
function varargout = iRecurseSubsref(data, S)
% Apply the indexing levels in S to data using subsref. When S is empty
% there is nothing left to apply and data itself is returned.
    if ~isempty(S)
        [varargout{1:nargout}] = subsref(data, S);
        return
    end
    varargout = {data};
end