gusucode.com > datatypes 工具箱matlab源码程序 > datatypes/@tabular/joinInnerOuter.m

    function [c,il,ir] = joinInnerOuter(a,b,leftOuter,rightOuter,leftKeyVals,rightKeyVals, ...
                                    leftVars,rightVars,leftVarDim,rightVarDim)
%JOININNEROUTER Common calculations for innerJoin and outerJoin.
%
%   Inputs:
%     A, B                      - The left and right tabular inputs. C is created
%                                 from A via cloneAsEmpty, and data is read from
%                                 A.data and B.data.
%     LEFTOUTER, RIGHTOUTER     - Flags. When LEFTOUTER is true, rows of A with no
%                                 matching key in B are included in C; when
%                                 RIGHTOUTER is true, rows of B with no matching
%                                 key in A are included. Both false gives an
%                                 inner join.
%     LEFTKEYVALS, RIGHTKEYVALS - One key value per row of A and of B. They are
%                                 sorted and compared with <, == and >.
%                                 NOTE(review): the code vertically concatenates
%                                 results derived from these with column vectors
%                                 and stores them into a ZEROS array, so they are
%                                 presumably numeric column vectors -- confirm
%                                 against the callers.
%     LEFTVARS, RIGHTVARS       - Indices into A.data and B.data of the variables
%                                 to copy into C, in output order.
%     LEFTVARDIM, RIGHTVARDIM   - Variable-dimension objects used to build C's
%                                 varDim (via lengthenTo and moveProps).
%
%   Outputs:
%     C      - The joined table: the LEFTVARS of A followed by the RIGHTVARS of B.
%     IL, IR - Column vectors with one element per row of C, giving the source
%              row in A and in B respectively; 0 means "no source row".

% C is [A(IA,LEFTVARS) B(IB,RIGHTVARS)], where IA and IB are row indices into A
% and B computed for each row of C from LEFTKEYVALS and RIGHTKEYVALS.  These
% index vectors may include zeros indicating "no source row in A/B" for some
% rows of C.

%   Copyright 2012-2016 The MathWorks, Inc.

import matlab.internal.tableUtils.defaultarrayLike
import matlab.internal.tableUtils.coloncat

% Everything runs inside try/catch so that any error is rethrown with
% throwAsCaller, i.e. reported as coming from the calling function.
try
    % Sort each key.  The sort orders are kept so that indices into the sorted
    % keys can be translated back to original row indices later.
    [lkeySorted,lkeySortOrd] = sort(leftKeyVals);
    [rkeySorted,rkeySortOrd] = sort(rightKeyVals);

    % Get unique key values and counts. This also gives the beginning and end of
    % each block of constant key values in each. All of these end up 0x1 if the
    % corresponding key is empty.
    lbreaks = find(diff(lkeySorted)); % breakpoints from one key value to the next
    rbreaks = find(diff(rkeySorted));
    lones = ones(~isempty(leftKeyVals),1); % scalar 1, or empty 0x1
    rones = ones(~isempty(rightKeyVals),1);
    lstart = [lones; lbreaks+1]; % start of each block of constant key values
    rstart = [rones; rbreaks+1];
    lend = [lbreaks; length(lkeySorted)*lones]; % end of each block of constant key values
    rend = [rbreaks; length(rkeySorted)*rones];
    lunique = lkeySorted(lstart); % unique key values
    runique = rkeySorted(rstart);
    luniqueCnt = lend - lstart + 1; % number of rows in each block, i.e. the count of each unique key value
    runiqueCnt = rend - rstart + 1;
    % rstart and rend are deliberately NOT cleared here: they are needed below to
    % build the row indices into B.
    clear lbreaks rbreaks lstart lend % clear some potentially large variables no longer needed

    % Use the "block nested loops" algorithm to determine how many times to
    % replicate each row of A and B.  Rows within each "constant" block of keys in
    % A will need to be replicated as many times as there are rows in the matching
    % block of B, and vice versa.  Rows of A that don't match anything in B, or
    % vice versa, get zero.  Rows of A will be replicated row-by-row; rows in B
    % will be replicated block-by-block.
    %
    % In this loop il and ir are scalar cursors into lunique and runique.  They
    % are overwritten with row index vectors further down.
    il = 1;
    ir = 1;
    leftElemReps = zeros(size(lunique));
    rightBlockReps = zeros(size(runique));
    while (il <= length(lunique)) && (ir <= length(runique))
        if lunique(il) < runique(ir)
            % Left key has no match in B: leave its count at zero and advance.
            il = il + 1;
        elseif lunique(il) == runique(ir)
            % Matching key: each row in the A block pairs with every row in the
            % B block, and the B block as a whole is repeated once per A row.
            leftElemReps(il) = runiqueCnt(ir);
            rightBlockReps(ir) = luniqueCnt(il);
            il = il + 1;
            ir = ir + 1;
        elseif lunique(il) > runique(ir)
            % Right key has no match in A: leave its count at zero and advance.
            ir = ir + 1;
        else % one must have been NaN
            % NaNs get sorted to end; nothing else will match
            break;
        end
    end

    % Identify the rows of A required for an inner join: expand out the number of
    % replicates within each block to match against the (non-unique) sorted keys,
    % then replicate each row index the required number of times.
    % After the first line leftElemReps has one element per row of A (in sorted
    % order) rather than one per unique key.
    leftElemReps = repelem(leftElemReps,luniqueCnt);
    il = repelem(1:length(lkeySorted),leftElemReps)';

    % Identify the rows of B required for an inner join: replicate the start and
    % end indices of each block of keys the required number of times, then create
    % a concatenation of those start:end expressions.
    rstart = repelem(rstart,rightBlockReps);
    rend = repelem(rend,rightBlockReps);
    ir = coloncat(rstart,rend)';
    clear rstart rend % clear some potentially large variables no longer needed

    % Translate back to the unsorted row indices.
    il = lkeySortOrd(il);
    ir = rkeySortOrd(ir);

    % If this is a left- or full-outer join, add the indices of the rows of A that
    % didn't match anything in B.  Add in zeros for the corresponding B indices.
    if leftOuter
        % leftElemReps is already per-row here (expanded above), so LEFT indexes
        % the sorted keys directly.
        left = find(leftElemReps(:) == 0); % force a column for one unique left key
        il = [il; lkeySortOrd(left)];
        ir = [ir; zeros(size(left))];
    end

    % If this is a right- or full-outer join, add the indices of the rows of B that
    % didn't match anything in A.  Add in zeros for the corresponding A indices.
    if rightOuter
        % Expand the per-block counts to one element per row of B (in sorted
        % order) so that RIGHT indexes the sorted keys directly.
        rightBlockReps = repelem(rightBlockReps,runiqueCnt);
        right = find(rightBlockReps(:) == 0); % force a column for one unique right key
        il = [il; zeros(size(right))];
        ir = [ir; rkeySortOrd(right)];
    end

    % Now sort the whole thing by the key.  If this is an inner join, that's
    % already done.  For an outer join the unmatched rows were appended at the
    % end, so the combined rows have to be reordered by key value.
    if leftOuter || rightOuter
        pos = (il > 0);
        Key = zeros(size(il));
        Key(pos) = leftKeyVals(il(pos)); % Rows that have an A key value
        Key(~pos) = rightKeyVals(ir(~pos)); % Rows with no A key value must have a B key
        [~,ord] = sort(Key);
        il = il(ord);
        ir = ir(ord);
    end

    % Create a new table combining the specified variables from A and from B.
    % Don't copy any per-array or per-row properties, but do assign var labels and
    % merge a's and b's per-var properties.
    c = a.cloneAsEmpty(); % respect the subclass
    c.metaDim = a.metaDim;
    numLeftVars = length(leftVars);
    numRightVars = length(rightVars);
    c.rowDim = c.rowDim.createLike(length(il));
    % Extend the left var dimension with the right var labels, then move the
    % right per-var properties into positions numLeftVars+1 onwards.
    c_varDim = leftVarDim.lengthenTo(numLeftVars+numRightVars,rightVarDim.labels);
    c.varDim = c_varDim.moveProps(rightVarDim,1:numRightVars,numLeftVars+(1:numRightVars));
    c_data = cell(1,numLeftVars+numRightVars);

    % Compute logical indices of where A's and B's rows will go in C,
    % and the indices of which rows to pick out of A and B.
    ilDest = (il > 0); ilSrc = il(ilDest);
    irDest = (ir > 0); irSrc = ir(irDest);

    % Move data into C.
    a_data = a.data;
    c_nrows = c.rowDim.length;
    for j = 1:numLeftVars
        leftvar_j = a_data{leftVars(j)};
        % Output has the same size as the source variable except for the number
        % of rows.
        szOut = size(leftvar_j); szOut(1) = c_nrows;
        % Rows of C with no source row in A keep whatever defaultarrayLike
        % fills in (presumably a type-appropriate default/missing value --
        % confirm against that helper).
        cvar_j = defaultarrayLike(szOut,'Like',leftvar_j);
        % (rows,:) indexing treats all trailing dimensions as one; the reshape
        % below puts the result into the szOut shape.
        cvar_j(ilDest,:) = leftvar_j(ilSrc,:);
        c_data{j} = reshape(cvar_j,szOut);
    end
    b_data = b.data;
    for j = 1:numRightVars
        % Same as above, for B's variables; they go after all of A's variables.
        rightvar_j = b_data{rightVars(j)};
        szOut = size(rightvar_j); szOut(1) = c_nrows;
        cvar_j = defaultarrayLike(szOut,'Like',rightvar_j);
        cvar_j(irDest,:) = rightvar_j(irSrc,:);
        c_data{numLeftVars + j} = reshape(cvar_j,szOut);
    end
    c.data = c_data;
catch ME
    throwAsCaller(ME)
end