gusucode.com > datatypes 工具箱matlab源码程序 > datatypes/@tabular/writeTextFile.m
function writeTextFile(t,file,args) %WRITETEXTFILE Write a table to a text file. % Copyright 2012-2015 The MathWorks, Inc. % Parse and validate input arguments import matlab.internal.tableUtils.validateLogical pnames = {'WriteVariableNames' 'WriteRowNames' 'Delimiter' 'QuoteStrings' 'DateLocale'}; dflts = { true false 'comma' 'auto' ''}; [writeVarNames,writeRowNames,delimiter,quoteString,locale,isSpecified] = matlab.internal.table.parseArgs(pnames, dflts, args{:}); % Only write row names if asked to, and if they exist. writeParams.writeRowNames = validateLogical(writeRowNames,'WriteRowNames') && t.rowDim.hasLabels; writeParams.writeVarNames = validateLogical(writeVarNames,'WriteVariableNames'); writeParams.delimiter = standardizeDelimiter(delimiter); writeParams.locale = locale; if isSpecified.QuoteStrings writeParams.quoteString = validateLogical(quoteString,'QuoteStrings'); else % default quoting to automatic detection of embedded delimiter writeParams.quoteString = quoteString; end % Open the file for writing [fid,errmsg] = fopen(file,'Wt'); % text mode: CRLF -> LF if fid == -1 error(message('MATLAB:table:write:FileOpenError', file, errmsg)); else fileHandleCleanup = onCleanup(@()fclose(fid)); end % Write to file if (t.rowDim.length == 0) || (t.varDim.length == 0) % nothing to write else % write data in chunks % Extract & matricize variables (char arrays are processed to strings) % and get variable names adata = cellfun(@writetableMatricize, t.data, 'UniformOutput', false); avarnames = t.varDim.labels; % If writing row labels, tack them on up front as for any other variable if writeParams.writeRowNames adata = [{t.rowDim.labels} adata]; avarnames = [t.metaDim.labels{1} avarnames]; end % Get variable traits varTraits = variableTraits(adata,writeParams); % Write header if needed if writeVarNames writeHeader(fid, adata, avarnames, writeParams, varTraits); end % If all variables are of the same numeric type, it is faster to write % each chunk as a single array (especially for large number of rows) numTypes = cellfun(@class, adata, 'UniformOutput', false); writeParams.writeChunkAsOneArray = all(varTraits.isNumeric) && isequal(numTypes{1},numTypes{:}); % extract the first element to catch 1-variable case % Get indices that define the range of each variable pack (the same % range applies to all chunks) and varTraits of packed up variables packParams = varPackParams(adata, varTraits); % Estimate number of rows per chunk from size-in-memory of an one row % chunk processed the same way as if to be written to file if writeParams.writeChunkAsOneArray chunkSizeInBytes = 64*2^20; % 64MB else % if not writing chunk as one array, limit chunk size to 32MB as % use of character buffer created from SPRINTF() doubles memory % needed to write each chunk chunkSizeInBytes = 32*2^20; % 32MB end nRowsPerChunk = numRowsPerChunk(adata, chunkSizeInBytes, varTraits, packParams, writeParams); % Loop through chunks rowFmt = rowFormat(adata, writeParams.delimiter); % Get format for each row from original table data rowStart = 1; while rowStart <= t.rowDim.length % Extract the raw rows of the table for this chunk rowFinish = min([rowStart + nRowsPerChunk - 1, t.rowDim.length]); % end of chunk rowChunk = makeChunk(adata, rowStart, rowFinish); % Convert chunk into a cell array containing only numbers and % strings, in the correct orientation for fprintf() to write out % rows of the table rowChunk = processChunk(rowChunk, varTraits, packParams, writeParams); % Write chunk to file if writeParams.writeChunkAsOneArray fprintf(fid, rowFmt, rowChunk); else % format chunk to character buffer before writing to file fprintf(fid, '%s', sprintf(rowFmt, rowChunk{:})); end % update row-index to next chunk rowStart = rowFinish + 1; end end end %%%%%%%%%%%%%%%%%%%%%%%%%%%% MAIN FUNCTION END %%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function nRowsPerChunk = numRowsPerChunk(data, chunkSizeInBytes, varTraits, packParams, writeParams) row = makeChunk(data, 1, 1); row = processChunk(row, varTraits, packParams, writeParams); %#ok<NASGU>: used in WHOS rowSizeInBytes = getfield(whos('row'), 'bytes'); % size of one row in memory nRowsPerChunk = ceil(chunkSizeInBytes/rowSizeInBytes); % number of rows, at least one, per chunk end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function delimiter = standardizeDelimiter(delimiter) tab = sprintf('\t'); switch delimiter case {'tab', '\t', tab} delimiter = tab; case {'space',' '} delimiter = ' '; case {'comma', ','} delimiter = ','; case {'semi', ';'} delimiter = ';'; case {'bar', '|'} delimiter = '|'; otherwise throwAsCaller(MException(message('MATLAB:table:write:UnrecognizedDelimiter', delimiter(1)))); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function varTraits = variableTraits(cellVector, writeParams) % VARIABLETRAITS returns a varTraits struct with type traits of contents in % cellVector and number of delimited fields needed to write out content of % each variable in nVarFields. function m = numCellVarFields(c) % Number of delimited fields or columns needed to write out the % contents of a cell (excluding contents in nested cells) if ischar(c) % Treat each row as a separate string, including rows in higher dims. [n,~,d] = size(c); % Each string gets one "column". Zero rows (no strings) gets a single % column to contain the empty string, even for N-D,. In particular, % '' gets one column. m = max(n*d,1); elseif isnumeric(c) || islogical(c) || iscategorical(c) || isdatetime(c) || isduration(c) || iscalendarduration(c) m = max(numel(c),1); % always write out at least one empty field else % unsupported types m = 1; % write as an empty field end end % Table variable traits nVars = numel(cellVector); varTraits.nVarFields = cell(1, nVars); varTraits.quoteVariable = false(1, nVars); for i = 1:nVars x = cellVector{i}; % Table variable type info varTraits.isNumeric(i) = islogical(x) || isnumeric(x); varTraits.isCharStrings(i) = ischar(x) || matlab.internal.datatypes.isCharStrings(x); varTraits.isCategorical(i) = iscategorical(x); varTraits.isTime(i) = isdatetime(x) || isduration(x) || iscalendarduration(x); varTraits.isNonStringCell(i) = iscell(x) && ~varTraits.isCharStrings(i); varTraits.isUnsupportedType(i) = ~(varTraits.isNumeric(i) || varTraits.isCharStrings(i) || varTraits.isCategorical(i) || varTraits.isTime(i) || varTraits.isNonStringCell(i)); varTraits.isSparse(i) = issparse(x); varTraits.isComplex(i) = isnumeric(x) && ~isreal(x); varTraits.isStringType(i) = isa(x,'string'); % Number of fields to write from each variable. For regular non-cell % variables, number of fields is a scalar; for cell-variable with % multiple columns, number of fields is a row-vector with element % mapping to each column of the cell. if varTraits.isNonStringCell(i) % Multiple rows in each cell element are converted to delimited % fields. Number of fields for each column of a cell variable thus % equals to the maximum number of rows that column. varTraits.nVarFields{i} = max(cellfun(@numCellVarFields,x), [], 1); else varTraits.nVarFields{i} = max(size(x, 2), 1); % always write out at least one empty field end % Quote string/datetime/categorical variables of any embedded delimiter % is found in the variable. For cell array of strings, the QuoteStrings % flag overrides this automatic detection: string variables are either % always or never quoted with QuoteStrings flag equals True/False % respectively. if (varTraits.isCharStrings(i) || varTraits.isStringType(i) || varTraits.isTime(i) || varTraits.isCategorical(i)) if strcmp(writeParams.quoteString,'auto') if varTraits.isStringType(i) varTraits.quoteVariable(i) = any(contains(x,writeParams.delimiter)); else if varTraits.isCharStrings(i) % String/Char variable strsToCheck = x; % Add quote if the string variable contains embedded delimiter elseif varTraits.isTime(i) % Datetime/Duration/CalendarDuration strsToCheck = {x.Format}; % Add quote if Format contains embedded delimiter elseif varTraits.isCategorical(i) % Categorical strsToCheck = categories(x); % Add quote if any category name contains embedded delimiter end varTraits.quoteVariable(i) = matlab.internal.datatypes.containsCharacter(strsToCheck, writeParams.delimiter); end else % user specified 'QuoteString' value % never add quotes to anything if user set QuoteString as FALSE. % always quote string, categorical, datetime if user set to TRUE. varTraits.quoteVariable(i) = writeParams.quoteString; end end end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [fmt, fmtCell] = rowFormat(adata, delimiter) % ROWFORMAT construct fprintf/sprintf format strings to write out contents % of each cell in cell-vector ADATA. % Base string and numeric format specifiers formatBase.string = ['%s' delimiter]; formatBase.doubleReal = ['%.15g' delimiter]; formatBase.doubleComplex = ['%.15g+%.15gi' delimiter]; formatBase.singleReal = ['%.7g' delimiter]; formatBase.singleComplex = ['%.7g+%.7gi' delimiter]; formatBase.integerReal = ['%d' delimiter]; formatBase.integerComplex = ['%d+%di' delimiter]; formatBase.logical = ['%d' delimiter]; % Construct format specifier for each variable fmtCell = cell(size(adata)); % pre-allocate for i = 1:numel(adata) % get field name into formatBase struct x = adata{i}; if isnumeric(x) % class base if isinteger(x) fmtField = 'integer'; else % single or double fmtField = class(x); end % tag on complexity if isreal(x) fmtField = [fmtField 'Real']; %#ok<AGROW> else fmtField = [fmtField 'Complex']; %#ok<AGROW> end elseif islogical(x) fmtField = 'logical'; else % isCharStrings(i) || isCategorical(i) || isTime(i) || isNonStringCell(i) || isUnsupportedType(i) fmtField = 'string'; % other supported types are written out as strings end % construct the full format specification for this variable fmtCell{i} = repmat(formatBase.(fmtField),1,size(x,2)); end % Merge all format specifiers into one fmt = [fmtCell{:}]; fmt = [fmt(1:end-length(delimiter)) '\n']; % Remove trailing delimiter at end of row and add newline end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function rowChunk = makeChunk(data, rowStart, rowFinish) % MAKECHUNK extracts a 1-by-NVars cell that contains chunk of rows from % rowStart to rowFinish from each variable rowChunk = cell(size(data)); % initialize rowChunk for i = 1:numel(rowChunk) rowChunk{i} = data{i}(rowStart:rowFinish,:); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function rowChunk = processChunk(rowChunk, varTraits, packParams, writeParams) % standardize numerics to be compatible with fprintf() for i = find(varTraits.isNumeric) rowChunk{i} = standardizeNumericVar(rowChunk{i}, varTraits.isComplex(i), varTraits.isSparse(i)); end % Combine adjacent cell of matching supported types rowChunk = packVariables(rowChunk, packParams); if writeParams.writeChunkAsOneArray % If writeChunkAsOneArray is true, rowChunk is a 1x1 cell % containing all variables in this chunk packed in one array. rowChunk = rowChunk{1}'; % transpose for fprintf() else % Get 1-by-NVars cell arrays with either numerics or cellstrs resolveCell = true; % resolve and expand non-string cell variables rowChunk = stringify(rowChunk, writeParams, packParams.varTraitsPacked, resolveCell); % merge 1-by-NVars cell containing chunkNRows-by-... cell arrays % into a chunkNRows-by-NFields cell & transpose for fprintf() rowChunk = [rowChunk{:}]'; end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function writeHeader(fid, data, varnames, writeParams, varTraits) % WRITEHEADER writes a header line with names for each delimited fields. % Multiple columns variables are written as multiple delimited fields. function varnamej = colHeaders(varj, varnamej, ncellColsj) %COLHEADERS Create multiple column headers from a table variable name % Need multiple column headers if the variable has multiple columns. if ischar(varj) [~,~,ncols] = size(varj); else [~,ncols] = size(varj); % Treat N-D as 2-D. end if ncols > 1 varnamej = strcat(varnamej,'_',num2str((1:ncols)'))'; end % Need multiple column headers if the variable is a non-string cell % containing non-scalars. if iscell(varj) && ~matlab.internal.datatypes.isCharStrings(varj) && any(ncellColsj(:) > 1) vnj = cell(1,sum(ncellColsj)); cnt = 0; for ii = 1:ncols num = ncellColsj(ii); vnj(cnt+(1:num)) = strcat(varnamej(ii),'_',num2str((1:num)'))'; cnt = cnt + num; end varnamej = vnj; end end % colHeaders function % Get header for each variables varHeaders = cell(size(varnames)); for i = 1:length(varHeaders) varHeaders{i} = colHeaders(data{i}, varnames(i), varTraits.nVarFields{i}); end % Write out the header line varHeaders = [varHeaders{:}]; headerFmt = [strjoin(repmat({'%s'},1,length(varHeaders)), writeParams.delimiter), '\n']; fprintf(fid,headerFmt,varHeaders{:}); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function numericVar = standardizeNumericVar(numericVar, isVarComplex, isVarSparse) % expand sparse matrix in full if isVarSparse numericVar = full(numericVar); end % fprintf()/sprintf() does not write complex numbers. Convert to 2-element % vector of real and imaginery part for writing if isVarComplex numericVar = [real(numericVar), imag(numericVar)]; end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function packParams = varPackParams(v, varTraits) % variablePackIdx returns index vectors packStart and packFinish that % together define groups of adjacent variables in cell-vector v that have % the same kind (type, timezone, display format etc.) of supported datatypes % (numerics, datetime, duration, calendarDuration, categorical). % Get index of unique types (including timezone, format etc.) varTypes = cell(1, numel(v)); for i = 1:numel(v) x = v{i}; varTypes{i} = class(x); % datetime, duration or calendarDuration if varTraits.isTime(i) % treat time variables with different formats as different types so % they are not packed together varTypes{i} = [varTypes{i}, '_', x.Format]; % treat datetime with different timezones as different types so they % are not packed together into a single datetime array if isdatetime(x) varTypes{i} = [varTypes{i}, '_', x.TimeZone]; end end end [~, ~, idxVarType] = unique(varTypes,'stable'); % Type mask to perform packing on - force to be column. Only pack numeric, % datetime, duration, calendarDuration or categorical variables. isToPack = varTraits.isNumeric(:) | varTraits.isTime(:); % Pack categorical variables only if they all have the same categories. % Categorical variables with different categories are more time-consuming % to concatenate and thus are not packed. if (nnz(varTraits.isCategorical) > 1) % more than one categorical catVarIdx = find(varTraits.isCategorical); categoriesList = categories(v{catVarIdx(1)}); packCategorical = true; for i = catVarIdx(2:end) if ~isequal(categories(v{i}), categoriesList) packCategorical = false; break; end end if packCategorical isToPack = isToPack | varTraits.isCategorical(:); end end % Find start and finish of each matching datatype uniquePackStartMask = diff( [0; idxVarType] ) ~= 0; uniquePackFinishMask = diff( [idxVarType; 0] ) ~= 0; packParams.start = find(uniquePackStartMask & isToPack); packParams.finish = find(uniquePackFinishMask & isToPack); % Return varTraits when adjacent variables of the same type are packed as one varTraitFields = fieldnames(varTraits); for i=1:numel(varTraitFields) varTraits.(varTraitFields{i}) = varTraits.(varTraitFields{i})(uniquePackStartMask | ~isToPack); end packParams.varTraitsPacked = varTraits; end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function v = packVariables(v, packParams) % packVariables(v) packs cell vector v by concatenating range of cells % defined by index vectors packStart and packFinish into consolidated % nRows-by-1 cells. packStart = packParams.start; packFinish = packParams.finish; for ii = numel(packStart):-1:1 % work backwards to cope with removal v{packStart(ii)} = [v{packStart(ii):packFinish(ii)}]; % concatenate v(packStart(ii)+1:packFinish(ii)) = []; % remove end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function rowChunk = stringify(rowChunk, writeParams, varTraits, resolveCell) nRows = size(rowChunk{1},1); numericVarBuf = cell(nRows, 1); for ii = 1:numel(rowChunk) if varTraits.isNumeric(ii) % wrap each row in cell and delegate to fprintf for formatting for jj = 1:nRows numericVarBuf{jj} = rowChunk{ii}(jj,:); % equivalent to NUM2CELL(rowChunk{ii},2) but faster end rowChunk{ii} = numericVarBuf; elseif varTraits.isCategorical(ii) rowChunk{ii} = cellstr(rowChunk{ii}); elseif varTraits.isStringType(ii) strchunk = rowChunk{ii}; % replace all missing values with empty chars strchunk(find(ismissing(strchunk))) = ''; rowChunk{ii} = cellstr(strchunk); elseif varTraits.isTime(ii) rowChunk{ii} = cellstr(rowChunk{ii},[],writeParams.locale); elseif varTraits.isUnsupportedType(ii) rowChunk{ii} = repmat({''}, nRows, 1); elseif varTraits.isNonStringCell(ii) if resolveCell rowChunk{ii} = stringify_cell(rowChunk{ii}, varTraits.nVarFields{ii}, writeParams); else rowChunk{ii} = repmat({''}, nRows, 1); end end % Add quotes to stringified variable if needed if varTraits.quoteVariable(ii) if ischar(rowChunk{ii}{1}) rowChunk{ii} = strcat('"',strrep(rowChunk{ii},'"','""'),'"'); else rowChunk{ii} = num2cell(strcat('"',strrep([rowChunk{ii}{:}],'"','""'),'"'))'; end end end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function cellVar = stringify_cell(cellVar, nFields, writeParams) % STRINGFY_CELL formats and converts content of each row of a cell array % into a single delimited string. % The STRINGIFY helper function assumes row cell vector. Stretch cell % variable out to a 1-by-numel(cellVar) cell to facilitate the call [nCellRows, nCellCols] = size(cellVar); % cache the shape for reconstruction at the end cellVar = cellVar(:)'; % Matricize elements in cellVar (char arrays are processed to strings) % as subsequent processing assumes 2D content in the cells. cellVar = cellfun(@writetableMatricize,cellVar,'UniformOutput',false); % variable traits of contents in this cell varTraits = variableTraits(cellVar, writeParams); % get format specifier for each cell [~, fmtCell] = rowFormat(cellVar, writeParams.delimiter); % process strings and numerics for i = 1:numel(cellVar) % pre-process: matricize char array and stringify numerics if varTraits.isNumeric(i) % standardize numerics to be compatible with sprintf() cellVar{i} = standardizeNumericVar(cellVar{i}, varTraits.isComplex(i), varTraits.isSparse(i)); % convert numerics into cell array of strings with respective % format specifiers fmt = strtok(fmtCell{i}, writeParams.delimiter); cellVar{i} = sprintfc(fmt, cellVar{i}); % the numeric have been converted to cellstrs varTraits.isNumeric(i) = false; varTraits.isString(i) = true; end end % convert non-numerics into cell array of strings resolveCell = false; % do not further resolve nested cells cellVar = stringify(cellVar, writeParams, varTraits, resolveCell); % Content of each cell is linearly indexed out as delimited fields in % each table row. Since each table row must have the same number of % fields, rows with fewer fields need to be pad up with empty fields. % The correct number of fields for each cell column is passed in % (nFields). Compute the number of empty fields to pad with respect to % number of elements in each cell. nFieldsCells = cellfun(@(x)max(numel(x),1),cellVar); % number of fields (at least one) in each cell nPadFields = max(repelem(nFields,nCellRows)-nFieldsCells, 0); % reshape back to the original number of rows cellVar = reshape(cellVar, nCellRows, nCellCols); for i = 1:numel(cellVar) % Combine multiple fields in each table row into one row with delimiters cellVar{i} = strjoin(cellVar{i}, writeParams.delimiter); % Pad each cell content with required number of empty fields (i.e. nPadFields) cellVar{i} = [cellVar{i}, repmat(writeParams.delimiter,1,nPadFields(i))]; end end