ocr.m vision工具箱matlab源码程序 - matlab工具箱源码

gusucode.com > vision工具箱matlab源码程序 > vision/ocr.m
    function txt = ocr(I, varargin)
%OCR Recognize text using Optical Character Recognition.
%    txt = OCR(I) returns an ocrText object containing the recognized text,
%    the location of the text within I, and a metric indicating the
%    confidence of the recognition result. Confidence values range between
%    0 and 1 and should be interpreted as probabilities.
%
%    txt = OCR(I, roi) recognizes text in I within one or more rectangular
%    regions defined by an M-by-4 matrix, roi. Each row of roi is a
%    four-element vector, [x y width height], that specifies the upper-left
%    corner and size of a rectangular region of interest in pixels. Each
%    rectangle must be fully contained within I.
%
%    [...] = OCR(...,Name,Value) specifies additional name-value pair
%    arguments described below:
%
%    'TextLayout'    Specify the layout of the text within I as a string.
%                    Valid string values are 'Auto', 'Block', 'Line',
%                    'Word', or 'Character'.
%
%                    Default: 'Auto'
%                
%    'Language'      Specify the language to recognize as a string or a
%                    cell array of strings. The language can be specified
%                    using the name of a language such as 'English' or
%                    'Japanese'.
%
%                    <a href="matlab:helpview(fullfile(docroot,'toolbox','vision','vision.map'),'ocrLanguage')">A list of supported languages is shown in the documentation.</a>
%
%                    <a href="matlab:helpview(fullfile(docroot,'toolbox','vision','vision.map'),'ocrLanguage')">Custom trained languages are also supported.</a>
%
%                    Default: 'English'        
%
%    'CharacterSet'  Specify the character set as a string of characters.
%                    The classification process is constrained to select
%                    the best matches from this smaller set of characters.
%                    By default, all characters in the Language are used.
%
%                    Default: ''
%                       
% Class Support
% -------------
% The input image I can be logical, uint8, int16, uint16, single, or
% double, and it must be real and nonsparse. 
%
% Example 1 - Recognize text within an image
% ------------------------------------------
%
%    businessCard   = imread('businessCard.png');
%    ocrResults     = OCR(businessCard)
%    recognizedText = ocrResults.Text;    
%    figure
%    imshow(businessCard)
%    text(600, 150, recognizedText, ... 
%        'BackgroundColor', [1 1 1])
%
% Example 2 - Recognize text in regions of interest (ROI)
% -------------------------------------------------------
%    I = imread('handicapSign.jpg');
%    
%    % Define one or more rectangular regions of interest within I.
%    roi = [360 118 384 560];
%
%    % You may also use IMRECT to select a region using a mouse:
%    %    figure; imshow(I); roi = round(getPosition(imrect))
%
%    ocrResults = OCR(I, roi);   
%
%    % Insert recognized text into original image
%    Iocr = insertText(I, roi(1:2), ocrResults.Text, ...
%        'AnchorPoint', 'RightTop', 'FontSize',16);
%    figure
%    imshow(Iocr)
%
% Example 3 - Display word bounding boxes and recognition confidences
% -------------------------------------------------------------------
%    businessCard = imread('businessCard.png');
%    ocrResults = OCR(businessCard)
%    Iocr = insertObjectAnnotation(businessCard, 'rectangle', ...
%        ocrResults.WordBoundingBoxes, ...
%        ocrResults.WordConfidences);
%    figure
%    imshow(Iocr)
%
% Example 4 - Find and highlight text in the image
% ------------------------------------------------
%
%    businessCard = imread('businessCard.png');
%    ocrResults   = OCR(businessCard);
%    bboxes = locateText(ocrResults, 'MathWorks', 'IgnoreCase', true);
%    Iocr   = insertShape(businessCard, 'FilledRectangle', bboxes);
%    figure
%    imshow(Iocr)
%
% See also ocrTrainer, ocrText, ocrText>locateText, insertShape, insertText

% References 
% ---------- 
% An Overview of the Tesseract OCR Engine In ICDAR '07: Proceedings of the
% Ninth International Conference on Document Analysis and Recognition
% (ICDAR 2007) Vol 2 (2007), pp. 629-633 by R. Smith
%
% Ray Smith and Daria Antonova and Dar-Shyang Lee. Adapting the Tesseract
% Open Source OCR Engine for Multilingual OCR. 2009.
%
% Ray Smith. Hybrid Page Layout Analysis via Tab-Stop Detection.
% Proceedings of the 10th international conference on document analysis and
% recognition. 2009.

%#codegen
%#ok<*EMCA>

% Validate the image, the optional ROI and the name-value pairs. params is
% the struct assembled by setParams; besides the documented options it
% carries PreprocessBinaryImage, which is parsed by parseInputs but not
% listed in the help text above.
[roi, hasROI, params] = parseInputs(I,varargin{:});


if islogical(I) && ~params.PreprocessBinaryImage  
    % Process binary images as-is if the PreprocessBinaryImage is false.
    % This by-passes tesseract's binarization stage.
    [rawtext, metadata] = tesseract(params, I, roi, hasROI);    
else    
    % Convert the image to uint8 and pass it through convertRGBToGray
    % before recognition.
    % NOTE(review): the two branches call tesseract separately instead of
    % sharing one image variable, presumably so each call site sees a
    % single image class under code generation - confirm before merging.
    Iu8 = im2uint8(I);    
    img = vision.internal.ocr.convertRGBToGray(Iu8);
    
    [rawtext, metadata] = tesseract(params, img, roi, hasROI);    
end

% Package the raw text and metadata returned by tesseract, together with
% the parameters, into the ocrText output.
txt = ocrText.create(rawtext, metadata, params);

% -------------------------------------------------------------------------
% Invoke Tesseract
% -------------------------------------------------------------------------
function [txt, ocrMetadata] = tesseract(params, Iu8, roi, hasROI)
% Run the OCR engine on an image and return the recognized text together
% with its metadata.
%
% Inputs:
%   params - parameter struct produced by setParams
%   Iu8    - image handed to the engine; the caller passes either the
%            unmodified logical image or the uint8 grayscale conversion
%   roi    - int32 ROI matrix built by parseInputs (0-by-4 when no ROI was
%            supplied)
%   hasROI - true when the caller supplied an ROI
%
% Outputs:
%   txt         - raw text returned by the engine
%   ocrMetadata - metadata returned by the engine alongside the text

% Clear TESSDATA_PREFIX for the duration of the call. isSet and prefix
% record whether it was set and its previous value so it can be restored
% below.
[isSet, prefix] = unsetTessDataPrefix();

% True when params.Language differs from the language seen on the previous
% call; forwarded to the engine as resetParameters.
resetParameters = hasLanguageChanged(params.Language);
 
if vision.internal.ocr.isCodegen()            
    % NOTE(review): roi is not forwarded on this path, only hasROI -
    % confirm that this is intended.
    tessOpts = codegenParseParams(params);        
    [txt, ocrMetadata] = vision.internal.buildable.OCRBuildable.tesseract(tessOpts, Iu8, hasROI, resetParameters);       
else    
    tessOpts = parseParams(params);
    [txt, ocrMetadata] = tesseractWrapper(tessOpts, Iu8, hasROI, roi, resetParameters);
end

% Restore TESSDATA_PREFIX if it was cleared above.
% NOTE(review): this restore is not reached if the engine call above
% raises an error, in which case the variable appears to stay cleared for
% the rest of the session - confirm whether that needs guarding.
coder.extrinsic('setenv')
if isSimOrMex()
    if isSet
        setenv('TESSDATA_PREFIX',prefix)
    end
end

% -------------------------------------------------------------------------
% Return true if the input language does not match the cached language.
% -------------------------------------------------------------------------
function tf = hasLanguageChanged(language)
% Return true when LANGUAGE differs from the language seen on the previous
% call, and remember LANGUAGE for the next call.
%
% Input:
%   language - language specification: a char vector or, in MATLAB only, a
%              cell array of char vectors (flattened into one string by
%              convertToCacheableValue before it is compared).
%
% Output:
%   tf - false on the very first call and whenever the language matches the
%        cached one; true otherwise.
%
% The language is held in a fixed-size, zero-padded 1-by-4096 buffer so the
% persistent variable keeps a constant size for code generation. Only the
% first 4096 characters take part in the comparison.
persistent cachedLanguage

language = convertToCacheableValue(language);

maxLength = 4096;
n = min(maxLength, numel(language));

% Encode the current language into its own zero-padded buffer. uint16 is
% used so that every MATLAB char code is stored without saturation.
current = zeros(1,maxLength,'uint16');
current(1:n) = cast(language(1:n), 'uint16');

if isempty(cachedLanguage)
    % First call: there is nothing to compare against yet.
    cachedLanguage = current;
end

% Compare the complete padded buffers. Comparing only the first n entries
% would treat a language that is a prefix of the cached one (for example
% 'English' after 'EnglishJapanese') as unchanged, and would leave the tail
% of a longer, older language in the cache.
if isequal(current, cachedLanguage)
    tf = false;
else
    % language has changed. replace the cached value.
    cachedLanguage = current;
    tf = true;
end

% -------------------------------------------------------------------------
function lang = convertToCacheableValue(lang)
% Turn a language specification into a single string that can be cached.
% When running in MATLAB (coder.target is empty) a cell array of language
% strings is concatenated into one string; any other input is returned
% unchanged.
if iscell(lang) && isempty(coder.target)
    lang = [lang{:}];
end

% -------------------------------------------------------------------------
% Parse inputs.
% -------------------------------------------------------------------------
function [roi, hasROI, params] = parseInputs(I, varargin)
% Validate the image, the optional ROI and the name-value pairs, and
% return them in the form used by the rest of the file.
%
% Outputs:
%   roi    - int32 matrix of rounded ROI rectangles, or a 0-by-4 int32
%            matrix when no ROI was supplied
%   hasROI - true when an ROI was supplied
%   params - struct assembled by setParams from the validated inputs

sz = size(I);
vision.internal.inputValidation.validateImage(I);

% Name-value arguments come in pairs, so an odd number of arguments after
% the image means the first of them is the ROI.
if mod(nargin-1,2) == 1
    hasROI = true;
    % The ROI is rounded and converted to int32 before it is checked
    % against the first two image dimensions.
    roi = int32(round(varargin{1}));
    checkROI(roi,sz(1:2));
else
    hasROI = false;
    roi = ones(0,4,'int32');
end

if vision.internal.ocr.isCodegen()
    % Code generation path: parse the name-value pairs only, skipping the
    % ROI when one is present.
    if hasROI
        userInput = codegenParseInputs(varargin{2:end});
    else
        userInput = codegenParseInputs(varargin{:});
    end
else
    % MATLAB path: the cached inputParser accepts the optional ROI followed
    % by the name-value pairs.
    p = getInputParser();
    parse(p, varargin{:});
    userInput = p.Results;
    
    % p.UsingDefaults lists the parameters the caller did not specify. The
    % names are concatenated and searched for 'CharacterSet', so
    % UsingCharacterSet is true only when CharacterSet was passed in.
    userInput.UsingCharacterSet = isempty(regexp([p.UsingDefaults{:} ''],...
        'CharacterSet','once'));
    
end

validTextLayout = checkTextLayout(userInput.TextLayout);
[validLanguage, isCustomLanguage] = checkLanguage(userInput.Language);

% The character set is validated only when the caller supplied one.
if userInput.UsingCharacterSet    
    checkCharacterSet(userInput.CharacterSet);
end

checkPreprocessBinaryImage(userInput.PreprocessBinaryImage);

params = setParams(userInput, validLanguage, validTextLayout, isCustomLanguage);

% -------------------------------------------------------------------------
% Parse inputs during codegen.
% -------------------------------------------------------------------------
function results = codegenParseInputs(varargin)
% Parse the name-value pairs during code generation and return a struct
% with the fields TextLayout, Language, CharacterSet,
% PreprocessBinaryImage and UsingCharacterSet. Parameters that were not
% supplied take the values from getParamDefaults.

% Names of the accepted parameters. The uint32(0) values are placeholders.
pvPairs = struct( ...
    'TextLayout',   uint32(0), ...
    'Language',     uint32(0),...
    'CharacterSet', uint32(0),...
    'PreprocessBinaryImage',  uint32(0));

% Parser options: names are matched case-insensitively, partial names are
% accepted and struct inputs are expanded.
popt = struct( ...
    'CaseSensitivity', false, ...
    'StructExpand'   , true, ...
    'PartialMatching', true);

defaults = getParamDefaults();

% optarg has one field per parameter name; a field equal to uint32(0) is
% treated below as "not supplied by the caller".
optarg = eml_parse_parameter_inputs(pvPairs, popt, varargin{:});

results.TextLayout  = eml_get_parameter_value(optarg.TextLayout, ...
    defaults.TextLayout, varargin{:});

% The language is wrapped in coder.internal.const, i.e. it is required to
% be a compile-time constant.
language = coder.internal.const(eml_get_parameter_value(optarg.Language, ...
    coder.internal.const(defaults.Language), varargin{:}));

results.CharacterSet = eml_get_parameter_value(optarg.CharacterSet, ...
    defaults.CharacterSet, varargin{:});

results.PreprocessBinaryImage  = eml_get_parameter_value(optarg.PreprocessBinaryImage, ...
    defaults.PreprocessBinaryImage, varargin{:});

% UsingCharacterSet true if the user supplied one
results.UsingCharacterSet = logical(optarg.CharacterSet);

results.Language = language;

% Warning if a non-English language or custom language is specified.
if ~(optarg.Language==uint32(0)) && ~strcmpi(language,'english')    
    coder.internal.compileWarning('vision:ocr:codegenASCIIOnly');    
end

% -------------------------------------------------------------------------
function checkROI(roi,imageSize)
% Validate every row of ROI against IMAGESIZE, one rectangle at a time,
% using the shared detector ROI check.
numRegions = size(roi,1);
for k = 1:numRegions
    region = roi(k,:);
    vision.internal.detector.checkROI(region,imageSize);
end

% -------------------------------------------------------------------------
function isCustomLang = checkIfCustomLanguage(lang)
% Return true when LANG contains the text '.traineddata', which is how a
% path to a custom language data file is told apart from a language name.
matches = strfind(lang, '.traineddata');
isCustomLang = ~isempty(matches);

% -------------------------------------------------------------------------
function [lang, isCustomLanguage] = checkLanguage(userLanguage)
% Validate the 'Language' parameter.
%
% Input:
%   userLanguage - a char row vector or a row cell array of char vectors.
%                  Each entry is either a language name or a path to a
%                  custom '.traineddata' language file.
%
% Outputs:
%   lang             - the validated language(s): validated names for
%                      non-custom languages, or the file path(s) with the
%                      file separators fixed for custom languages.
%   isCustomLanguage - scalar logical, true when custom language data
%                      file(s) were specified.
%
% Errors are raised when custom and non-custom languages are mixed, when
% custom language files live in different folders, or when a custom
% language data file does not exist.

validateattributes(userLanguage,{'char','cell'},{'vector','row'}, ...
    mfilename,'Language');

coder.extrinsic('exist','filesep');
if ischar(userLanguage)
    isCustomLanguage = checkIfCustomLanguage(userLanguage);
    
    if vision.internal.ocr.isCodegen()
        modifiedLang = userLanguage;
    else
        % fix up filesep for current platform. This converts PC style \ to
        % unix style / or vice versa. This prevents failures when loading
        % tesseract data files.      
        modifiedLang = fixFilesep(userLanguage);
    end
    
    if ~isCustomLanguage        
        lang = vision.internal.ocr.validateLanguage(modifiedLang, vision.internal.ocr.ocrSpkgInstalled());        
    else
        
        lang = modifiedLang;           
        if vision.internal.ocr.isCodegen()
            coder.internal.errorIf(vision.internal.codegen.exist(lang) ~= 2,...
                'vision:ocr:languageDataFileNotFound', lang);        
        else
            coder.internal.errorIf(exist(lang,'file') ~= 2,...
                'vision:ocr:languageDataFileNotFound', lang);
        end
    end
    
else % cell array of languages strings
    
    % check custom language strings. When multiple custom languages are
    % specified, they must all be custom languages (i.e. 'English' and a
    % custom language is not allowed).
    
    isCustomLanguage = cellfun(@(x)checkIfCustomLanguage(x),userLanguage); %#ok<EMFH>
       
    isMixedCustomAndNonCustomLanguages = any(isCustomLanguage) && ~all(isCustomLanguage);
    
    if isMixedCustomAndNonCustomLanguages        
        error(message('vision:ocr:customAndNonCustom'));
    end
            
    if all(isCustomLanguage)
        
        % all had at least tessdata/*.traineddata
        isCustomLanguage = true;
               
        lang    = fixFilesep(userLanguage);                
        pathstr = cell(numel(lang),1);
        for i = 1:numel(lang)
           pathstr{i} = fileparts(lang{i});
        end
        
        % All language data files must be in the same tessdata folder. The
        % folders are compared as strings, so this check also fails when
        % one entry is a relative path and another is a full path to the
        % same folder.
        if numel(unique(pathstr)) > 1
            error(message('vision:ocr:notUniqueLanguagePath'));
        end
        
        % All language data files must be accessible. exist(...,'file')
        % returns 2 for a file and 7 for a folder; require a file, exactly
        % as the single-language branch above does.
        for i = 1:numel(lang)
            if exist(lang{i},'file') ~= 2
                error(message('vision:ocr:languageDataFileNotFound',lang{i}));
            end
        end
    else        
        % check non-custom language strings
        isSupportPackageInstalled = vision.internal.ocr.ocrSpkgInstalled();
        lang = cell(1, numel(userLanguage));
        for i = 1:numel(userLanguage)            
            lang{i} = vision.internal.ocr.validateLanguage(userLanguage{i}, isSupportPackageInstalled);
        end        
        isCustomLanguage = false;
    end
end

% -------------------------------------------------------------------------
function checkCharacterSet(list)
% Validate the 'CharacterSet' parameter: it must be of class char, and when
% it is not empty it must additionally be a vector. The empty string '' is
% accepted.
hasCharacters = ~isempty(list);

% class check applies to every input, including ''
validateattributes(list, {'char'},{},mfilename,'CharacterSet');

if hasCharacters
    % shape check applies only to non-empty input
    validateattributes(list, {'char'},{'vector'},mfilename,'CharacterSet');
end
% -------------------------------------------------------------------------
function str = checkTextLayout(layout)
% Validate the 'TextLayout' parameter against the supported layouts and
% return the matching entry of the option list as given by validatestring.

str = validatestring(layout,{'Auto','Block','Line','Word','Character'},...
    mfilename,'TextLayout');

% -------------------------------------------------------------------------
function checkPreprocessBinaryImage(value)
% Validate the 'PreprocessBinaryImage' parameter: a numeric or logical
% scalar that is real, non-sparse and not NaN.

validateattributes(value, {'numeric','logical'}, ...
    {'nonnan', 'scalar', 'real','nonsparse'}, mfilename, 'PreprocessBinaryImage');

% -------------------------------------------------------------------------
function defaults = getParamDefaults()
% Return a struct holding the default value of every name-value parameter.
% The string defaults are wrapped in coder.internal.const, i.e. they are
% declared as compile-time constants for code generation.

defaults.TextLayout   = coder.internal.const('Auto');
defaults.Language     = coder.internal.const('English');
defaults.CharacterSet = coder.internal.const('');
defaults.PreprocessBinaryImage  = true;

% -------------------------------------------------------------------------
function params = setParams(userInput, language, textLayout, isCustomLanguage)
% Assemble the parameter struct used by the rest of the file.
%
% Inputs:
%   userInput        - parsed inputs; CharacterSet, UsingCharacterSet and
%                      PreprocessBinaryImage are copied from it
%   language         - validated language returned by checkLanguage
%   textLayout       - validated layout returned by checkTextLayout
%   isCustomLanguage - true when a custom language data file is used

params.TextLayout        = textLayout;
params.Language          = language;
params.CharacterSet      = userInput.CharacterSet;
params.UsingCharacterSet = userInput.UsingCharacterSet;
params.isCustomLanguage  = coder.internal.const(isCustomLanguage);
% PreprocessBinaryImage may arrive as a numeric value; store it as logical.
params.PreprocessBinaryImage = logical(userInput.PreprocessBinaryImage);


% -------------------------------------------------------------------------
% Parse tesseract parameters
% -------------------------------------------------------------------------
function tessOpts = parseParams(params)
% Build the options struct passed to tesseractWrapper on the MATLAB path.
% The result has the fields setVariable, tessdata, lang and initVariable.

% Specify tesseract variable names as the fields of setVariable. The
% variable values should be specified as strings.
tessOpts.setVariable.tessedit_pageseg_mode = getTextLayout(params);

% The character whitelist is set only when the caller supplied a
% CharacterSet.
if params.UsingCharacterSet
    tessOpts.setVariable.tessedit_char_whitelist = params.CharacterSet;
end

% enable save_blob_choices to save individual character confidence values.
tessOpts.setVariable.save_blob_choices = 'T';

[tessdata, lang] = getLanguageInfo(params);

tessOpts.tessdata     = tessdata;
tessOpts.lang         = lang;

% Specify tesseract initialization variables names as the fields of
% initVariable. The variable values should be specified as strings.
tessOpts.initVariable = [];

% -------------------------------------------------------------------------
% codegen: Parse tesseract parameters
% -------------------------------------------------------------------------
function tessOpts = codegenParseParams(params)
% Build the options struct passed to the OCR buildable on the code
% generation path. The result has the fields textLayout, characterSet,
% tessdata and lang.

textLayout = getTextLayout(params); 

% Use an empty character set unless the caller supplied one.
% NOTE(review): charSet is assigned in both branches rather than
% defaulted and overwritten, presumably to keep a single definition per
% path for code generation - confirm before restructuring.
if params.UsingCharacterSet
    charSet = params.CharacterSet;
else
    charSet = '';
end    

[tessdata,lang] = getLanguageInfo(params);

tessOpts.textLayout   = textLayout;
tessOpts.characterSet = charSet;
tessOpts.tessdata     = tessdata;
tessOpts.lang         = lang;

% -------------------------------------------------------------------------
% Return the parameter value used by tesseract to set the page segmentation
% mode (PSM). Setting other values for the page segmentation mode is not
% recommended.
% -------------------------------------------------------------------------
function textLayout = getTextLayout(params)
% Map params.TextLayout onto the string value used for tesseract's page
% segmentation mode:
%   'Auto' -> '3', 'Block' -> '6', 'Line' -> '7', 'Word' -> '8',
%   'Character' -> '10'.
% Any other layout yields '' so that the output is assigned on every path,
% as code generation requires.
layoutName = params.TextLayout;

if strcmp(layoutName, 'Auto')
    textLayout = '3';
elseif strcmp(layoutName, 'Block')
    textLayout = '6';
elseif strcmp(layoutName, 'Line')
    textLayout = '7';
elseif strcmp(layoutName, 'Word')
    textLayout = '8';
elseif strcmp(layoutName, 'Character')
    textLayout = '10';
else
    textLayout = '';
end

% -------------------------------------------------------------------------
% Return the path to the tessdata folder and the language string.
% -------------------------------------------------------------------------
function [tessdata,lang] = getLanguageInfo(params)
% Return the tessdata location and the language string for the language in
% params.Language.
%
% Outputs:
%   tessdata - the part of the path that precedes the 'tessdata' folder for
%              a custom language, or the result of locateTessdataFolder
%              for a non-custom language
%   lang     - language name(s); several custom languages are joined as
%              'lang1+lang2+...'
coder.extrinsic('ctfroot','matlabroot','fullfile','regexpi');

if params.isCustomLanguage     
    % params.Language contains a validated file path to the Tesseract
    % language data file. The expected format is
    %
    %    'path/to/tessdata/foo.traineddata'
    %   
    % where foo is the language name.          
    if iscell(params.Language)
        
        % this section of code is not supported for code generation due to
        % limited cell array support.
        
        lang = cell(size(params.Language));
        for i = 1:numel(params.Language)
            % The language name is the text between the last
            % 'tessdata<filesep>' and the last '.traineddata'. The offset 9
            % is the length of 'tessdata' plus the one-character separator.
            % NOTE(review): indexStart(end) errors when a path does not
            % contain 'tessdata<filesep>' - confirm that earlier validation
            % rules this out.
            indexStart = strfind(params.Language{i}, ['tessdata' filesep]) + 9; 
            indexEnd   = strfind(params.Language{i}, '.traineddata') - 1;
            lang{i} = params.Language{i}(indexStart(end):indexEnd(end));
        end
        if numel(lang) > 1
            % multiple language take the form "lang1+lang2+..."
            lang = strjoin(lang,'+');
        else
            lang = lang{1};
        end
        
        % find the string 'tessdata/foo.traineddata' located at the end of
        % language data path specified by the user.
        % Only the first entry is inspected here; checkLanguage requires
        % all entries to share one folder.
        indexStart = regexpi(params.Language{1},...
            'tessdata[\/\\]+(\w+)\.traineddata$','start');     
        
        tessdata = params.Language{1}(1:indexStart-1);
        
        if isempty(tessdata)
            % tessdata located in the current directory
            % NOTE(review): tessdata is also empty when the pattern above
            % does not match (e.g. a file name with characters outside
            % \w), which then falls back to the current directory too.
            tessdata = ['.' filesep];
        end                
    else      
        tessdata = getTessdataFromPath(params.Language);
        lang     = getLanguageFromPath(params.Language);                 
    end
    
else % a non-custom language
    lang     = vision.internal.ocr.convertLanguageToAlias(params.Language);       
    tessdata = vision.internal.ocr.locateTessdataFolder(lang);
end

% -------------------------------------------------------------------------
% Return the location of the tessdata folder from the path to a custom
% language data file.
%--------------------------------------------------------------------------
function tessdata = getTessdataFromPath(datapath)
% Return the portion of DATAPATH that precedes the last occurrence of
% 'tessdata'. When DATAPATH starts with 'tessdata' the result is './'; when
% 'tessdata' does not occur at all the result is empty.
occurrences = strfind(datapath,'tessdata');

% codegen: use isempty to check strfind result
if isempty(occurrences)
    prefixLength = -1;
else
    prefixLength = occurrences(end) - 1;
end

if prefixLength == 0
    % 'tessdata' is the first thing in the path
    tessdata = './';
else
    tessdata = datapath(1:prefixLength);
end

% -------------------------------------------------------------------------
% Return language alias from the path to a custom language data file.
%--------------------------------------------------------------------------
function lang = getLanguageFromPath(datapath)
% Extract the language name from a path of the form
% 'path/to/tessdata/foo.traineddata'. The name starts 9 characters after
% the start of the last 'tessdata' (its 8 letters plus one separator) and
% ends just before the last '.traineddata'. Returns '' when either marker
% is missing.

folderHits = strfind(datapath, 'tessdata');
suffixHits = strfind(datapath, '.traineddata');

if isempty(folderHits) || isempty(suffixHits)
    lang = '';
else
    firstChar = folderHits(end) + 9;
    lastChar  = suffixHits(end) - 1;
    lang      = datapath(firstChar:lastChar);
end

% -------------------------------------------------------------------------
% Return the inputParser used for parameter parsing. The inputParser is
% created once and stored in a persistent variable to improve performance.
% -------------------------------------------------------------------------
function parser = getInputParser()
% Return the inputParser used on the MATLAB path. It is built on the first
% call, with an optional ROI followed by the four name-value parameters and
% their defaults, and the same object is handed back on every later call.
persistent p;

if isempty(p)
    defaults  = getParamDefaults();
    newParser = inputParser();
    addOptional(newParser, 'ROI', []);
    addParameter(newParser, 'TextLayout',   defaults.TextLayout);
    addParameter(newParser, 'Language',     defaults.Language);
    addParameter(newParser, 'CharacterSet', defaults.CharacterSet);
    addParameter(newParser, 'PreprocessBinaryImage', ...
        defaults.PreprocessBinaryImage);
    p = newParser;
end

parser = p;


% -------------------------------------------------------------------------
function modifiedLang = fixFilesep(userLanguage)
% Replace every '/' and '\' in USERLANGUAGE with the file separator of the
% current platform, so that tesseract data files load regardless of which
% separator style the caller used.
if ~vision.internal.ocr.isCodegen()
    modifiedLang = regexprep(userLanguage,'[\/\\]',filesep);
else
    % Not used during code generation; the output is passed through only
    % because codegen requires it to be assigned on all execution paths.
    modifiedLang = userLanguage;
end

% -------------------------------------------------------------------------
% Clear the TESSDATA_PREFIX environment variable if it is set. This enables
% ocr to use the tessdata files specified using file paths instead of
% defaulting to the location of TESSDATA_PREFIX.
% -------------------------------------------------------------------------
function [isSet, prefix] = unsetTessDataPrefix()
% Clear the TESSDATA_PREFIX environment variable when running in MATLAB or
% MEX and report what it held.
%
% Outputs:
%   isSet  - true when the variable held a non-empty value and was cleared
%   prefix - the value read from the environment; '' outside MATLAB/MEX
coder.extrinsic('setenv','getenv')

if isSimOrMex()
    prefix = getenv('TESSDATA_PREFIX');
    if isempty(prefix)
        isSet = false;
    else
        % Clear the variable; the caller restores it from prefix afterwards.
        setenv('TESSDATA_PREFIX','');
        isSet = true;
    end
else
    % Other code generation targets: the environment is left untouched.
    isSet  = false;
    prefix = '';
end

% -------------------------------------------------------------------------
% Check whether we are in sim or mex mode
% -------------------------------------------------------------------------
function tf = isSimOrMex()
% Return true when running in MATLAB (coder.target is empty) or when the
% code generation target is MEX.

tf = coder.target('MEX') || isempty(coder.target);