gusucode.com > VC_C++源码,界面编程,网页爬虫源码程序 > VC_C++源码,界面编程,网页爬虫源码程序/code/webpageloader_SourceCode/RegExp1.cpp
#include "stdafx.h"
#include "regexp1.h"

// Simple wildcard pattern matching (not regular expressions).
// Recognises *, ? and [] with ranges.
// Although these functions use TCHAR they are not multibyte enabled:
// every TCHAR is treated as one complete character.

// Upper-cases a single TCHAR.
// The value is converted to _TUCHAR first so that, in builds where TCHAR is
// a signed char, characters above 0x7F are not handed to _totupper as
// negative numbers.
static TCHAR UpperChar(TCHAR ch)
{
    return static_cast<TCHAR>(_totupper(static_cast<_TUCHAR>(ch)));
}

// Tests String against a list of patterns separated by '|'.
//
//   String    text to test.
//   Patterns  one or more MatchPattern() patterns joined with '|'.
//
// Returns TRUE as soon as one alternative matches, FALSE when none does
// (including when Patterns is NULL or empty).
//
// Runs of '|' between or after alternatives are skipped.  A list that
// *starts* with '|' yields an empty first alternative, and an empty pattern
// matches any string (see MatchPattern).
BOOL MatchPatterns(LPCTSTR String, LPCTSTR Patterns)
{
    CString sPatterns = Patterns;

    while( !sPatterns.IsEmpty() ) {
        // Everything up to (not including) the next separator.
        CString sPattern = sPatterns.SpanExcluding(_T("|"));
        if( MatchPattern(String, sPattern) )
            return TRUE;

        // Drop the alternative just tried and the separator(s) after it.
        sPatterns = sPatterns.Mid(sPattern.GetLength());
        sPatterns.TrimLeft(_T("|"));
    }
    return FALSE;
}

// Tests String against a single wildcard pattern.
//
//   String   text to test.
//   Pattern  pattern text:
//              *      matches zero or more characters
//              ?      matches exactly one character
//              [set]  matches one character that equals an element of the
//                     set or lies inside a range written as lo-hi
//              other  matches that character
//
// Returns TRUE on a match, FALSE otherwise.  FALSE is also returned when
// either argument is NULL or when a [set] is malformed (no closing ']',
// or a '-' without an upper bound).
//
// Letters are compared case-insensitively: both the subject character and
// the pattern character are upper-cased before comparison.
//
// The match is anchored at the start of String only.  When the pattern is
// exhausted the function returns TRUE even if String still has characters
// left, so "AB" matches "ABC" and an empty pattern matches everything.
BOOL MatchPattern(LPCTSTR String, LPCTSTR Pattern)
{
    if( String == NULL || Pattern == NULL )
        return FALSE;

    TCHAR c, p, l;

    for (; ;) {
        switch( p = *Pattern++ ) {
        case _T('\0'):                      // end of pattern
            return TRUE;                    // SUCCESS

        case _T('*'):
            // Consecutive '*' are equivalent to one; collapsing them avoids
            // needless repeated backtracking.
            while( *Pattern == _T('*') )
                ++Pattern;

            // Try the rest of the pattern at every remaining position.
            while( *String ) {
                if( MatchPattern(String++, Pattern) )
                    return TRUE;
            }
            // '*' consumed the whole string; the rest must match "".
            return MatchPattern(String, Pattern);

        case _T('?'):
            if( *String++ == 0 )            // needs one character
                return FALSE;               // but string has ended
            break;

        case _T('['):
            if( (c = *String++) == 0 )      // a set needs one character
                return FALSE;
            c = UpperChar(c);
            l = _T('0');                    // low bound used when a range
                                            // has no preceding element

            // Walk the set until an element or range accepts c.
            for (; ;) {
                p = *Pattern++;
                if( p == _T('\0') )         // pattern ended inside the set
                    return FALSE;           // syntax
                if( p == _T(']') )          // end of set reached
                    return FALSE;           // no match found
                if( p == _T('-') ) {        // range: l .. next character
                    p = *Pattern;           // peek at the high limit
                    if( p == _T('\0') || p == _T(']') )
                        return FALSE;       // syntax
                    // Test the bounds as written and also upper-cased, so
                    // [a-z] works while a range spanning non-letters such
                    // as [A-z] keeps matching what it always matched.
                    if( (c >= l && c <= p) ||
                        (c >= UpperChar(l) && c <= UpperChar(p)) )
                        break;              // in range, move on
                }
                l = p;                      // remember as next low bound
                if( c == UpperChar(p) )     // element matches
                    break;                  // move on
            }

            // Matched: skip the remainder of the set up to its ']'.
            while( p != _T(']') ) {
                p = *Pattern++;
                if( p == _T('\0') )         // no closing ']'
                    return FALSE;           // syntax
            }
            break;

        default:
            c = *String++;
            if( UpperChar(c) != UpperChar(p) )  // exact char, ignoring case
                return FALSE;                   // not a match
            break;
        }
    }
}