gusucode.com > VC_C++源码,界面编程,网页爬虫源码程序 > VC_C++源码,界面编程,网页爬虫源码程序/code/webpageloader_SourceCode/ThreadFunctions.cpp
// ThreadFunctions.cpp: implementation of the ThreadFunctions class. // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "WebPageLoader.h" #include "ThreadFunctions.h" #ifdef _DEBUG #undef THIS_FILE static char THIS_FILE[]=__FILE__; #define new DEBUG_NEW #endif ////////////////////////////////////////////////////////////////////// // Helper functions ////////////////////////////////////////////////////////////////////// BOOL BfxHasValidExtension(LPCTSTR pstrFilename, LPCTSTR pstrExtensions) { ASSERT(AfxIsValidString(pstrFilename)); ASSERT(AfxIsValidString(pstrExtensions)); // Check if it's a image.... // Extract extension first CString sFilename( pstrFilename ); sFilename.MakeLower(); int pos; pos = sFilename.Find(_T('?')); if( pos>=0 ) sFilename = sFilename.Left(pos); pos = sFilename.ReverseFind(_T('.')); if( pos<0 ) return FALSE; CString sExt; sExt = sFilename.Mid(pos+1); pos = sExt.ReverseFind(_T('/')); if( pos>=0 ) sExt = sExt.Mid(pos); // Mask off some traditional URL extensions pos = sExt.Find(_T('?')); if( pos>0 ) sExt = sExt.Left(pos); pos = sExt.Find(_T('#')); if( pos>0 ) sExt = sExt.Left(pos); // Anything left? if( sExt.IsEmpty() ) return FALSE; // Scan known extensions // We look through the defined list of known image extensions CString sFilter( pstrExtensions ); sFilter.MakeLower(); while( !sFilter.IsEmpty() ) { CString tok = BfxRemoveToken(sFilter, _T(';')); if( sExt==tok ) return TRUE; }; return FALSE; }; BOOL BfxIsValidSession(CSession *pSession) { ASSERT_VALID(pSession); if( pSession==NULL ) return FALSE; // NOTE: This is a rough copy of the CObject::IsKindOf() method. if( ::IsBadCodePtr((FARPROC)pSession) ) return FALSE; if( ::IsBadReadPtr(pSession,sizeof(CSession)) ) return FALSE; if( ::IsBadReadPtr(*(void**)pSession, sizeof(void*)) ) return FALSE; if( ::IsBadReadPtr(pSession, pSession->GetRuntimeClass()->m_nObjectSize)) return FALSE; return TRUE; }; ////////////////////////////////////////////////////////////////////// // // ImageScanSession // ////////////////////////////////////////////////////////////////////// UINT DownloadSessionThread( LPVOID pParam ) { CSession *pSession = (CSession *) pParam; if( !BfxIsValidSession(pSession) ) return 1; TRACE(_T("Thread %d starts.\n"), pSession->m_hThread); TRY { pSession->Start(); // First we want to validate the session... bool bOk = true; { CSingleLock lock( *pSession, TRUE ); switch( pSession->m_Type ) { case TYPE_IMAGESCAN: if( pSession->m_sFormat.IsEmpty() ) bOk = false; if( pSession->m_nStartIndex > pSession->m_nStopIndex ) bOk = false; break; case TYPE_HTMLSCAN: break; default: // Not a valid type bOk = false; break; }; } if( !bOk ) { pSession->Done(); return 1; }; } CATCH_ALL( e ) { // oops... TRACE("WebPageLoader: Thread exception caught!\n"); pSession->Done(); return 1; } END_CATCH_ALL CString sProxy; sProxy.Format(_T("http://%s:%ld"), pSession->m_Preferences->m_sProxy, pSession->m_Preferences->m_iPort); CInternetSession inet( pSession->m_Preferences->m_sAgentName, pSession->m_iUniqueID, pSession->m_Preferences->m_bUseProxy ? INTERNET_OPEN_TYPE_PROXY : PRE_CONFIG_INTERNET_ACCESS, pSession->m_Preferences->m_bUseProxy ? (LPCTSTR) sProxy : NULL ); TRY { pSession->SetState(STATE_RUNNING); CDownloadFile *pFile = NULL; int nItem = 0; while( TRUE ) { // Download all scheduled files while( (pFile = pSession->m_Files.GetNextDownload() ) != NULL ) { pFile->Download(pSession, &inet, ++nItem); // Do a little checking if( !BfxIsValidSession(pSession) ) return 0; // EMERGENCY // Do we need to stop if( pSession->m_bSleepRequest || pSession->m_bStopRequest || pSession->m_bKillRequest ) { pSession->SafeLog(LOGTYPE_WARNING, IDS_LOG_INTERRUPT); break; } }; // Reschedule all broken files BOOL res; res = pSession->m_Files.RescheduleBrokenDownloads(pSession->m_Settings.m_nDownloadRetries); if( res ) pSession->Log(LOGTYPE_LOG, IDS_LOG_RESCHEDULED); // No files to download anymore? if( pSession->m_Files.GetNextDownload()==NULL ) { break; } // Do we need to stop if( pSession->m_bSleepRequest || pSession->m_bStopRequest || pSession->m_bKillRequest ) { break; } }; inet.Close(); } CATCH_ALL( e ) { // oops... TRACE("WebPageLoader: Thread download exception caught!\n"); inet.Close(); } END_CATCH_ALL // Done TRACE(_T("Thread %d is done.\n"), pSession->m_hThread); pSession->Done(); return 0; };