#import "msxml3.dll"
using namespace MSXML2; // For Msxml3.dll.

#include "stdio.h"
#include "tchar.h"
#include <regex>
#include <string>
|
using namespace std; |
using namespace std::tr1; |
|
#define URL_SIZE 2048 |
#define TITLE_SIZE 1024 |
#define DILBERT_RSS_FEED "http://www.stickfigurehamlet.com/stickfigurehamlet.rss" |
//#define DILBERT_RSS_FEED "http://www.phdcomics.com/gradfeed_justcomics.php" |
//#define DILBERT_RSS_FEED "http://feedproxy.google.com/DilbertDailyStrip" |
|
//#pragma comment(lib, "rpcrt4") |
|
/*
 * UnicodeToAnsi
 *
 * Converts a null-terminated wide (UTF-16) string to the process ANSI code
 * page (CP_ACP).
 *
 * pszW   - wide string to convert; may be NULL.
 * ppszA  - receives a buffer allocated with CoTaskMemAlloc that holds the
 *          converted, null-terminated string. The caller releases it with
 *          CoTaskMemFree. Set to NULL when pszW is NULL or on any failure.
 *
 * Returns NOERROR on success (including the NULL-input case), E_POINTER if
 * ppszA is NULL, E_OUTOFMEMORY if the buffer cannot be allocated, or the
 * HRESULT form of the Win32 error reported by WideCharToMultiByte.
 */
HRESULT __fastcall UnicodeToAnsi(LPCOLESTR pszW, LPSTR* ppszA)
{
    if (NULL == ppszA)
        return E_POINTER;

    *ppszA = NULL;

    // If input is null then just return the same.
    if (NULL == pszW)
        return NOERROR;

    // Ask the API for the exact byte count instead of assuming two bytes
    // per character: the active code page may need more than that (UTF-8,
    // for example, needs three bytes for some UTF-16 code units). Passing
    // -1 as the length makes the count include the terminating null.
    const int cbAnsi = WideCharToMultiByte(CP_ACP, 0, pszW, -1, NULL, 0,
                                           NULL, NULL);
    if (0 == cbAnsi)
    {
        const DWORD dwError = GetLastError();
        return HRESULT_FROM_WIN32(dwError);
    }

    // Use of the OLE allocator is not required because the resultant
    // ANSI string will never be passed to another COM component. You
    // can use your own allocator.
    LPSTR pszA = static_cast<LPSTR>(CoTaskMemAlloc(cbAnsi));
    if (NULL == pszA)
        return E_OUTOFMEMORY;

    // Convert to ANSI.
    if (0 == WideCharToMultiByte(CP_ACP, 0, pszW, -1, pszA, cbAnsi,
                                 NULL, NULL))
    {
        // Capture the error before freeing so the free cannot disturb it.
        const DWORD dwError = GetLastError();
        CoTaskMemFree(pszA);
        return HRESULT_FROM_WIN32(dwError);
    }

    *ppszA = pszA;
    return NOERROR;
}
|
|
/*
 * AnsiToUnicode
 *
 * Converts a null-terminated string in the process ANSI code page (CP_ACP)
 * to a wide (UTF-16) string.
 *
 * pszA   - ANSI string to convert; may be NULL.
 * ppszW  - receives a buffer allocated with CoTaskMemAlloc that holds the
 *          converted, null-terminated string. The caller releases it with
 *          CoTaskMemFree. Set to NULL when pszA is NULL or on any failure.
 *
 * Returns NOERROR on success (including the NULL-input case), E_POINTER if
 * ppszW is NULL, E_OUTOFMEMORY if the buffer cannot be allocated, or the
 * HRESULT form of the Win32 error reported by MultiByteToWideChar.
 */
HRESULT __fastcall AnsiToUnicode(LPCSTR pszA, LPWSTR* ppszW)
{
    if (NULL == ppszW)
        return E_POINTER;

    *ppszW = NULL;

    // If input is null then just return the same.
    if (NULL == pszA)
        return NOERROR;

    // Let the API report how many wide characters are needed rather than
    // narrowing strlen() from size_t to int by hand. Passing -1 as the
    // length makes the count include the terminating null.
    const int cchWide = MultiByteToWideChar(CP_ACP, 0, pszA, -1, NULL, 0);
    if (0 == cchWide)
    {
        const DWORD dwError = GetLastError();
        return HRESULT_FROM_WIN32(dwError);
    }

    // Use of the OLE allocator is required if the resultant Unicode
    // string will be passed to another COM component and if that
    // component will free it. Otherwise you can use your own allocator.
    LPWSTR pszW = static_cast<LPWSTR>(
        CoTaskMemAlloc(static_cast<size_t>(cchWide) * sizeof(WCHAR)));
    if (NULL == pszW)
        return E_OUTOFMEMORY;

    // Convert to Unicode.
    if (0 == MultiByteToWideChar(CP_ACP, 0, pszA, -1, pszW, cchWide))
    {
        // Capture the error before freeing so the free cannot disturb it.
        const DWORD dwError = GetLastError();
        CoTaskMemFree(pszW);
        return HRESULT_FROM_WIN32(dwError);
    }

    *ppszW = pszW;
    return NOERROR;
}
|
/*
 * dump_com_error
 *
 * Prints the HRESULT, its system message, and the source and description
 * carried by a _com_error to stdout.
 *
 * e - the COM error to report.
 */
void dump_com_error(_com_error &e)
{
    printf("Error\n");
    printf("\a\tCode = %08lx\n", e.Error());

    // ErrorMessage() returns a TCHAR string, which is wide in Unicode
    // builds, so it cannot be handed to a narrow "%s" directly. Routing it
    // through _bstr_t (as is done for Source and Description below) yields
    // a narrow string in either build configuration.
    _bstr_t bstrMessage(e.ErrorMessage());
    printf("\a\tCode meaning = %s\n", static_cast<LPCSTR>(bstrMessage));

    _bstr_t bstrSource(e.Source());
    _bstr_t bstrDescription(e.Description());
    printf("\a\tSource = %s\n", static_cast<LPCSTR>(bstrSource));
    printf("\a\tDescription = %s\n", static_cast<LPCSTR>(bstrDescription));
}
|
int main() |
{ |
HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); |
if (hr != S_OK) |
return 0; |
|
bool encounteredError = false; |
|
IXMLHTTPRequestPtr pXMLHTTPReq = NULL; |
MSXML2::IXMLDOMDocumentPtr pXMLDocPtr = NULL; |
MSXML2::IXMLDOMNodeListPtr pItemNodeList = NULL; |
MSXML2::IXMLDOMElementPtr pItemElement = NULL; |
|
try |
{ |
// Create an XMLHTTPRequest object to request the feed |
hr = pXMLHTTPReq.CreateInstance(__uuidof(MSXML2::XMLHTTP30)); |
if (FAILED(hr)) |
throw hr; |
|
// open the request |
hr = pXMLHTTPReq->open(_bstr_t(_T("GET")), _bstr_t(DILBERT_RSS_FEED), _variant_t(VARIANT_TRUE)); |
//hr = pXMLHTTPReq->open(_bstr_t(_T("GET")), _bstr_t(DILBERT_RSS_FEED), _variant_t(VARIANT_FALSE)); |
if (FAILED(hr)) |
throw hr; |
|
// Set the headers |
hr = pXMLHTTPReq->setRequestHeader(_bstr_t(_T("charset")), _bstr_t(_T("UTF-8"))); |
if (FAILED(hr)) |
throw hr; |
|
// Send the request |
hr = pXMLHTTPReq->send(NULL); |
if (FAILED(hr)) |
throw hr; |
|
long readyState = READYSTATE_UNINITIALIZED; |
MSG msg; |
while (readyState != READYSTATE_COMPLETE) |
{ |
// Without this message pump, readyState does not change. |
if (PeekMessage(&msg, 0, 0 ,0, PM_REMOVE)) |
{ |
TranslateMessage(&msg); |
DispatchMessage(&msg); |
} |
|
readyState = pXMLHTTPReq->GetreadyState(); |
} |
|
long nStatus = 0; |
hr = pXMLHTTPReq->get_status(&nStatus); |
if (FAILED(hr)) |
throw hr; |
|
// Process the feed if the response was received successfully |
if (nStatus == 200) |
{ |
// Retrieve the RSS XML DOM Document to process the RSS |
// Feed results and extract the comic strip's images info |
|
// Retrieve the XML DOM Document from the response |
BSTR bstrString = NULL; |
hr = pXMLHTTPReq->get_responseText(&bstrString); |
printf("Response Body:\r\n%S\r\n", bstrString); |
|
hr = pXMLHTTPReq->get_responseXML((IDispatch **) &pXMLDocPtr); |
if (FAILED(hr)) |
throw hr; |
|
BSTR bstrXMLDoc = NULL; |
pXMLDocPtr->get_text(&bstrXMLDoc); |
printf("XML Response:\r\n%S\r\n", bstrXMLDoc); |
|
// Retrieve the list of "item" elements |
pItemNodeList = pXMLDocPtr->getElementsByTagName(_bstr_t(_T("item"))); |
if (FAILED(pItemNodeList)) |
throw hr; |
|
//Here, if we're in error pDomNode is NULL |
if (pItemNodeList != NULL) |
{ |
long nItems = 0; |
hr = pItemNodeList->get_length(&nItems); |
if (FAILED(hr)) |
throw hr; |
|
for (int i = 0; (i < (int)nItems) && (encounteredError == false); i++) |
{ |
WCHAR rssTitle[TITLE_SIZE]; |
WCHAR rssLink[URL_SIZE]; |
WCHAR rssComicURL[URL_SIZE]; |
WCHAR rssComicFileName[MAX_PATH+1]; |
|
// Retrieve the ith item element |
pItemElement = pItemNodeList->item[i]; |
if (pItemElement != NULL) |
{ |
// Retrieve the title text |
_tcscpy_s(rssTitle, pItemElement->firstChild->text); |
|
// Retrieve the link element |
MSXML2::IXMLDOMNodeListPtr pLinkNodes = NULL; |
MSXML2::IXMLDOMElementPtr pLinkElement = NULL; |
|
pLinkNodes = pItemElement->getElementsByTagName(_T("link")); |
if (pLinkNodes != NULL) |
{ |
long nLinkElements = 0; |
hr = pLinkNodes->get_length(&nLinkElements); |
if (FAILED(hr)) |
throw hr; |
|
if (nLinkElements == 1) |
{ |
pLinkElement = pLinkNodes->item[0]; |
|
// Retrieve the link |
if (pLinkElement != NULL) |
_tcscpy_s(rssLink, pLinkElement->text); |
} |
} |
|
// Retrieve the description element |
MSXML2::IXMLDOMNodeListPtr pSummaryNodes = NULL; |
MSXML2::IXMLDOMElementPtr pSummaryElement = NULL; |
|
pSummaryNodes = pItemElement->getElementsByTagName(_bstr_t(_T("description"))); |
if (pSummaryNodes != NULL) |
{ |
long nSummaryElements = 0; |
hr = pSummaryNodes->get_length(&nSummaryElements); |
if (FAILED(hr)) |
throw hr; |
|
if (nSummaryElements == 1) |
{ |
pSummaryElement = pSummaryNodes->item[0]; |
|
// Retrieve the description |
if (pSummaryElement != NULL) |
{ |
LPSTR szRssSummary; |
CHAR szRssComicURL[MAX_PATH+1]; |
UnicodeToAnsi(pSummaryElement->text, &szRssSummary); |
|
// Retrieve the image URL |
const regex imageurl("\\b(https?|ftp)://([-a-zA-Z0-9.]+)(/[-a-zA-Z0-9+&@#/%=~_|!:,.;]*)?(gif|png|jpg)"); |
cmatch matches; |
|
if (regex_search(szRssSummary, matches, imageurl)) |
{ |
strcpy_s(szRssComicURL, matches[0].str().c_str()); |
#ifdef _UNICODE |
LPWSTR pszRssComicURL; |
AnsiToUnicode(szRssComicURL, &pszRssComicURL); |
_tcscpy_s(rssComicURL, URL_SIZE, pszRssComicURL); |
#else |
_tcscpy_s(rssComicURL, URL_SIZE, matches[0].str().c_str()); |
#endif |
} |
else |
{ |
_tcscpy_s(rssComicURL, URL_SIZE, _T("Not found")); |
} |
|
//const regex imageFileName("\\b(https?|ftp)://([-a-zA-Z0-9.]+)(/[-a-zA-Z0-9+&@#/%=~_|!:,.;]*)?(gif|png|jpg)"); |
const regex imageFileName("[\\w_.-]*?(?=\\?)|[\\w_.-]*$"); |
if (regex_search(szRssComicURL, matches, imageFileName)) |
{ |
#ifdef _UNICODE |
LPWSTR pszImageFileName; |
AnsiToUnicode(matches[0].str().c_str(), &pszImageFileName); |
_tcscpy_s(rssComicFileName, MAX_PATH+1, pszImageFileName); |
#else |
_tcscpy_s(rssComicFileName, MAX_PATH+1, matches[0].str().c_str()); |
#endif |
} |
else |
{ |
_tcscpy_s(rssComicFileName, MAX_PATH+1, _T("Not found")); |
} |
} |
} |
} |
} |
else |
{ |
encounteredError = true; |
} |
} |
encounteredError = false; |
} |
else |
{ |
encounteredError = true; |
} |
} |
else |
{ |
encounteredError = true; |
//cout << "Error selecting XML single node" ; |
} |
} |
catch(_com_error &e) |
{ |
bool encounteredError = true; |
dump_com_error(e); |
} |
|
return 0; |
} |
|