General information
Title:
Grabbing Web HTML Content
Hi All,
I thought I would start my first Excel VBA routine by doing something simple... automating the download and parsing web content into a spreadsheet...
I decided to use the WinInet API to do this as it looked relatively simple.
I managed to write code which could download html from an Intranet site, but as soon as I tried to get content from an external site, I got proxy authentication errors. I tried to use the HTTPQueryInfo to trap for this, and set the appropriate username and password, but the program just hangs when I call the HTTPQueryInfo API. Can anyone explain why?
Thanks Mace
Const INTERNET_OPEN_TYPE_PRECONFIG = 0
Const INTERNET_OPEN_TYPE_DIRECT = 1
Const INTERNET_OPEN_TYPE_PROXY = 3
Const INTERNET_OPTION_USERNAME = 28
Const INTERNET_OPTION_PASSWORD = 29
Const INTERNET_OPTION_PROXY_USERNAME = 43
Const INTERNET_OPTION_PROXY_PASSWORD = 44
Const INTERNET_SERVICE_FTP = 1
Const INTERNET_SERVICE_GOPHER = 2
Const INTERNET_SERVICE_HTTP = 3
Const HTTP_QUERY_FLAG_REQUEST_HEADERS = &H80000000
Const HTTP_QUERY_FLAG_NUMBER = &H20000000
Const HTTP_QUERY_STATUS_CODE = 19
Const scUserAgent = "VB OpenUrl"
Const INTERNET_FLAG_RELOAD = &H80000000
Const HTTP_STATUS_BAD_REQUEST = 400 ' invalid syntax
Const HTTP_STATUS_DENIED = 401 ' access denied
Const HTTP_STATUS_PAYMENT_REQ = 402 ' payment required
Const HTTP_STATUS_FORBIDDEN = 403 ' request forbidden
Const HTTP_STATUS_NOT_FOUND = 404 ' object not found
Const HTTP_STATUS_BAD_METHOD = 405 ' method is not allowed
Const HTTP_STATUS_NONE_ACCEPTABLE = 406 ' no response acceptable to client found
Const HTTP_STATUS_PROXY_AUTH_REQ = 407 ' proxy authentication required
Const HTTP_STATUS_REQUEST_TIMEOUT = 408 ' server timed out waiting for request
Const HTTP_STATUS_CONFLICT = 409 ' user should resubmit with more info
Const HTTP_STATUS_GONE = 410 ' the resource is no longer available
Const HTTP_STATUS_LENGTH_REQUIRED = 411 ' the server refused to accept request w/o a length
Const HTTP_STATUS_PRECOND_FAILED = 412 ' precondition given in request failed
Const HTTP_STATUS_REQUEST_TOO_LARGE = 413 ' request entity was too large
Const HTTP_STATUS_URI_TOO_LONG = 414 ' request URI too long
Const HTTP_STATUS_UNSUPPORTED_MEDIA = 415 ' unsupported media type
Public Declare Function InternetOpen Lib "wininet.dll" Alias "InternetOpenA" _
(ByVal sAgent As String, ByVal lAccessType As Long, ByVal sProxyName As String, _
ByVal sProxyBypass As String, ByVal lFlags As Long) As Long
Public Declare Function InternetConnect Lib "wininet.dll" Alias "InternetConnectA" (ByVal hInternetSession As Long, _
ByVal sServerName As String, ByVal nServerPort As Integer, _
ByVal sUsername As String, ByVal sPassword As String, _
ByVal lService As Long, ByVal lFlags As Long, _
ByVal lContext As Long) As Long
Private Declare Function HttpSendRequest Lib "wininet.dll" Alias "HttpSendRequestA" ( _
ByVal hHttpRequest As Long, _
ByVal lpszHeaders As String, _
ByVal dwHeadersLength As Long, _
ByVal lpOptional As String, _
ByVal dwOptionalLength As Long) As Boolean
Public Declare Function HttpOpenRequest Lib "wininet.dll" _
Alias "HttpOpenRequestA" ( _
ByVal hHttpSession As Long, _
ByVal sVerb As String, _
ByVal sObjectName As String, _
ByVal sVersion As String, _
ByVal sReferer As String, _
ByVal something As Long, _
ByVal lFlags As Long, _
ByVal lContext As Long) _
As Long
Public Declare Function InternetOpenUrl Lib "wininet.dll" Alias "InternetOpenUrlA" _
(ByVal hOpen As Long, ByVal sUrl As String, ByVal sHeaders As String, _
ByVal lLength As Long, ByVal lFlags As Long, ByVal lContext As Long) As Long
Private Declare Function InternetSetOption Lib "wininet.dll" _
Alias "InternetSetOptionA" _
(ByVal hInternet As Long, _
ByVal dwOption As Long, _
lpBuffer As Long, _
ByVal dwBufferLength As Long) As Boolean
Public Declare Function InternetReadFile Lib "wininet.dll" _
(ByVal hFile As Long, ByVal sBuffer As String, ByVal lNumBytesToRead As Long, _
lNumberOfBytesRead As Long) As Integer
Public Declare Function HttpQueryInfo Lib "wininet.dll" _
Alias "HttpQueryInfoA" _
(ByVal hHttpRequest As Long, ByVal lInfoLevel As Long, ByRef sBuffer _
As Any, ByRef lBufferLength As Long, ByRef lIndex As Long) As Integer
Public Declare Function InternetCloseHandle Lib "wininet.dll" _
(ByVal hInet As Long) As Integer
Public Function GetHTMLFromURL(sUrl As String, sRequest As String) As String
Dim s As String
Dim hOpen As Long
Dim hOpenUrl As Long
Dim bDoLoop As Boolean
Dim bRet As Boolean
Dim sReadBuffer As String * 2048
Dim lNumberOfBytesRead As Long
Dim hSession As Long
Dim hRequest As Long
Dim lCode As Long
Dim sStatus As String
Dim lStatusSize As Long
hOpen = InternetOpen(scUserAgent, INTERNET_OPEN_TYPE_PROXY, "10.1.1.23:80", "ch1srv10;ch1srv16;ch1srv12", 0)
GetHTMLFromURL = "Can not open Internet Connection: " & Err.LastDllError
If hOpen = 0 Then GoTo EndFunc
hSession = InternetConnect(hOpen, sUrl, 80, "mskeels", "mks5041a", INTERNET_SERVICE_HTTP, 0, 0)
GetHTMLFromURL = "Can not connect to the Internet : " & Err.LastDllError
If hSession = 0 Then GoTo EndFunc
'hOpenUrl = InternetOpenUrl(hOpen, sUrl, vbNullString, 0, INTERNET_FLAG_RELOAD, 0)
hRequest = HttpOpenRequest(hSession, "GET", sRequest, "HTTP/1.0", vbNullString, 0, INTERNET_FLAG_RELOAD, 0)
GetHTMLFromURL = "Can not open Request: " & Err.LastDllError
If hRequest = 0 Then GoTo EndFunc
resend:
llStatus = HttpSendRequest(hRequest, vbNullString, 0, vbNullString, 0)
GetHTMLFromURL = "Can not send Request: " & Err.LastDllError
If Not llStatus Then GoTo EndFunc
lCode = HTTP_QUERY_FLAG_NUMBER Or HTTP_QUERY_STATUS_CODE
llStatus = HttpQueryInfo(hRequest, lCode, sStatus, lStatusSize, Null)
' This code handles the authentication
Select Case sStatus
Case HTTP_STATUS_PROXY_AUTH_REQ
llStatus = InternetSetOption(hRequest, INTERNET_OPTION_PROXY_USERNAME, "myproxyuser", 12)
llStatus = InternetSetOption(hRequest, INTERNET_OPTION_PROXY_PASSWORD, "myproxypassword", 16)
GoTo resend
Case HTTP_STATUS_DENIED
llStatus = InternetSetOption(hRequest, INTERNET_OPTION_USERNAME, "mysiteuser", 11)
llStatus = InternetSetOption(hRequest, INTERNET_OPTION_PASSWORD, "mysitepassword", 15)
GoTo resend
End Select
s = ""
bDoLoop = True
While bDoLoop
sReadBuffer = vbNullString
bRet = InternetReadFile(hRequest, sReadBuffer, Len(sReadBuffer), lNumberOfBytesRead)
s = s & Left$(sReadBuffer, lNumberOfBytesRead)
If Not CBool(lNumberOfBytesRead) Then bDoLoop = False
Wend
GetHTMLFromURL = s
EndFunc:
If hRequest <> 0 Then InternetCloseHandle (hOpenUrl)
If hSession <> 0 Then InternetCloseHandle (hSession)
If hOpen <> 0 Then InternetCloseHandle (hOpen)
End Function
Next
Reply
View the map of this thread
View the map of this thread starting from this message only
View all messages of this thread
View all messages of this thread starting from this message only