Level Extreme platform
Subscription
Corporate profile
Products & Services
Support
Legal
Français
Stripping HTML keeping Text
Message
From
17/11/2010 18:25:13
 
General information
Forum:
Visual FoxPro
Category:
Coding, syntax & commands
Miscellaneous
Thread ID:
01489477
Message ID:
01489485
Views:
62
>>>>I'm pulling HTML off the internet and into a string then a memo field. I only want the text. How do I go about cleaning the HTML out of the string?
>>>
>>>Did you try using InnerText property?
>>>Re: Control eh? Thread #1488307 Message #1488318
>>
>>I am no longer using the WEBBR4OWSER41 - It's Tore's code with which I am playing.
>
>What is the code you're using? I think in either case you need to look for InnerText property to get text only.


Here is the code (so far)

*Function GetDataFrom URL
* Downloads the resource at pcUrlName through the WinINet API and prints
* the raw content (HTML included) to the active output window.
*
* Parameters:
*   pcUrlName - URL to fetch. When omitted or empty, the sample blog URL
*               below is used instead.
* Returns:
*   .NULL. when the Internet session or the URL cannot be opened.
*   On success the content is printed with "?" and no explicit value is returned.
*   NOTE(review): the .NULL. failure returns suggest the content was meant
*   to be returned as well - confirm before adding RETURN lcRetVal.
LPARAMETERS pcUrlName

DECLARE INTEGER InternetOpen IN wininet.DLL STRING sAgent, ;
	INTEGER lAccessType, STRING sProxyName, ;
	STRING sProxyBypass, INTEGER lFlags

DECLARE INTEGER InternetOpenUrl IN wininet.DLL ;
	INTEGER hInternetSession, STRING sUrl, STRING sHeaders, ;
	INTEGER lHeadersLength, INTEGER lFlags, INTEGER lContext

DECLARE INTEGER InternetReadFile IN wininet.DLL INTEGER hfile, ;
	STRING @sBuffer, INTEGER lNumberofBytesToRead, INTEGER @lBytesRead

DECLARE short InternetCloseHandle IN wininet.DLL INTEGER hInst

#DEFINE INTERNET_OPEN_TYPE_PRECONFIG 0
#DEFINE INTERNET_OPEN_TYPE_DIRECT 1
#DEFINE INTERNET_OPEN_TYPE_PROXY 3
#DEFINE SYNCHRONOUS 0
#DEFINE INTERNET_FLAG_RELOAD 2147483648
#DEFINE CR CHR(13)

* lsReadBuffer is the name the read loop actually uses, so declare that one
LOCAL lsAgent, lhInternetSession, lhUrlFile, llOk, lnOk, lcRetVal, lsReadBuffer, lnBytesRead

* Use the sample page only when the caller did not supply a URL
IF VARTYPE(pcUrlName) <> "C" OR EMPTY(pcUrlName)
	pcUrlName = "http://runaway-human-population.blogspot.com/"
ENDIF

* what application is using Internet services?
lsAgent = "VPF 9.0"

lhInternetSession = InternetOpen( lsAgent, INTERNET_OPEN_TYPE_PRECONFIG, ;
	'', '', SYNCHRONOUS)

* debugging line - uncomment to see session handle
* WAIT WINDOW "Internet session handle: " + LTRIM(STR(lhInternetSession))

IF lhInternetSession = 0
	WAIT WINDOW "Internet session cannot be established" TIMEOUT 2
	RETURN .null.
ENDIF

lhUrlFile = InternetOpenUrl( lhInternetSession, pcUrlName, '', 0, ;
	INTERNET_FLAG_RELOAD, 0)

* debugging line - uncomment to see URL handle
* WAIT WINDOW "URL Handle: " + LTRIM(STR(lhUrlFile))

IF lhUrlFile = 0
	* release the session handle before bailing out so it is not leaked
	InternetCloseHandle( lhInternetSession )
	WAIT WINDOW "URL cannot be opened" TIMEOUT 5
	RETURN .null.
ENDIF

lcRetVal = ""
llOk = .t.

DO WHILE llOk
	* set aside a big buffer
	lsReadBuffer = SPACE(32767)
	lnBytesRead = 0
	lnOk = InternetReadFile( lhUrlFile, @lsReadBuffer, LEN(lsReadBuffer), @lnBytesRead)

	* keep only the bytes actually filled in by this read
	IF lnBytesRead > 0
		lcRetVal = lcRetVal + LEFT( lsReadBuffer, lnBytesRead )
	ENDIF

	* error trap - either a read failure or read past eof()
	llOk = ( lnOk = 1 ) AND ( lnBytesRead > 0 )
ENDDO

* release both WinINet handles, URL handle first, then the session that owns it
InternetCloseHandle( lhUrlFile )
InternetCloseHandle( lhInternetSession )

? lcRetVal
I ain't skeert of nuttin eh?
Yikes! What was that?
Previous
Reply
Map
View

Click here to load this message in the networking platform