Level Extreme platform
Corporate profile
Products & Services
Saving web site
16/02/2015 13:11:43
16/02/2015 12:16:52
General information
Coding, syntax and commands
Environment versions
VB 9.0
Windows Server 2012
Windows 2008 Server
MS SQL Server
Thread ID:
Message ID:
>I'm downloading a web page in c#
>HTML = wc.DownloadString(fullHTMLString);
>I would like to be able to save a copy of the webpage (or part of the page) in the string as a pdf.
>Any ideas on the best way to do this .
>The need is to have a non-editable copy of the page to prove its state at the time of the download, so any ideas about how to achieve that would be welcome.

You can save HTML to PDF with EVO PDF or any similar library. Here is the method I use with EVO PDF:
    ' Convert a Html page to Pdf
    ''' <summary>
    ''' Converts the page at cUrl to a PDF with EVO PDF and writes it to
    ''' the path built from cDirectory and cFile.
    ''' </summary>
    ''' <remarks>
    ''' Inputs are read from the members cDirectory, cFile, cKeyword,
    ''' cLicenseKey, cUrl and lRemoveFirstPage. After the call, cKeyword,
    ''' cLicenseKey, cUrl and lRemoveFirstPage are reset, whether or not the
    ''' conversion succeeded. cDirectory and cFile are left untouched.
    ''' </remarks>
    ''' <returns>
    ''' True when the PDF file was written. False when an exception occurred
    ''' during conversion or saving, in which case cMessage holds the
    ''' exception message.
    ''' </returns>
    Public Function HtmlToPdf() As Boolean
        Dim lcDirectory As String = ""
        Dim lcFile As String = ""
        Dim lcKeyWord As String = ""
        Dim lcLicenseKey As String = ""
        Dim lcUrl As String = ""
        Dim llSuccess As Boolean = False
        Dim loByte() As Byte = Nothing
        Dim loConversionSummary As EvoPdf.ConversionSummary = Nothing
        Dim loPdfConverter As EvoPdf.PdfConverter = New EvoPdf.PdfConverter
        Dim loPdfDocument As EvoPdf.Document = Nothing
        Dim loPdfPage As EvoPdf.PdfPage = Nothing

        ' Reset the values
        cMessage = ""

        ' Initialization
        lcDirectory = Trim(cDirectory)
        lcFile = Trim(cFile)
        lcKeyWord = Trim(cKeyword)
        lcLicenseKey = Trim(cLicenseKey)
        lcUrl = Trim(cUrl)

        ' Resolve the target directory through the application object
        lcDirectory = oApp.GetPath(lcDirectory)

        loPdfConverter.PdfDocumentOptions.PdfPageSize = EvoPdf.PdfPageSize.A4
        loPdfConverter.PdfDocumentOptions.PdfCompressionLevel = EvoPdf.PdfCompressionLevel.Normal
        loPdfConverter.PdfDocumentOptions.LeftMargin = 5
        loPdfConverter.PdfDocumentOptions.RightMargin = 5
        loPdfConverter.PdfDocumentOptions.TopMargin = 5
        loPdfConverter.PdfDocumentOptions.BottomMargin = 5
        loPdfConverter.PdfDocumentOptions.InternalLinksEnabled = True
        loPdfConverter.PdfDocumentOptions.LiveUrlsEnabled = False

        ' This is needed for HTML pages that take a certain time to be generated.
        ' Without that, the PDF may be cut and not always be generated in the same way.
        loPdfConverter.ConversionDelay = 5

        ' If we have a keyword
        If lcKeyWord.Length > 0 Then
            loPdfConverter.PdfDocumentInfo.Keywords = lcKeyWord
        End If

        ' If we have a license key
        If lcLicenseKey.Length > 0 Then
            loPdfConverter.LicenseKey = lcLicenseKey
        End If

        ' Html to PDF
        Try

            ' Get the URL into a PdfDocument object
            loPdfDocument = loPdfConverter.GetPdfDocumentObjectFromUrl(lcUrl)

            ' Get the conversion summary object from the converter
            loConversionSummary = loPdfConverter.ConversionSummary

            ' If we have to remove the first page
            If lRemoveFirstPage Then

                ' Get an object to page 1
                loPdfPage = loPdfDocument.Pages(0)

                ' Remove that page 1
                loPdfDocument.Pages.Remove(loPdfPage)

            End If

            ' Save the PdfDocument object into a Byte array
            loByte = loPdfDocument.Save()

            System.IO.File.WriteAllBytes(lcDirectory + lcFile, loByte)
            llSuccess = True
        Catch loError As Exception
            cMessage = loError.Message
        Finally

            ' Release the resources held by the PDF document, even on failure
            If loPdfDocument IsNot Nothing Then
                loPdfDocument.Close()
            End If

        End Try

        ' Reset the values
        cKeyword = ""
        cLicenseKey = ""
        cUrl = ""
        lRemoveFirstPage = False

        Return llSuccess
    End Function
You can also save HTML to MHTML, which is what I would recommend in this case. For that, I use aspNetMHT:
    ' Get the MHTML file
    ''' <summary>
    ''' Loads the page at cUrl with aspNetMHT and saves it as an MHTML file
    ''' at the path built from cDirectory and cFile.
    ''' </summary>
    ''' <remarks>
    ''' Inputs are read from the members cDirectory, cFile, cLicenseKey, cUrl,
    ''' lLogError, lMaximumFileSize, nMaximumFileSize and lMakeHtmlAvailable.
    ''' When lMakeHtmlAvailable is set, the saved file content is exposed in cHtml.
    ''' On every exit path, cDirectory, cFile, cLicenseKey, lLogError,
    ''' lMakeHtmlAvailable, lMaximumFileSize and nMaximumFileSize are reset.
    ''' Failure codes written to nMessage: 8 for the two known "aborted / resource
    ''' not found" exception messages, 9 for any other exception, and 10 when
    ''' the file exceeds nMaximumFileSize.
    ''' </remarks>
    ''' <returns>True when the file was saved and all optional checks passed; False otherwise.</returns>
    Public Function GetMHTML() As Boolean
        Dim lcDirectory As String = ""
        Dim lcFile As String = ""
        Dim lcLicenseKey As String = ""
        Dim lcUrl As String = ""
        Dim llSuccess As Boolean = False
        Dim loFileDirectory As Framework.FileDirectory = Nothing
        Dim loFileFunction As Framework.FileFunction = Nothing
        Dim loMHT As aspNetMHT.MHT = Nothing
        Dim loRowFile As DataRow = Nothing

        ' Get the proper definition as per the current scope
        If oProcess Is Nothing Then
            loFileDirectory = New FileDirectory(oApp)
            loFileFunction = New FileFunction(oApp)
        Else
            loFileDirectory = New FileDirectory(oProcess)
            loFileFunction = New FileFunction(oProcess)
        End If

        ' Reset the values
        cMessage = ""
        nMessage = 0

        ' Initialization
        lcFile = Trim(cFile)
        lcLicenseKey = Trim(cLicenseKey)
        lcUrl = Trim(cUrl)

        ' Resolve the target directory through the application object
        lcDirectory = oApp.GetPath(cDirectory)

        ' If the server does not respond, we have to loop to the next record
        Try

            ' NOTE(review): lcLicenseKey is read above but never applied to aspNetMHT
            ' in the visible code - confirm whether a license setup call belongs here.

            ' Load the Url
            loMHT = New aspNetMHT.MHT(lcUrl)

            ' NOTE(review): the original comment mentions parsing the HTML into its MHT
            ' counterpart before saving, but no parse call is visible - confirm against
            ' the aspNetMHT API.

            ' Save it to a file
            loMHT.SaveToFile(lcDirectory + lcFile)

            ' If we have to check for a maximum file size
            If lMaximumFileSize Then

                ' If we cannot get the files
                loFileDirectory.cDirectory = oApp.cHttpFat + "Temp"
                loFileDirectory.lLogError = lLogError
                If Not loFileDirectory.GetFile() Then
                    cMessage = loFileDirectory.cMessage
                    Exit Try
                End If

                ' Get access to the file
                loRowFile = loFileDirectory.oRows(0)

                ' If this is bigger than the limit
                If loRowFile("Size") > nMaximumFileSize Then
                    nMessage = 10
                    cMessage = cDirectory
                    Exit Try
                End If

            End If

            ' If we have to make the Html available
            If lMakeHtmlAvailable Then

                ' If we cannot create a string from the file
                loFileFunction.cFile = lcDirectory + lcFile
                loFileFunction.lLogError = False
                If Not loFileFunction.FileToString() Then
                    cMessage = loFileFunction.cMessage

                    ' Leave through the common exit so the members are reset
                    Exit Try
                End If

                ' Expose the file content to the caller
                cHtml = loFileFunction.cString

            End If

            llSuccess = True
        Catch loException As Exception

            ' If we log the error
            If lLogError Then

                ' NOTE(review): the original had an empty "If oProcess Is Nothing" here;
                ' the scope-specific error logging call appears to be missing - restore
                ' it from the full source.

                cMessage = loException.Message

                ' Known "aborted / resource not found" messages get their own code
                If oApp.Inlist(loException.Message, "Operation aborted", "The system cannot locate the resource specified.") Then
                    nMessage = 8
                Else
                    nMessage = 9
                End If

            End If

        End Try

        ' Reset the values
        cDirectory = ""
        cFile = ""
        cLicenseKey = ""
        lLogError = True
        lMakeHtmlAvailable = False
        lMaximumFileSize = False
        nMaximumFileSize = 0

        Return llSuccess
    End Function
Michel Fournier
Level Extreme Inc.
Designer, architect, owner of the Level Extreme Platform
Subscribe to the site at https://www.levelextreme.com/Home/DataEntry?Activator=55&NoStore=303
Subscription benefits https://www.levelextreme.com/Home/ViewPage?Activator=7&ID=52

Click here to load this message in the networking platform