Question

When I connect to some sites, it gives me:

Content-Type: text/html; charset=ISO-8859-1

Connection: close

Transfer-Encoding: chunked

Date: Tue, 01 Jan 2013 18:49:53 GMT   


fff8

And at the end of the file, it looks like:

</script><!-- vBadvanced 1-3-9-4-8-0 -->

</body>
</html

1

>

0

But when I request stackoverflow.com, it prints out perfectly fine. It may have extra blank lines in the source, but that's fine. Why does the other site add numbers, though?

How can I fix it? Also, how can I separate that header from the html itself?

My code is as follows:

#define _WIN32_WINNT 0x501

#include <iostream>
#include <winsock2.h>
#include <ws2tcpip.h>
#include <stdio.h>
#include <fstream>
#include <vector>

using namespace std;

// Downloads "/" from the given host over plain HTTP (TCP port 80) and collects
// the complete raw response (status line, headers and body) in a local string.
//
// Parameters:
//   WebPage - host name to resolve and connect to, e.g. "google.com".
//
// Nothing is returned. On any failure (Winsock start-up, name resolution,
// connect, send) the function releases what it acquired and returns silently.
//
// Note: the request is HTTP/1.1, so the server may answer with
// "Transfer-Encoding: chunked". In that case the body held in Response still
// contains the hexadecimal chunk-size lines and has to be de-chunked before
// it can be used as HTML. Headers and body are separated by the first
// "\r\n\r\n" in Response.
void Get(string WebPage)
{
    WSADATA wsaData;
    if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) return;

    string Header = "GET / HTTP/1.1\r\n";
    Header += "Host: " + WebPage + "\r\n";
    Header += "Connection: close\r\n";
    Header += "\r\n";

    // Ask the resolver for IPv4/TCP endpoints on port 80 directly, instead of
    // converting the address to text and back.
    struct addrinfo hints;
    memset(&hints, 0, sizeof(hints));
    hints.ai_family   = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    struct addrinfo *result = NULL;
    if (getaddrinfo(WebPage.c_str(), "80", &hints, &result) != 0)
    {
        WSACleanup();
        return;
    }

    // Try every returned address until one accepts the connection.
    SOCKET Socket = INVALID_SOCKET;
    for (struct addrinfo *entry = result; entry != NULL; entry = entry->ai_next)
    {
        Socket = socket(entry->ai_family, entry->ai_socktype, entry->ai_protocol);
        if (Socket == INVALID_SOCKET) continue;

        if (connect(Socket, entry->ai_addr, (int)entry->ai_addrlen) != SOCKET_ERROR) break;

        closesocket(Socket);
        Socket = INVALID_SOCKET;
    }
    freeaddrinfo(result);

    if (Socket == INVALID_SOCKET)
    {
        WSACleanup();
        return;
    }

    // send() may accept fewer bytes than requested, so loop until the whole
    // request has been handed to the stack.
    const char *pending = Header.c_str();
    int remaining = (int)Header.size();
    bool requestSent = true;
    while (remaining > 0)
    {
        int written = send(Socket, pending, remaining, 0);
        if (written == SOCKET_ERROR)
        {
            requestSent = false;
            break;
        }
        pending   += written;
        remaining -= written;
    }

    std::string Response;

    if (requestSent)
    {
        shutdown(Socket, SD_SEND);

        char Buffer[8192];
        for (;;)
        {
            int bytes = recv(Socket, Buffer, sizeof(Buffer), 0);
            // 0 means the peer closed the connection, SOCKET_ERROR (-1) means
            // failure; neither may be used as a length or an index.
            if (bytes <= 0) break;
            Response.append(Buffer, bytes);
        }
    }

    closesocket(Socket);
    WSACleanup();
}

int main()
{
    Get("google.com");
}
Was it helpful?

Solution

See this wiki page: http://en.wikipedia.org/wiki/Chunked_transfer_encoding

Each of these hex numbers is a chunk length. It is followed by a line break (CRLF), then the actual chunk data (payload) of exactly the specified size, then another line break, immediately followed by the next chunk length. If the chunk length is zero, no further data bytes follow (EOF). I'm not sure whether the content you posted can be concatenated correctly; it seems you'd need to handle multiple contiguous line breaks. Just look at the page and its source in a browser.

EDIT:

Just found this sniffing tool, it displays all the details I'd like to know in your situation:

http://web-sniffer.net/

OTHER TIPS

This function will 'unchunk' your HTTP data - In VB6, but you'll get the idea (really OLD CODE)

'Removes HTTP "chunked" transfer encoding from a raw HTTP response.
'
'Indata:  the complete response text (status line, headers, blank line, body).
'Returns: the headers (including the terminating blank line) followed by the
'         concatenated chunk payloads. If the response has no header
'         terminator, is not declared as chunked in its headers, or the chunk
'         framing is malformed or truncated, Indata is returned unchanged.
Private Function UnChunk(Indata As String) As String
  Dim headerEnd As Long
  Dim lowerHeader As String
  Dim tePos As Long
  Dim teEnd As Long
  Dim pos As Long
  Dim lineEnd As Long
  Dim chunksize As Long
  Dim result As String

  'can't let this crash
  On Error GoTo returnInData

  'first, get header, which ends with an empty line (CRLF CRLF)
  headerEnd = InStr(Indata, vbCrLf & vbCrLf)
  If headerEnd = 0 Then GoTo returnInData   'invalid http

  'only the header section decides whether the body is chunked; the word
  '"chunked" has to appear on the Transfer-Encoding line itself
  lowerHeader = LCase$(Left$(Indata, headerEnd + 1))
  tePos = InStr(lowerHeader, "transfer-encoding:")
  If tePos = 0 Then GoTo returnInData       'not chunked, so return the input
  teEnd = InStr(tePos, lowerHeader, vbCrLf)
  If teEnd = 0 Then teEnd = Len(lowerHeader) + 1
  If InStr(Mid$(lowerHeader, tePos, teEnd - tePos), "chunked") = 0 Then GoTo returnInData

  'keep the header together with the full CR LF CR LF terminator (4 characters)
  result = Left$(Indata, headerEnd + 3)
  pos = headerEnd + 4

  Do
    'the chunk-size line runs up to the next vbCrLf
    lineEnd = InStr(pos, Indata, vbCrLf)
    If lineEnd = 0 Then GoTo returnInData

    chunksize = ParseChunkSize(Mid$(Indata, pos, lineEnd - pos))
    If chunksize < 0 Then GoTo returnInData

    'by spec, if 0 then no more data
    If chunksize = 0 Then
      UnChunk = result
      Exit Function
    End If

    'the payload starts right after the size line and must be followed by vbCrLf
    pos = lineEnd + 2
    If pos + chunksize + 1 > Len(Indata) Then GoTo returnInData
    If Mid$(Indata, pos + chunksize, 2) <> vbCrLf Then GoTo returnInData

    result = result & Mid$(Indata, pos, chunksize)

    'jump over the payload and its trailing vbCrLf; the payload is never
    'scanned for line breaks, so short lines inside it cannot be mistaken
    'for chunk sizes
    pos = pos + chunksize + 2
  Loop

  'any failure ends up here: return the input data untouched
returnInData:
  UnChunk = Indata
End Function

'Parses the leading hexadecimal digits of a chunk-size line.
'Anything after the digits (for example a ";extension") is ignored.
'Returns -1 when the line does not start with a hex digit.
'Digits are accumulated by hand because Val("&hfff8") evaluates to -8
'(values up to FFFF are treated as a 16-bit Integer).
Private Function ParseChunkSize(sizeLine As String) As Long
  Dim s As String
  Dim i As Long
  Dim digit As Long
  Dim total As Long
  Dim seen As Boolean

  s = Trim$(sizeLine)
  For i = 1 To Len(s)
    digit = InStr(1, "0123456789abcdef", LCase$(Mid$(s, i, 1)), vbBinaryCompare) - 1
    If digit < 0 Then Exit For
    'an overflow here raises an error that the caller's handler catches
    total = total * 16 + digit
    seen = True
  Next i

  If seen Then
    ParseChunkSize = total
  Else
    ParseChunkSize = -1
  End If
End Function
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top