Question

I am trying to reconstruct a webpage from a libpcap file using a Python script. I have all the packets, so the goal, I guess, is to take a libpcap file as input, find all the necessary packets, and produce a webpage file as output with all the pictures and data from that page. Can anyone get me started in the right direction? I think I will need dpkt and/or Scapy.

Update 1: Code is below! Here is the code I have come up with so far in Python. It is supposed to grab the first set of packets from a single HTTP session, beginning with a packet that has the SYN and ACK flags set to 1 and ending with a packet that has the FIN flag set to 1.

Assuming there is only one website visited during the packet capture, does this code append all the packets needed to reconstruct the visited webpage?

Assuming I have all the necessary packets how do I reconstruct the webpage?

from scapy.all import rdpcap, IP, TCP

# Collect the server-to-client packets of the first HTTP session in the
# capture: from the server's SYN-ACK up to and including the first packet of
# that same session that carries the FIN flag.

pktList = list()                 # packets we want to keep
pcap = rdpcap('myCapture.pcap')  # every packet in the capture file

# Find the index of the first SYN-ACK (flags == 0x12) sent from port 80.
# Packets without IP/TCP layers are skipped so that pkt[TCP] cannot raise.
count = None
for index, pkt in enumerate(pcap):
    if pkt.haslayer(IP) and pkt.haslayer(TCP) \
            and pkt[TCP].flags == 0x12 and pkt[TCP].sport == 80:
        count = index
        break

if count is not None:
    synAck = pcap[count]
    # Walk forward from the SYN-ACK. The index advances on every packet,
    # matching or not, and the walk is bounded by the end of the capture.
    for currentPkt in range(count, len(pcap)):
        pkt = pcap[currentPkt]
        if not (pkt.haslayer(IP) and pkt.haslayer(TCP)):
            continue
        # Same source/destination IP and ports as the SYN-ACK means the
        # packet belongs to this HTTP session (server -> client direction).
        sameSession = (pkt[TCP].sport == 80
                       and pkt[TCP].dport == synAck[TCP].dport
                       and pkt[IP].src == synAck[IP].src
                       and pkt[IP].dst == synAck[IP].dst)
        if not sameSession:
            continue
        pktList.append(pkt)
        # Read the flags from the TCP layer explicitly: on the outer packet
        # ".flags" resolves to the IP header flags. 0x01 is the FIN bit.
        if pkt[TCP].flags & 0x01:
            break
# pktList now holds the session's packets, ready to be reassembled.
Was it helpful?

Solution

Perhaps something like tcpick -r your.pcap -wRS does the job for you.

OTHER TIPS

This Python script will extract all unencrypted HTTP webpages that are in a PCAP file and output them as HTML files. It uses Scapy to work with the individual packets (another good Python module is dpkt).

from scapy.all import *
from operator import *
import sys



def sorting(pcap):
    """Filter *pcap* down to HTTP server packets and group them by flow.

    Keeps packets that have a TCP layer, were sent from port 80, and whose
    TCP flags are not exactly 0b10100 (RST+ACK). The survivors are sorted by
    source IP address and then by TCP destination port; the sort is stable,
    so packets sharing both keys keep their capture order.

    Side effect: the sorted packets are also written to "sorted.pcap".

    Returns the sorted list of packets.
    """
    http_replies = [
        pkt for pkt in pcap
        if pkt.haslayer(TCP)
        and pkt.sport == 80
        and bin(pkt[TCP].flags) != "0b10100"
    ]
    ordered = sorted(http_replies, key=itemgetter("IP.src", "TCP.dport"))
    wrpcap("sorted.pcap", ordered)
    return ordered


def extract(pcap, num, count):
    """Write each HTML page found in *pcap* to its own numbered file.

    Scanning starts at index *count*. For every page, the packet whose TCP
    payload contains "<html" is written first, followed by every subsequent
    payload up to and including the one that contains "</html>". Pages are
    appended to "file<num>.html", "file<num+1>.html", and so on.

    Parameters:
        pcap  -- indexable sequence of packets that each have a TCP layer
                 (for example the list returned by sorting()).
        num   -- number used in the name of the first output file.
        count -- index in *pcap* at which to start scanning.

    Returns None once the end of *pcap* has been reached. The function
    returns normally instead of calling sys.exit(), so the caller can carry
    on after extraction.

    NOTE: an output file is created as soon as a scan for a page begins, so
    the last file may be empty when no further "<html" is found.
    NOTE: a page whose opening and closing tags share one packet is not
    treated as complete; following payloads are appended until the next
    "</html>".
    """
    total = len(pcap)
    index = count

    # One pass of this loop per output file. The original used recursion
    # here, which adds a stack frame per page and can hit the recursion
    # limit on large captures.
    while index < total:
        page_name = "file" + str(num) + ".html"
        # Open the file once per page and let the context manager close it.
        with open(page_name, "a") as page:
            # Phase 1: skip ahead to the packet that starts an HTML document.
            while index < total:
                payload = str(pcap[index][TCP].payload)
                index = index + 1
                if payload.find("<html") != -1:
                    page.write(payload)
                    break

            # Phase 2: copy payloads until the closing tag has been written.
            while index < total:
                payload = str(pcap[index][TCP].payload)
                page.write(payload)
                if payload.find("</html>") != -1:
                    # The index is deliberately left on the closing packet,
                    # so the scan for the next page starts from it.
                    break
                index = index + 1
        num = num + 1


if __name__ == "__main__":
    # Ask the user which capture file to process and load all of its packets.
    f = raw_input("Please enter the name of your pcap file in this directory.  Example: myFile.pcap")
    pcapFile = rdpcap(f)
    # Single-argument print(...) behaves the same as the print statement
    # under Python 2, so the output is unchanged.
    print("Filtering Pcap File of non HTTP Packets and then sorting packets")
    # Keep only the HTTP (port 80) packets and group them by flow.
    pcapFile = sorting(pcapFile)
    print("Sorting Complete")
    print("Extracting Data")
    # Write the pages out as file1.html, file2.html, ... starting at packet 0.
    extract(pcapFile, 1, 0)
    # Was "Print", which is a syntax error that stopped the script from parsing.
    print("Extracting Complete")
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top