#! /usr/bin/perl -w
# Network Forensics Puzzle Contest #3
# Alan Tu <alantu@as2.info>
# January 16, 2010

# http_rx.pl v1.01
# Extracts HTTP response bodies from output of either "Follow TCP Stream" in Wireshark or stream.pl
# Usage: http_rx.pl filename
# Outputs the response bodies to filename.1, filename.2, etc

use strict;
use HTTP::Response; # use this module to parse the HTTP response content
use Digest::MD5;

# Read the capture file named on the command line into $data as raw bytes.
# Dies with a usage message when no filename is given, and with a
# descriptive message when the file is missing or cannot be opened or read.
my $fn = shift;
die "Usage: $0 filename\n" unless defined($fn) && length($fn); # a file named "0" is a valid argument
die "$fn does not exist\n" unless -f $fn;
# Three-argument open with a lexical handle: the filename is never
# interpreted as a mode or pipe specification, whatever characters it holds.
open(my $in, '<', $fn) or die "Cannot open $fn: $!\n";
binmode($in);
# Slurp the whole file; $/ is only undefined inside the do block so the
# rest of the program keeps the default input record separator.
my $data = do { local $/; <$in> };
die "Cannot read $fn: $!\n" unless defined($data);
close($in);

# Walk through $data, which holds one or more HTTP responses back to back,
# and write the body of each response to "$fn.1", "$fn.2", ...
# A summary line (file name, length, MD5) is printed to STDERR per file.
#
# The end of a message is located with its Content-Length header.  When
# the length is unknown (no usable Content-Length, or no CRLF CRLF header
# terminator could be found) the remainder of the data is treated as the
# body of the final message.
my $message = 1; # the number of the current HTTP response message
my $ptr = 0;     # offset in $data of the beginning of the current message
MESSAGE:
while ($ptr < length($data))
{
    my $rest = substr($data, $ptr); # everything from the current message onwards
    last MESSAGE unless $rest =~ /\S/; # trailing whitespace is not a response

    # Offset of the body relative to $ptr; undef when the header terminator
    # is missing (index() returns -1, which must not be used as an offset).
    my $header_end = index($rest, "\x0d\x0a\x0d\x0a");
    my $message_offset = $header_end < 0 ? undef : $header_end + 4;

    my $r = HTTP::Response->parse($rest); # create HTTP::Response object
    my $content_length = $r->header("content-length"); # undef if not specified
    if (defined($content_length))
    {
        # Keep only the leading digits, e.g. for a duplicated header that
        # is returned as "123, 123"; a non-numeric value counts as unknown.
        $content_length = $content_length =~ /\A\s*(\d+)/ ? $1 : undef;
    }

    # 1xx, 204 and 304 responses never carry a body, whatever their
    # Content-Length header says (a 304 may repeat the length of the
    # entity that was not sent).
    my $code = $r->code; # undef if the status line could not be parsed
    $content_length = 0 if defined($code) && $code =~ /\A(?:1\d\d|204|304)\z/;

    # Without a header terminator the body cannot be located reliably.
    $content_length = undef unless defined($message_offset);

    if (defined($content_length)) # content length is known
    {
        my $message_length = $message_offset + $content_length;
        warn "Message $message is truncated: the data ends before its Content-Length\n"
            if $message_length > length($rest);
        $r = HTTP::Response->parse(substr($rest, 0, $message_length)); # recreate HTTP::Response object of the correct length
    }

    # Undo the Content-Encoding (gzip, deflate, ...) but keep the body as
    # bytes: charset decoding would change the extracted data and makes
    # md5_hex() croak on wide characters.
    my $c = $r->decoded_content(charset => 'none');
    unless (defined($c)) # unknown or broken Content-Encoding
    {
        warn "Message $message could not be decoded, writing the raw body\n";
        $c = $r->content;
    }

    my $out_fn = "$fn.$message";
    open(my $out, '>', $out_fn) or die "Cannot open $out_fn for writing: $!\n";
    binmode($out);
    print {$out} $c; # print out the decoded HTTP response
    printf(STDERR "Writing file %s, length %d bytes, MD5 hash %s\n", $out_fn, length($c), Digest::MD5::md5_hex($c));
    close($out) or die "Cannot write $out_fn: $!\n"; # buffered write errors surface here

    last MESSAGE unless defined($content_length); # if no length is known, nothing more to do
    $ptr += $message_offset + $content_length; # skip past the HTTP header and past the current message to the next one
    $message++;
}

