#!/usr/bin/ruby
#    Copyright (C) 2010 Richard Springs
#    This program is free software: you can redistribute it and/or
#    modify it under the terms of the GNU General Public License as
#    published by the Free Software Foundation, either version 3 of
#    the License, or (at your option) any later version.
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see
#    <http://www.gnu.org/licenses/>.

require 'rubygems'
require 'fileutils'
require 'singleton'
require 'trollop'
require 'packetfu'

# parse command line options
# Command line definition. Trollop returns a hash keyed by option symbol:
# String options are nil when absent, flag options are false when absent.
opts = Trollop::options {
  version "transmute.rb / v1.01 | richardsprings"
  banner <<-EOS
	  Usage: transmute.rb [options]
  EOS
  opt :file, "pcap file for HTTP analysis", :type => String
  # the table is emitted with puts at the end of the run, not written to a file
  opt :eth, "print MAC and IP address table to standard output"
  opt :ip, "filter traffic to only include traffic to and from IP address", :type => String
  opt :Verbose, "display verbose information"
}
# check for mandatory arguments
Trollop::die :file, "not specified" if opts[:file].nil?
# fail early with a usage error instead of an exception from the pcap reader
Trollop::die :file, "does not exist" unless File.exist?(opts[:file])


# save file
# Write +object+ to the file named +filename+ in binary mode, replacing any
# existing content.
#
# object   - data handed to IO#write
# filename - destination path; converted with to_s before opening
#
# Returns nil.
def savetofile(object, filename)
  # block form closes the handle even when the write raises
  File.open(filename.to_s, "wb") { |op_file| op_file.write(object) }
  nil
end
  
   
# Decode an Ethernet MAC header.
#
# fourteenbytes - String holding the first 14 bytes of a frame
#
# Returns [destination MAC, source MAC, framing standard]. Both MAC
# addresses are hex strings without separators; the framing standard is one
# of the descriptive strings assigned below.
def L2_analyzeframe(fourteenbytes)
  # unpack returns a one-element array; take the element itself rather than
  # relying on Array#to_s, whose result differs between Ruby versions
  # destination MAC located in first 6 bytes (byte offset 0-5)
  ethdst = fourteenbytes[0..5].unpack('H*').first
  # source MAC located in next 6 bytes (byte offset 6-11)
  ethsrc = fourteenbytes[6..11].unpack('H*').first
  # type/length field (byte offset 12-13) decides EthII vs 802.2 framing
  typlen = fourteenbytes[12..13].unpack('H*').first
  if typlen.hex > 1500
    # value is an Ethertype - detect the most common ones
    framestandard = case typlen
                    when "0800" then "EthernetII_IPv4"
                    when "0806" then "EthernetII_ARP"
                    else "Undetermined EthernetII Ethertype"
                    end
  else
    # value is a length, so the frame uses 802.2 framing
    framestandard = "Undetermined 802.2 framing standard (LLC or LLC/SNAP)"
  end
  return ethdst, ethsrc, framestandard
end


# Decode the fixed part of an IPv4 header.
#
# sixtybytes - String starting at the first byte of the IP header; only the
#              first 20 bytes are read
#
# Returns [header length in bytes, protocol name, source address,
# destination address]. Addresses are dotted-decimal strings; the protocol
# name is "TCP", "UDP", "ICMP" or "Undetermined IP protocol".
def L3_analyzepacket(sixtybytes)
  # work on integer byte values instead of round-tripping through hex text
  header = sixtybytes[0, 20].unpack('C*')
  # IP header length in bytes (byte offset 0 - low order nibble, in 32-bit words)
  ihl = 4 * (header[0] & 0x0f)
  # IP protocol (byte 9)
  ipprot = case header[9]
           when 6 then "TCP"
           when 17 then "UDP"
           when 1 then "ICMP"
           else "Undetermined IP protocol"
           end
  # source IP address (byte offset 12-15)
  srcipaddress = header[12, 4].join(".")
  # destination IP address (byte offset 16-19)
  dstipaddress = header[16, 4].join(".")
  return ihl, ipprot, srcipaddress, dstipaddress
end


# Decode the fixed part of a TCP header.
#
# sixtybytes - String starting at the first byte of the TCP header; bytes
#              0-7 and 12-13 are read
#
# Returns [source port, destination port, sequence number, header length in
# bytes, flag text]. The flag text is the concatenation, in the fixed order
# FIN SYN RST PSH ACK URG, of the names of the flags that are set (for
# example "SYNACK" or "PSHACK"); it is empty when none of them is set.
def L4_analyzeTCPsegment(sixtybytes)
  # source port (byte offset 0-1), destination port (2-3) and sequence
  # number (4-7), all big-endian
  srcport, dstport, seqnum = sixtybytes[0, 8].unpack('nnN')
  # data offset byte (byte offset 12) and flag byte (byte offset 13)
  offsetbyte, flagbyte = sixtybytes[12, 2].unpack('CC')
  # header length is the high nibble of the offset byte, in 32-bit words
  offset = 4 * (offsetbyte >> 4)
  # test each flag bit from least to most significant; callers compare the
  # resulting text against values such as "SYNACK", so the order is fixed
  txtflags = String.new
  [[0x01, "FIN"], [0x02, "SYN"], [0x04, "RST"],
   [0x08, "PSH"], [0x10, "ACK"], [0x20, "URG"]].each do |mask, name|
    txtflags << name if (flagbyte & mask) != 0
  end
  return srcport, dstport, seqnum, offset, txtflags
end


# Singleton holding everything that has to survive from one frame to the
# next: the MAC/IP table, per-flow TCP handshake state and initial sequence
# numbers, per-flow protocol classification, and the HTTP request/response
# buffers being reassembled.
#
# Flows are identified by the string "srcip:srcport->dstip:dstport"; most
# methods build both that key (flow1) and its reverse (flow2).
#
# Creating the instance deletes and recreates the ./conversations directory.
class State
  include Singleton

  # size of the TCP sequence number space (2**32); added on wrap around
  SEQUENCE_SPACE = 4294967296
  # a payload containing a line that starts with "HTTP/1." opens a new response
  HTTP_RESPONSE_START = /^HTTP\/1\..*$/
  # complete Content-Length header line (header names are case insensitive)
  CONTENT_LENGTH_HEADER = /^Content-Length:.*\n/i
  # complete Host header line
  HOST_HEADER = /^Host:.*\n/

  def initialize
    # Layer 2,3 - IP address => MAC address
    @maciptable = {}
    # Layer 4 - flow => handshake state, flow => initial sequence number
    @statetable = {}
    @isntable = {}
    # Layer 7 - regex matching on HTTP requests and tracking of HTTP request type
    @knownpatterns = {
      "HTTP_GET" => /^GET.*HTTP\/1\..\r\n/,
      "HTTP_POST" => /^POST.*HTTP\/1\..\r\n/
    }
    @traffictable = {}
    # Layer 7 - tracking of HTTP request/responses; @HTTPindex holds flow
    # keys, the remaining arrays are addressed by the 1-based HTTP index
    @HTTPindex = []
    @HTTPrequests = []
    @hoststring = []
    @HTTPresponses = []
    @contentlength = []
    @bytesreceived = []

    # create directory for output
    @reportdirectory = "./conversations"
    # delete existing and recreate report directory
    FileUtils.rm_rf @reportdirectory
    FileUtils.mkdir_p @reportdirectory
  end

  # Record the MAC address last seen for the source and destination IP.
  def l23updateMACIPtable(srceth, srcip, dsteth, dstip)
    @maciptable[srcip] = srceth
    @maciptable[dstip] = dsteth
  end

  # Print the MAC/IP table to standard output, one "mac ip" pair per line,
  # sorted by IP address string.
  def l23writeMACIPtable
    ethiptable = String.new
    @maciptable.sort.each { |ipaddress, macaddress| ethiptable << "#{macaddress} #{ipaddress}\n" }
    puts ethiptable
  end

  # Advance the handshake state of a flow from the flags of one segment.
  #
  # flags  - flag text such as "SYN", "SYNACK", "ACK", "FINACK"
  # seqnum - sequence number of the segment; stored as the flow's ISN on
  #          SYN and SYNACK
  #
  # Returns [flow key, state of that flow, ISN of that flow]. The state is
  # "SYN", "SYNACK", "ESTABLISHED", "CLOSED" or nil for an untracked flow.
  def l4TCPflags(srcip, srcport, dstip, dstport, flags, seqnum)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"
    flow2 = "#{dstip}:#{dstport}->#{srcip}:#{srcport}"
    # existing connection?
    if @statetable.has_key?(flow1) || @statetable.has_key?(flow2)
      # progress TCP 3 way handshakes
      if @statetable[flow2] == "SYN" && @statetable[flow1] == "CLOSED" && flags == "SYNACK"
        @statetable[flow1] = flags
        @isntable[flow1] = seqnum
      end
      if @statetable[flow1] == "SYN" && @statetable[flow2] == "SYNACK" && flags == "ACK"
        @statetable[flow1] = "ESTABLISHED"
        @statetable[flow2] = "ESTABLISHED"
      end
      # tear down connections (crude hack)
      if flags.include?("RST") || flags.include?("FIN")
        # close connections and delete isn values in both directions
        @statetable[flow1] = "CLOSED"
        @statetable[flow2] = "CLOSED"
        @isntable[flow1] = "DELETED"
        @isntable[flow2] = "DELETED"
      end
    # initial SYN
    elsif flags == "SYN"
      @statetable[flow1] = flags
      @statetable[flow2] = "CLOSED"
      @isntable[flow1] = seqnum
    end
    return flow1, @statetable[flow1], @isntable[flow1]
  end

  # Classify the payload of a flow.
  #
  # A flow that already has a classification keeps it. Otherwise the payload
  # is matched against the known request patterns; on a match the reverse
  # flow is recorded as "<key>_RESPONSE". The result (possibly "UNKNOWN") is
  # remembered for the flow.
  #
  # Returns the classification string.
  def l7classifyprotocol(srcip, srcport, dstip, dstport, pload)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"
    flow2 = "#{dstip}:#{dstport}->#{srcip}:#{srcport}"
    # previously identified?
    return @traffictable[flow1] if @traffictable.has_key?(flow1)

    payloadclassification = "UNKNOWN"
    # payload matches a known pattern?
    @knownpatterns.each do |key, value|
      next unless pload =~ value
      payloadclassification = key
      # identify responses
      @traffictable[flow2] = "#{key}_RESPONSE"
    end
    @traffictable[flow1] = payloadclassification
    return payloadclassification
  end

  # Return the 1-based HTTP index of the conversation a flow belongs to,
  # assigning the next free index when neither direction is known yet.
  def l7indexhttp(srcip, srcport, dstip, dstport)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"
    flow2 = "#{dstip}:#{dstport}->#{srcip}:#{srcport}"
    # either direction already indexed? use the assigned index number
    existing = @HTTPindex.index(flow1) || @HTTPindex.index(flow2)
    return existing + 1 if existing
    # assign flow1 an index number
    @HTTPindex << flow1
    return @HTTPindex.length
  end

  # Write the request and response collected under +index+ to
  # "<report directory>/NNNN-request" and "NNNN-response", then forget the
  # flow's classification and index slot so that a later request on the same
  # flow starts a new conversation.
  def l7writeHTTP(srcip, srcport, dstip, dstport, index)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"
    flow2 = "#{dstip}:#{dstport}->#{srcip}:#{srcport}"
    # determine filenames - pad to 4 zeros
    requestfilename = File.join(@reportdirectory, sprintf("%04d-request", index))
    responsefilename = File.join(@reportdirectory, sprintf("%04d-response", index))
    # write files; strip removes the space padding inserted during reassembly
    savetofile(@HTTPrequests[index].to_s.strip, requestfilename)
    savetofile(@HTTPresponses[index].to_s.strip, responsefilename)
    # update traffic table for subsequent HTTP requests/responses
    @traffictable.delete(flow1)
    @traffictable.delete(flow2)
    # update HTTPindex for subsequent HTTP requests/responses
    @HTTPindex[index - 1] = ""
  end

  # Rewrite the stored request so that its request target is an absolute URL
  # (for OWASP webscarab compatibility) by inserting "http://host:port"
  # before the first "/". Done at most once per index, and only when a Host
  # header is present.
  #
  # srcport - used as the server port: this is called while handling the
  #           response, whose source port is the request's destination port
  def l7modifyrequest(srcip, srcport, dstip, dstport, index)
    return unless @hoststring[index].nil?
    request = @HTTPrequests[index]
    return if request.nil?
    hostline = request[HOST_HEADER]
    return if hostline.nil?
    # take the first token after the colon; tolerates "Host:name" without a space
    hostheadervalue = hostline.sub(/\AHost:/, "").split(" ").first
    return if hostheadervalue.nil?
    # a Host header may already carry an explicit port; do not append a second one
    authority = hostheadervalue =~ /:\d+\z/ ? hostheadervalue : "#{hostheadervalue}:#{srcport}"
    hoststring = "http://#{authority}"
    slash = request.index("/")
    return if slash.nil?
    request.insert(slash, hoststring)
    @hoststring[index] = hoststring
  end

  # Add one response segment to the response buffer of +index+.
  #
  # A payload containing a line that starts with "HTTP/1." resets the
  # buffer. The payload is placed at the offset given by its sequence number
  # relative to the flow's ISN; gaps are filled with spaces, and those
  # spaces count toward the byte total until real data replaces them.
  #
  # Returns [entity bytes received, content length]:
  #   [-1, -1] when the buffer has no complete Content-Length line,
  #   [-1, n]  when the length is known but the blank line ending the
  #            headers has not arrived yet,
  #   [m, n]   otherwise, m being the buffer length past the header end.
  def l7processHTTPresponses(srcip, srcport, dstip, dstport, index, seqnum, pload)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"

    # Is this the beginning of a new HTTP response?
    if pload =~ HTTP_RESPONSE_START
      @bytesreceived[index] = 0
      @HTTPresponses[index] = String.new
    end
    # continuation seen without a recorded start: begin with an empty buffer
    @HTTPresponses[index] ||= String.new
    response = @HTTPresponses[index]

    insertpayload(response, @isntable[flow1], seqnum, pload)

    # identify if content length exists
    contentlengthline = response[CONTENT_LENGTH_HEADER]
    if contentlengthline
      # to_i skips the leading whitespace and stops at the trailing line end
      @contentlength[index] = contentlengthline.sub(/\A[^:]*:/, "").to_i
      # determine entity start
      crlfend = response.index("\r\n\r\n")
      lfend = response.index("\n\n")
      entitystart = if crlfend
                      crlfend + 4
                    elsif lfend
                      lfend + 2
                    end
      # account for # of entity response bytes (counts response entity only)
      @bytesreceived[index] = entitystart ? response.length - entitystart : -1
    else
      @contentlength[index] = -1
      @bytesreceived[index] = -1
    end
    return @bytesreceived[index], @contentlength[index]
  end

  # Add one request segment to the request buffer of +index+, positioned by
  # its sequence number relative to the flow's ISN.
  #
  # Returns the length of the request buffer afterwards.
  def l7processHTTPrequests(srcip, srcport, dstip, dstport, index, seqnum, pload)
    flow1 = "#{srcip}:#{srcport}->#{dstip}:#{dstport}"
    # initialize HTTPrequest buffer, if necessary
    @HTTPrequests[index] ||= String.new
    insertpayload(@HTTPrequests[index], @isntable[flow1], seqnum, pload)
    return @HTTPrequests[index].length
  end

  private

  # Splice +pload+ into +buffer+ at the stream offset implied by +seqnum+.
  # The first payload byte of a flow has sequence number isn + 1, which maps
  # to offset 0.
  def insertpayload(buffer, isn, seqnum, pload)
    # compensate for sequence number wrap arounds (2**32)
    seqnum += SEQUENCE_SPACE if seqnum < isn
    # determine position in payload
    payloadposition = seqnum - isn - 1
    # insert padding if the segment starts beyond the current end
    if payloadposition > buffer.length
      buffer << " " * (payloadposition - buffer.length)
    end
    # overwrite exactly pload.length bytes (or append when at the end)
    buffer[payloadposition, pload.length] = pload
    buffer
  end

end


# initialize state
statetracker = State.instance

# Trollop flags are false (not nil) when absent, so test for truthiness
verbose = opts[:Verbose]
# length in bytes of the Ethernet MAC header
macheaderlength = 14

# open pcap and create an array of frames
frame_array = PacketFu::Read.f2a(:file => opts[:file])

# report total number of packets in verbose mode
puts "# of packets = #{frame_array.length}" if verbose

# for each packet in the array of packets
frame_array.each_with_index do |frame, frameindex|
  # layer 7 results of this frame; they stay nil unless the frame carries
  # HTTP data on an established connection
  protclass = httpindex = request_bytes = response_bytes = response_content_length = nil

  # report packet # in verbose mode (position in the capture, 1-based)
  puts "\n\n<---- Frame #{frameindex + 1} ---->" if verbose

  # skip frames too short to hold a MAC header
  next if frame.length < macheaderlength

  ## Layer 2 - determine destination/source MAC addresses and framing standard
  dstmac, srcmac, frame_type = L2_analyzeframe(frame[0..(macheaderlength - 1)])

  # report layer 2 information in verbose mode
  if verbose
    puts "-> Layer 2\nINFO:  Source MAC = #{srcmac}\nINFO:  Destination MAC = #{dstmac}\nINFO:  #{frame_type} detected as framing standard\nINFO:  Frame Length = #{frame.length}"
  end

  ## Layer 3 - determine IP header length, IP protocol, source ip, and destination ip address
  # only IPv4 carried in Ethernet II frames is analyzed further
  next unless frame_type == "EthernetII_IPv4"
  # skip frames too short to hold the fixed 20 byte IP header
  next if frame.length < macheaderlength + 20
  ipheaderlength, ipprotocol, sourceipaddress, destinationipaddress = L3_analyzepacket(frame[macheaderlength..macheaderlength + 60])

  # if IP address filter exists, skip packet if no match exists
  next if opts[:ip] && sourceipaddress != opts[:ip] && destinationipaddress != opts[:ip]

  # update MAC / IP address table
  statetracker.l23updateMACIPtable(srcmac, sourceipaddress, dstmac, destinationipaddress)

  # report layer 3 information in verbose mode
  if verbose
    puts "-> Layer 3\nINFO:  IP Packet Length = #{frame.length-14}\nINFO:  IP Header Length = #{ipheaderlength}\nINFO:  IP Protocol = #{ipprotocol}\nINFO:  Source IP address = #{sourceipaddress}\nINFO:  Destination IP address = #{destinationipaddress}"
  end

  ## Layer 4 - determine pertinent segment information
  # only TCP is analyzed further
  next unless ipprotocol == "TCP"
  l4start = macheaderlength + ipheaderlength
  # skip frames too short to hold the fixed 20 byte TCP header
  next if frame.length < l4start + 20
  # determine TCP src and destination ports, sequence #, offset, and flags
  sourceport, destinationport, tcpsequencenumber, l4headerlength, tcpflags = L4_analyzeTCPsegment(frame[l4start..(l4start + 60)])
  # determine TCP payload length, accounting for the fact that frames with less than 60 bytes will pad to 60 bytes
  if frame.length > 60
    l4payloadlength = frame.length - l4headerlength - ipheaderlength - macheaderlength
  else
    l4payloadlength = 0
  end
  socket, state, isn = statetracker.l4TCPflags(sourceipaddress, sourceport, destinationipaddress, destinationport, tcpflags, tcpsequencenumber)

  # report layer 4 information in verbose mode
  if verbose
    puts "-> Layer 4\nINFO:  TCP Source Port = #{sourceport}\nINFO:  TCP Destination Port = #{destinationport}\nINFO:  TCP Sequence Number = #{tcpsequencenumber}\nINFO:  TCP Offset / Header Length = #{l4headerlength}\nINFO:  TCP Payload Length = #{l4payloadlength}\nINFO:  TCP flags: #{tcpflags}"
    puts "INFO:  #{socket} is in state #{state}\nINFO:  ISN is #{isn}"
  end

  ## Layer 7 - determine application protocol information
  # is the connection established and a TCP payload exists?
  if state == "ESTABLISHED" && l4payloadlength > 0
    payload = frame[(l4start + l4headerlength)..-1]
    protclass = statetracker.l7classifyprotocol(sourceipaddress, sourceport, destinationipaddress, destinationport, payload)
    # HTTP?
    if protclass.include? "HTTP"
      # determine HTTP index
      httpindex = statetracker.l7indexhttp(sourceipaddress, sourceport, destinationipaddress, destinationport)
      # is this a HTTP response?
      if protclass.include? "RESPONSE"
        response_bytes, response_content_length = statetracker.l7processHTTPresponses(sourceipaddress, sourceport, destinationipaddress, destinationport, httpindex, tcpsequencenumber, payload)
        # have we reached the content length? if so, it is time to prep and write the request / response pair
        if response_bytes == response_content_length
          statetracker.l7modifyrequest(sourceipaddress, sourceport, destinationipaddress, destinationport, httpindex)
          statetracker.l7writeHTTP(sourceipaddress, sourceport, destinationipaddress, destinationport, httpindex)
        end
      else
        # no, it must be a HTTP request
        request_bytes = statetracker.l7processHTTPrequests(sourceipaddress, sourceport, destinationipaddress, destinationport, httpindex, tcpsequencenumber, payload)
      end
    end
  end

  # report layer 7 information in verbose mode
  if verbose
    puts "-> Layer 7\nINFO:  Protocol identified as #{protclass}\nINFO:  HTTP index # is #{httpindex}"
    if request_bytes
      puts "INFO:  #{request_bytes} total request bytes received"
    elsif response_bytes
      puts "INFO:  #{response_bytes} of #{response_content_length} response bytes received"
    end
  end
end

# if ethernet option is specified, print the MAC and IP address table
statetracker.l23writeMACIPtable if opts[:eth]

