#!/usr/bin/python

import string
from urlparse import urlparse
from hweb_config import *
import re
import random
import time
import shelve


def parse_req(path):
    """Parse a requested URL and return the values needed to search for a match.

    The request is lower-cased and only its path component (element 2 of
    the ``urlparse`` result) is kept, so any query string or fragment is
    dropped.

    Returns a two-element list ``[kind, url_path]`` where ``url_path`` is
    the lower-cased path and ``kind`` classifies its last '/'-separated
    segment:

    * the text after the dot when the segment contains exactly one dot
      (``'/a/test.asp'`` -> ``'asp'``);
    * ``'dir'`` when the segment is empty (the path ends in '/' or is empty);
    * ``'none'`` otherwise -- no dot at all, or more than one dot.
    """
    url_path = urlparse(path.lower())[2]
    last = url_path.split('/')[-1]

    pieces = last.split('.')
    if len(pieces) == 2:
        # Exactly "name.ext": the extension selects the signature table.
        kind = pieces[1]
    elif not last:
        kind = 'dir'
    else:
        kind = 'none'
    return [kind, url_path]


def rand_serv(server_type):
    """Return a randomly chosen server string.

    ``server_type`` selects the table to draw from:

    * ``'random_win'``  -> ``w_serv``
    * ``'random_unix'`` -> ``u_serv``
    * anything else     -> one of the two tables, picked by a coin flip

    ``w_serv`` and ``u_serv`` are mappings supplied by ``hweb_config``; a
    key is drawn uniformly at random and the value stored under it is
    returned.
    """
    if server_type == 'random_win':
        pool = w_serv
    elif server_type == 'random_unix':
        pool = u_serv
    elif random.randint(0, 1) < 1:
        pool = w_serv
    else:
        pool = u_serv

    # list() keeps this working when keys() returns a view instead of a list.
    return pool[random.choice(list(pool.keys()))]

def get_match(path):
    """Pattern matching routine. Uses the values returned by parse_req().

    The request is first tested against every regex key of ``generic``;
    if none matches, it is tested against the regex keys of
    ``extensions[kind]`` (``kind`` being the first value returned by
    ``parse_req``).  Matching is case-insensitive.  Table entries are read
    positionally: ``entry[0]`` is the server string, ``entry[1]`` (when the
    entry has more than one element) the response code and ``entry[2]``
    (when it has exactly three) the file to serve.

    Returns ``[server, family, response_code, file]``.  ``family`` is
    ``'random_win'``, ``'random_unix'``, ``'random_any'`` or whatever the
    extension table's ``'request_default'`` entry names.  The defaults are
    a random server of any family, code 200 and ``'index.html'``.

    Requests that match no signature are appended to ``log/newsigs.txt``.
    """
    search_tree, req = parse_req(path)
    result = rand_serv('random_any')
    # Start from the family that goes with the random default server so
    # that the return value is always fully defined, even when no table
    # matches and persistence is switched off.
    family = 'random_any'
    sig_test = 0
    r_code = 200
    page = 'index.html'
    # Outcome of the bogus-request check; None until it has been needed.
    # The check depends only on (req, path), so it is evaluated at most
    # once per call instead of once per non-matching pattern.
    bogus = None

    # Search the generic table; the first matching pattern wins.
    for sig, entry in generic.items():
        if re.search(sig, path, re.IGNORECASE):
            result = entry[0]
            if result[:5] == 'Micro':
                family = 'random_win'
            else:
                family = 'random_unix'
            if len(entry) != 1:
                r_code = entry[1]
                if len(entry) == 3:
                    page = entry[2]
            sig_test = 1
            break
        if persist == 1:
            if bogus is None:
                bogus = not check_bogus(req, path)
            if bogus:
                r_code = 404
            else:
                result = rand_serv('random_any')
            family = 'random_any'

    if search_tree in extensions and sig_test != 1:
        ext_sigs = extensions[search_tree]
        # No break here: every key is tried and the last match wins.
        for sig, entry in ext_sigs.items():
            if re.search(sig, req, re.IGNORECASE):
                result = entry[0]
                family = ext_sigs['request_default'][0]
                sig_test = 1
                if len(entry) != 1:
                    r_code = entry[1]
                    if len(entry) == 3:
                        page = entry[2]

        if sig_test == 0:
            if persist == 1:
                if bogus is None:
                    bogus = not check_bogus(req, path)
                if bogus:
                    r_code = 404
            try:
                result = ext_sigs['request_default'][0]
            except KeyError:
                with open('log/keyerror.txt', 'a') as keyerror:
                    keyerror.write('No Match:' + path + '\n')
            # A default starting with "random" is handed to rand_serv();
            # otherwise the default server is static.
            if result[:6] == 'random':
                result = rand_serv(ext_sigs['request_default'][0])
                family = ext_sigs['request_default'][0]
            else:
                # A static server still needs a family, otherwise a
                # request could not exist from that server.
                if result[:5] == 'Micro':
                    family = 'random_win'
                else:
                    family = 'random_unix'

    if sig_test != 1:
        with open('log/newsigs.txt', 'a') as newsigs:
            newsigs.write('No Match:' + path + '\n')

    return [result, family, r_code, page]


def _server_family(server):
    """Return the family whose table lists *server* as a value, else None."""
    if server in u_serv.values():
        return 'random_unix'
    if server in w_serv.values():
        return 'random_win'
    return None


def persist_ver(path, ip, SERVER, cache):
    """Pick the server string for a request, keeping it stable per client IP.

    ``cache`` is the filename of a shelf mapping ``ip`` to
    ``[server, timestamp, family]``.  ``SERVER`` is a fixed server string
    to present, or ``0`` when none is forced.

    With ``persist == 1``:

    * known IP whose entry is younger than ``persist_time`` seconds: the
      remembered server is reused, and a request that matched with code
      200 for a specific family is turned into a 404 when the remembered
      server is listed in the other family's table;
    * known IP with an expired entry, or unknown IP: the request is
      matched afresh and the outcome is stored for the IP.  For an unknown
      IP with a forced ``SERVER`` and a specific family, the code stays
      200 only if the matched server and ``SERVER`` are listed in the same
      family table; otherwise it becomes 404.

    With ``persist != 1`` the request is simply matched and nothing is
    stored.

    Returns ``[server, response_code, server_class, file]``.
    ``server_class`` is the family stored for the IP, or the family of the
    current match when nothing is stored for it.

    NOTE: it would be better to cache the server class initially so the
    check does not need to be run on subsequent requests.
    NOTE: family should not be stored with 'random_any' as a value; it is
    derived from the server string in the lookups below.
    """
    persistent = shelve.open(cache)
    try:
        if persist != 1:
            result, family, resp_code, page = get_match(path)
        elif ip in persistent:
            n_time = time.time()
            server, c_time = persistent[ip][:2]
            if persist_time > n_time - c_time:
                result = server
                _matched, family, resp_code, page = get_match(path)
                # A non-200 code already settles the answer.  A family of
                # 'random_any' never causes a 404 on family grounds.
                if resp_code == 200 and family != 'random_any':
                    if server in u_serv.values() and family != 'random_unix':
                        resp_code = 404
                    if server in w_serv.values() and family != 'random_win':
                        resp_code = 404
            else:
                result, family, resp_code, page = get_match(path)
                c_time = time.time()
                if family != 'random_any':
                    if SERVER != 0:
                        result = SERVER
                    persistent[ip] = [result, c_time, family]
                else:
                    # Derive the family from the matched server string.
                    looked_up = _server_family(result)
                    if looked_up is not None:
                        family = looked_up
                        if SERVER != 0:
                            result = SERVER
                        persistent[ip] = [result, c_time, family]
        else:
            result, family, resp_code, page = get_match(path)
            c_time = time.time()
            if family != 'random_any' and SERVER != 0:
                unix_servers = list(u_serv.values())
                win_servers = list(w_serv.values())
                both_unix = result in unix_servers and SERVER in unix_servers
                both_win = result in win_servers and SERVER in win_servers
                if not ((both_unix or both_win) and resp_code == 200):
                    resp_code = 404
                result = SERVER
                persistent[ip] = [result, c_time, family]
            else:
                looked_up = _server_family(result)
                if looked_up is not None:
                    family = looked_up
                    if SERVER != 0:
                        result = SERVER
                    persistent[ip] = [result, c_time, family]

        # Nothing is stored for the IP when persistence is off or when the
        # matched server is in neither family table; fall back to the
        # family of the current match instead of failing on the lookup.
        try:
            s_class = persistent[ip][2]
        except (KeyError, IndexError):
            s_class = family
    finally:
        persistent.close()
    return [result, resp_code, s_class, page]
			 
def check_bogus(req, path):
    """Check for bogus requests.

    Returns 1 when the request is acceptable and 0 when it is considered
    bogus:

    * 1 if ``path + '\\n'`` is a key of the ``urls-strict`` shelf (the
      other checks are then skipped);
    * 0 if ``req`` contains a run of ten consecutive word or whitespace
      characters;
    * 0 if any pattern in ``Bog_Words`` matches ``path`` (case-insensitive);
    * 1 otherwise.
    """
    strict = shelve.open('urls-strict')
    try:
        known = (path + '\n') in strict
    finally:
        # Close on every path, not only when the key was found.
        strict.close()
    if known:
        return 1

    if re.search(r'[\w\s]{10}', req):
        return 0
    for word in Bog_Words:
        if re.search(word, path, re.IGNORECASE):
            return 0
    return 1
		
 	

def serve_file(VER, path, file):
    """Return the contents of ``html/<file>`` for the server string ``VER``.

    When ``VER`` starts with ``'Micro'`` the file is returned unchanged.
    Otherwise the contents are used as a %-format template filled with
    ``(path, VER)``.  If the contents cannot be formatted that way (no or
    too few placeholders, or a literal ``%`` such as ``width="100%"``),
    they are returned unchanged.

    Raises ``IOError`` if the file cannot be opened.
    """
    with open('html/' + file, 'r') as in_file:
        template = in_file.read()

    if VER[:5] == 'Micro':
        return template
    try:
        return template % (path, VER)
    except (TypeError, ValueError):
        # TypeError: placeholder/argument count mismatch.
        # ValueError: a stray '%' that is not a valid format specifier.
        return template
	

def send_error_page(VER, path, server_class, code):
    """Return the error page for response ``code``.

    The page style follows ``server_class``: ``'random_unix'`` selects
    ``html/error-pages/apache_<code>.html`` and ``'random_win'`` selects
    ``html/error-pages/win_<code>.html``.  For any other class the choice
    falls back to the server string: a ``VER`` starting with ``'Micro'``
    gets the win page, everything else the apache page.

    Win pages are returned unchanged.  Apache pages are used as a %-format
    template filled with ``(path, VER)``; if the contents cannot be
    formatted that way they are returned unchanged, as serve_file() does.

    Raises ``IOError`` if the page cannot be opened.
    """
    if server_class == 'random_unix':
        windows = False
    elif server_class == 'random_win':
        windows = True
    else:
        windows = VER[:5] == 'Micro'

    if windows:
        page = "html/error-pages/win_" + str(code) + ".html"
    else:
        page = "html/error-pages/apache_" + str(code) + ".html"

    with open(page, 'r') as in_file:
        template = in_file.read()

    if windows:
        return template
    try:
        return template % (path, VER)
    except (TypeError, ValueError):
        # Unformattable template: serve it as-is rather than fail the
        # request while producing an error page.
        return template
