#!/usr/bin/env python

# IMPORTANT: If any module was installed by Easy_install, tools have to be imported before importing them in order to set up a CACHE directory for python-eggs.

import os, random, time, string, re, cPickle
from socket import gethostname

# Some egg packages need a cache directory. It is usually in the user's home, but for Apache you have to set one up with write permission.
#os.putenv('PYTHON_EGG_CACHE', '/var/tmp/.python-eggs') # putenv does not take effect immediately.
py_egg_dir = "/var/tmp/.python-eggs"
os.environ["PYTHON_EGG_CACHE"] = py_egg_dir #Note: On some platforms, including FreeBSD and Mac OS X, setting environ may cause memory leaks

# configuration files:
# All three paths point into the directory that holds this module (symlinks resolved by realpath).
Daemon_Host = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'DAEMON_HOST')
MPMDB_INI = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'mpmdb.ini')
CLUST_NODES = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'CLUSTER_NODES')
# The DAEMON_HOST file is executed as Python source at import time, so every name it
# assigns becomes a global of this module. Any failure (missing file, syntax error,
# exception raised by the file) is silently ignored.
# NOTE(review): whoever can write DAEMON_HOST can run code in this process -- confirm file permissions.
try:
	exec open(Daemon_Host).read()
except: pass
# NOTE(review): presumably user names that may not be registered -- confirm against callers.
RESERVED_USERNAME = ('root', 'admin', 'administrator', 'demo', 'guest')

def CPUInfo():
	"""Describe the CPUs of this machine and the nodes available for jobs.

	Returns a dict with the keys:
	  'n_threads' -- number of "processor" entries in /proc/cpuinfo
	  'n_cpus'    -- number of distinct "physical id" values
	  'n_cores'   -- sum of "cpu cores" over the physical CPUs
	  'nodes'     -- non-empty lines of the CLUSTER_NODES file if it exists,
	                 otherwise this host name repeated n_cores times

	When /proc/cpuinfo cannot be read every count falls back to 1; when it
	lacks the "physical id"/"cpu cores" lines one CPU is assumed and every
	logical processor is counted as a core, so the result never reports
	zero cores. 'nodes' is always present.
	"""
	# processor: No. of core or thread. from 0 to the total number of core/thread
	# physical id: ID of CPU. from 0 to the total number of CPU
	# siblings: number of cores/threads within a CPU
	# core id: Id of cores within a CPU
	# cpu cores: core number within a CPU
	try:
		f = open('/proc/cpuinfo')
		try: info = f.read()
		finally: f.close()
	except (IOError, OSError):
		info = '' # no /proc/cpuinfo on this platform: use the fallbacks below
	# the re.findall in python 2.3 cannot accept a third parameter, then use "(?m)" for multiple lines.
	procs = re.findall(r'(?m)^processor\s*:\s*(\d+)\b', info)
	cpu_ids = re.findall(r'(?m)^physical id\s*:\s*(\d+)\b', info)
	core_counts = [int(a) for a in re.findall(r'(?m)^cpu cores\s*:\s*(\d+)\b', info)]
	# one entry per physical CPU; repeated ids collapse onto the same key
	cores_by_cpu = dict(zip(cpu_ids, core_counts))
	n_threads = len(procs) or 1
	n_cpus = len(cores_by_cpu) or 1
	n_cores = sum(cores_by_cpu.values()) or n_threads
	if os.path.exists(CLUST_NODES):
		f = open(CLUST_NODES)
		try: lines = f.readlines()
		finally: f.close()
		nodes = [ln.strip() for ln in lines if ln.strip()]
	else:
		nodes = [gethostname()] * n_cores
	return {'n_threads':n_threads, 'n_cpus':n_cpus, 'n_cores':n_cores, 'nodes':nodes }

def UniqueNameWithinListSeq(prefix='', suffix='', names=[], num_len=4, start_num=0): # name can be a directory or a list of names
	"""Return the first prefix+NNNN+suffix name that is not in names.

	names is either a collection of existing names or the path of a
	directory whose entries are used as that collection. The number starts
	at start_num, is zero-padded to num_len digits and grows by one until
	the name is free.
	"""
	if type(names) in (type(''), type(u'')):
		# a path was given: compare against the entries of that directory
		names = os.listdir(names)
	counter = start_num
	while 1:
		candidate = prefix + str(counter).zfill(num_len) + suffix
		if candidate not in names:
			return candidate
		counter = counter + 1


def UniqueNameSeq(prefix='', suffix='', dir='.', num_len=4, start_num=0):
	"""Return the first prefix+NNNN+suffix name that does not exist in dir.

	The number starts at start_num, is zero-padded to num_len digits and is
	increased by one for as long as the path dir/name exists.
	"""
	counter = start_num
	while 1:
		candidate = prefix + str(counter).zfill(num_len) + suffix
		if not os.path.exists(os.path.join(dir, candidate)):
			return candidate
		counter = counter + 1

def TmpUniqueNameSeq(prefix='tmp', suffix='.tmp', dir='.', num_len=4, start_num=0):
	"""UniqueNameSeq with defaults suited to temporary files ('tmp' prefix, '.tmp' suffix)."""
	return UniqueNameSeq(prefix, suffix, dir, num_len, start_num)

def UniqueNameWithinListRand(prefix='', suffix='', names='.', num_len=6): # names can be a directory or a list of names
	"""Return a random prefix+NNNNNN+suffix name that is not in names.

	names is either a collection of existing names or the path of a
	directory whose entries are used as that collection. The number is
	drawn at random and zero-padded to num_len digits; drawing is repeated
	until the name is free, so the call does not return when all 10**num_len
	names are taken.
	"""
	# The test must look at 'names'. Testing the builtin 'dir' never matched, so a
	# directory path was compared by substring instead of being listed.
	if type(names) in (type(''), type(u'')):
		names = os.listdir(names)
	while 1:
		tmpname = prefix + str(int(random.random()*10**num_len)).zfill(num_len) + suffix
		if tmpname not in names:
			return tmpname

def UniqueNameRand(prefix='', suffix='', dir='.', num_len=6):
	"""Return a random prefix+NNNNNN+suffix name that does not exist in dir.

	A number is drawn at random, zero-padded to num_len digits, and redrawn
	for as long as the path dir/name exists.
	"""
	while 1:
		number = int(random.random()*10**num_len)
		candidate = prefix + str(number).zfill(num_len) + suffix
		if not os.path.exists(os.path.join(dir, candidate)):
			return candidate

def TmpUniqueNameRand(prefix='tmp', suffix='.tmp', dir='.', num_len=6):
	"""UniqueNameRand with defaults suited to temporary files ('tmp' prefix, '.tmp' suffix)."""
	return UniqueNameRand(prefix, suffix, dir, num_len)

def ExpDesign(chs):
	"""Validate the channel layout of a set of slides and build the design.

	chs is a sequence with one entry per slide; entry[0] and entry[1] are
	the two channel labels. The result is a dict with
	  "error_msg"     -- '' on success, otherwise the design error text
	  "design"        -- one item per slide (empty when validation failed early)
	  "has_reference" -- 1 if any slide carries 'Reference', else None
	Designs with a reference channel are still built but always reported as
	an error, because the R code cannot deal with them at present.
	"""
	error_design = 'Error in the experiment design!'

	def report(msg, design, has_ref):
		return {"error_msg" : msg, "design" : design, "has_reference" : has_ref}

	# at least two slides are needed
	if len(chs) < 2:
		return report(error_design, [], None)
	# the two channels of one slide must not be the same
	for slide in chs:
		if slide[0] == slide[1]:
			return report(error_design, [], None)

	has_ref = None
	for slide in chs:
		if 'Reference' in slide:
			has_ref = 1
			break

	if not has_ref:
		# no reference: the sign tells which channel holds the control
		design = []
		for slide in chs:
			if slide[0] == 'Control': # then slide[1] must be 'Experiment'
				design.append(1)
			else:
				design.append(-1)
		return report("", design, has_ref)

	# with a reference, every slide has to carry it
	for slide in chs:
		if "Reference" not in slide:
			return report(error_design, [], has_ref)
	design = []
	for slide in chs:
		if 'Experiment' in slide:
			design.append((1,1))
		else:
			design.append((1,0))
	# here need further work to check the order of Experiment/Control and Reference
	# Reference is disabled since the R code cannot deal with it at present
	return report(error_design, design, has_ref)
	
class Log:
	"""file like for writes with auto flush after each write
	to ensure that everything is logged, even during an
	unexpected exit.

	f may be None (every write is ignored), a path (opened in append mode)
	or an already open file object with a 'name' attribute. When the file
	grows beyond MAX_SIZE bytes it is renamed to a single backup
	('<name>_1<ext>', replacing an older backup) and a fresh file is started.
	'success' is False when logging is disabled or the file could not be
	opened."""
	MAX_SIZE = 1000000
	def __init__(self, f):
		self.success = True
		if f is None: self.success = False
		elif type(f) in (type(''), type(u'')): # byte or unicode path
			self.name = f
			try:
				self.f = open(f, 'a+')
			except (IOError, OSError): self.success = False
		else:
			self.f = f
			self.name = f.name
		if not self.success: return
		# put '_1' in front of the extension, or at the end if there is none
		if os.path.basename(self.name).find('.') < 0:
			self.back_name = self.name+'_1'
		else:
			i = self.name.rfind('.')
			self.back_name = self.name[:i]+'_1'+self.name[i:]
		self.limitSize()
	def write(self, s):
		"""Append s and flush; failures are swallowed so logging never breaks the caller."""
		if self.success:
			try:
				self.limitSize()
				self.f.write(s)
				self.f.flush()
			except Exception: pass
	def limitSize(self):
		"""Rotate the log into its backup file once it exceeds MAX_SIZE bytes."""
		self.f.seek(0,2)
		if self.f.tell() <= self.MAX_SIZE: return
		self.f.close()
		try:
			# remove an old backup first: rename does not overwrite on every platform
			if os.path.exists(self.back_name): os.remove(self.back_name)
			# rotate even when no backup existed yet; the original skipped this
			# case, so the log was never rotated and kept growing
			os.rename(self.name, self.back_name)
		except OSError: pass # rotation failed: keep appending to the current file
		self.f = open(self.name, 'a+')

def getErrInfo():
	"""Return the traceback of the exception currently being handled as a string."""
	import traceback
	# cStringIO only exists on Python 2; fall back to io.StringIO elsewhere
	try: from cStringIO import StringIO
	except ImportError: from io import StringIO
	buf = StringIO()
	traceback.print_exc(None, buf)
	return buf.getvalue()


########### unicode functions  ##############
# now in upload
#import codecs
#def recodeWithUTF8(s):
#	if s.startswith(codecs.BOM_UTF32_BE): return s[4:].decode('utf-32-be').encode('utf-8')
#	elif s.startswith(codecs.BOM_UTF32_LE): return s[4:].decode('utf-32-le').encode('utf-8')
#	elif s.startswith(codecs.BOM_UTF16_BE): return s[2:].decode('utf-16-be').encode('utf-8')
#	elif s.startswith(codecs.BOM_UTF16_LE): return s[2:].decode('utf-16-le').encode('utf-8')
#	elif s.startswith(codecs.BOM_UTF8): return s[3:]
#	else: return s


########### Crypto functions  ##############

import re, md5
from Crypto.Cipher import RC5
from base64 import encodestring, decodestring
#from binascii import b2a_hex
# Key and cipher object shared by encodeStr() and decodeStr().
# NOTE(review): the key is a literal in the source and the cipher runs in ECB mode,
# so strings encoded with it are obfuscated rather than protected -- confirm this is acceptable.
rc5_passwd = "anything"
rc5 = RC5.new(rc5_passwd, RC5.MODE_ECB)
esc_char = '_' # not used at present
avoid_chars = [] # not used at present

def encodeStr(file_path):
	"""Encrypt a string into text without whitespace, quotes or path separators.

	The input is left-padded with 1..8 'X' characters to a multiple of 8,
	encrypted with the module-level rc5 cipher, followed by the pad length
	digit and base64 encoded. '_' is then doubled, and every whitespace
	character and each of = / \\ ? * { } , : ' " ; is written as '_' plus
	its two-digit hex code.
	"""
	pad_len = 8 - len(file_path) % 8 # always 1..8, never 0
	if pad_len:
		file_path = 'X'*pad_len + file_path
	encoded = encodestring(rc5.encrypt(file_path)+str(pad_len))
	encoded = encoded[:-1] # to remove the trailing newline character ('\n') added by encodestring
	encoded = re.sub('_', '__', encoded)

	def hex_escape(m):
		return '_'+ hex(ord(m.group(0)))[2:].zfill(2)

	# to avoid "*", "?", and python dict symbols: "{", "}", ":", ","
	# to avoid ";", which is used to separate cookies in a string
	return re.sub(r'''\s|=|/|\\|\?|\*|\{|\}|,|:|'|"|;''', hex_escape, encoded)

def decodeStr(file_name):
	"""Reverse encodeStr(): undo the '_' escaping, base64-decode and decrypt.

	The last decoded character is the number of padding characters that
	encodeStr() put in front of the plain text; that many characters are
	dropped from the decrypted result.
	"""
	def unhex(m):
		return chr(int(m.groups()[0][1:], 16))

	def unhex_keep_prefix(m):
		return m.groups()[0]+chr(int(m.groups()[1][1:], 16))

	# recover "_dd" to "X"
	text = re.sub(r'(?:(?<=^)|(?<!_))(_[0-9a-fA-F]{2})', unhex, file_name)
	# recover "___dd" to "__X"
	text = re.sub(r'([^_](?:__)+)(_[0-9a-fA-F]{2})', unhex_keep_prefix, text)
	text = re.sub(r'([^_|^](?:__)*)(_[0-9a-fA-F]{2})', unhex_keep_prefix, text)
	# recover "__" to "_"
	text = re.sub(r'(__)', '_', text)

	raw = decodestring(text+'\n') # restore the trailing '\n' added by encodestring
	pad_len = int(raw[-1])
	plain = rc5.decrypt(raw[:-1])
	# A plain slice is required here: the former "pad_len and plain[pad_len:] or plain"
	# returned the padding itself whenever the original string was empty.
	return plain[pad_len:]

def ascMd5(src):
	"""Return an escaped, base64 text form of the MD5 digest of src.

	The repr() of the binary digest is base64 encoded; '_' is doubled and
	every whitespace character is written as '_' plus its two-digit hex code.
	"""
	# The call used to read md5.md5(srctab_str = re.compile(...)): a stray fragment had
	# been merged into the argument, so src was ignored and the call raised TypeError.
	obj = repr(md5.md5(src).digest())
	obj = encodestring(obj)
	obj = re.sub('_', '__', obj)
	obj = re.sub(r'\s', lambda m:'_'+hex(ord(m.group(0)))[2:].zfill(2), obj)
	return obj

def myMD5(src):
	"""Return the binary MD5 digest of src with '"' replaced by '2' and "'" by '1'."""
	digest = md5.md5(src).digest()
	digest = digest.replace('"','2')
	return digest.replace("'",'1')


# Patterns used by escSql() and descNT().
# esc_tab_str / esc_line_str: zero or more backslash pairs (group 1) followed by a
# backslash and the letter 't' / 'n'.
esc_tab_str = re.compile(r'((\\\\)*)\\t')  # the number of '\' before '\t' should be even
esc_line_str = re.compile(r'((\\\\)*)\\n')
# esc_str: two consecutive backslashes
esc_str = re.compile(r'\\\\')
# esc_sql matches a single or double quote; esc_sql_1 additionally matches a backslash
esc_sql = re.compile(r'([\'\"])') # or r'''([\\'"])'''    Syntax: esc_sql.sub(r'\\\1', obj_str)
esc_sql_1 = re.compile(r'''([\\'"])''')    #Syntax: esc_sql_1.sub(r'\\\1', obj_str)
def escSql(s, es=esc_sql):
	"""Put a backslash in front of every character of s matched by the pattern es.

	With the default pattern these are single and double quotes; pass
	esc_sql_1 to escape backslashes as well.
	"""
	escaped = es.sub(r'\\\1', s)
	return escaped

def escNT(s): # use escape for TAB and NEW LINE (and of course backslash too)
	"""Escape backslash, NEW LINE and TAB in a str; any other object is returned unchanged."""
	if type(s) is not type(''):
		return s
	escaped = s.replace('\\', '\\\\') # backslashes first, so the ones added below stay single
	escaped = escaped.replace('\n', '\\n')
	return escaped.replace('\t', '\\t')

def descNT(s): # reverse escNT
	"""Undo escNT(): turn '\\\\' into a backslash, '\\n' into NEW LINE and '\\t' into TAB.

	Only str objects are processed; any other object is returned unchanged.
	A backslash followed by any other character is left as it is.
	"""
	if type(s) is not type(''):
		return s

	def unescape_pair(m):
		c = m.group(1)
		if c == 'n': return '\n'
		if c == 't': return '\t'
		if c == '\\': return '\\'
		return m.group(0)

	# One left-to-right pass that consumes each backslash together with the character
	# it escapes. The previous chain of three substitutions was not anchored, so an
	# escaped backslash followed by 't' or 'n' decoded to backslash + TAB/NEW LINE.
	return re.sub(r'\\(.)', unescape_pair, s)


##### RSA functions #####

from string import *

def _getRSAkeys():
	"""Read the RSA key from 'rsakeys.txt' next to this module.

	If the file is missing it is first created by running openssl. Returns
	(public exponent, private exponent, modulus) as hex strings cut out of
	the text between the 'modulus:', 'publicExponent:', 'privateExponent:'
	and 'prime1:' markers.
	"""
	key_file = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'rsakeys.txt')
	if not os.path.exists(key_file):
		os.system('openssl genrsa 1024 | openssl rsa -text -noout > %s' % key_file)
	text = open(key_file).read()
	pos_mod = text.index('modulus:')
	pos_pub = text.index('publicExponent:')
	pos_priv = text.index('privateExponent:')
	pos_prime = text.index('prime1:')
	# raw sections between the markers (the offsets are the marker lengths)
	modulus = text[pos_mod+8:pos_pub].strip()
	pub_exp = text[pos_pub+15:pos_priv].strip()
	priv_exp = text[pos_priv+16:pos_prime].strip()
	# drop whitespace and ':' separators; the modulus also loses a leading '00'
	modulus = re.sub(r'\s|:|(^00)', r'', modulus)
	# keep what is inside the parentheses, minus its first two characters
	pub_exp = pub_exp[pub_exp.index('(')+3:pub_exp.index(')')]
	priv_exp = re.sub(r'\s|:', r'', priv_exp)
	return (pub_exp, priv_exp, modulus)

def getRSAkeys():
	"""Return the key tuple from _getRSAkeys(), or None.

	None is returned when the module global TRY_RSA is missing or false, and
	also when reading the keys fails for any reason.
	"""
	if globals().get('TRY_RSA', False):
		try:
			return _getRSAkeys()
		except:
			return None
	return None

def rsaStr(s, e, n, d=False, bits=8, hex_str=False):
	'''s is the string to encode/decode, e is the publicExponent/privateExponent(i.e private key) (hex string), n is modulus (public key) (hex string), d: True for decode or False for encode

	Unless hex_str is set, s is processed in chunks: each chunk is read as a
	big-endian number, raised to e modulo n and written back as a fixed number
	of characters; the chunk results are concatenated and returned.
	With hex_str set, the whole of s is parsed as one hexadecimal number and
	processed in a single step.'''
	# number of bytes needed for the modulus (two hex digits per byte, rounded up)
	l = (len(n)+1)/2
	# o: characters written per chunk, inb: characters read per chunk.
	# encode (d false): read l-1, write l; decode (d true): read l, write l-1
	o, inb = l-d, l-1+d
	e, n = atol(e, 16), atol(n, 16)
	#s = s[:inb]
	rlt_ss = []
	s_left = s
	while s_left:
		if hex_str:
			# the whole input is one hex number: this is the only iteration
			s_left = ''
			s = atol(s, 16)
		else:
			s = s_left[:inb]
			s_left = s_left[inb:]
			s = reduce(lambda x,y:(long(x)<<8)+y, map(ord,s)) # same strings with different number of lead chars '\x00' will produce the same number !!!!!
		rlt_num = pow(s,e,n)
		#open('rlt_num.txt', 'a').write('\n'+str(rlt_num))
		# write rlt_num as o characters, most significant first, 'bits' bits apart
		rlt_s = ''.join(map(lambda i,b=rlt_num:chr(b>>bits*i&255),range(o-1,-1,-1)))
		# when decoding, every NUL character is removed from the chunk (not only leading ones)
		if d: rlt_s = re.sub(r'\x00*', '', rlt_s)
		rlt_ss.append(rlt_s)
	return ''.join(rlt_ss)

def otpStr(s, k):
	"""XOR every character of s with the character of k at the same position.

	A key shorter than s is repeated to the length of s; a longer key is cut.
	Applying the function twice with the same key gives back s. An empty key
	with a non-empty s raises ZeroDivisionError.
	"""
	ns, nk = len(s), len(k)
	# '//' keeps the integer division independent of the interpreter's '/' semantics
	if nk < ns: k = k*(ns//nk) + k[:ns%nk]
	# zip stops at the end of s, so only the first ns key characters are used
	return ''.join([chr(ord(a) ^ ord(b)) for a, b in zip(s, k)])

def getSec(n=2):
	"""Return the lowest n bytes of the current time in whole seconds."""
	# 256**n - 1 is the same mask that used to be built by parsing 'FF'*n as hex
	return int(time.time()) & (256**n - 1)

def getSecStr(n=2, hex_str=False):
	"""Return getSec(n) rendered by Num2Str as n characters (or 2*n hex digits)."""
	seconds = getSec(n)
	return Num2Str(seconds, n, hex_str)

def Num2Str(num, n=2, hex_str=False):
	"""Render the lowest n bytes of num, most significant byte first.

	Returns n characters, or 2*n lower-case hex digits when hex_str is set.
	"""
	byte_vals = [num >> 8*pos & 255 for pos in range(n-1, -1, -1)]
	if hex_str:
		return ''.join(['%02x' % b for b in byte_vals])
	return ''.join([chr(b) for b in byte_vals])

def Str2Num(s, hex_str=False):
	"""Read s as a big-endian number (the inverse of Num2Str).

	Without hex_str every character is one byte; with hex_str every pair of
	hex digits is one byte. An empty string gives 0 (it used to raise
	TypeError from reduce()).
	"""
	if hex_str:
		byte_vals = [int(s[i:i+2], 16) for i in range(0, len(s), 2)]
	else:
		byte_vals = [ord(c) for c in s]
	num = 0
	for b in byte_vals:
		num = num * 256 + b # multiplication grows to a long integer when needed
	return num

def escape(s):
	"""Replace every character other than 0-9 A-Z a-z _ - + . / * by '%' plus its two-digit lower-case hex code."""
	def percent_hex(m):
		return '%%%02x' % ord(m.group(1))
	return re.sub(r'([^0-9A-Za-z_\-+\./\*])', percent_hex, s)

def unescape(s):
	"""Replace every '%' followed by two hex digits by the character with that code (reverse of escape)."""
	def from_hex(m):
		# int() replaces the deprecated string.atoi
		return chr(int(m.group(1), 16))
	return re.sub(r'%([0-9a-fA-F]{2})', from_hex, s)

########### Databases functions  ##############

import MySQLdb

# Default file names of the database settings used by encodeDB(), decodeDB() and get_dbstr():
DB_SRC = 'DBSRC' # plain-text settings
DB_OBJ = 'DB' # the same settings after encodeStr()
DB_EXAMPLE='DBSRC_EXAMPLE' # read by get_dbstr() when neither of the above is found
# Table of lower-case MySQL keywords, each mapped to a 0/1 flag.
# NOTE(review): the meaning of the flag is not visible in this part of the file --
# presumably 1 marks words that must not be used as identifiers; confirm against the callers.
MYSQL_KEYS = {
"action" : 0, "add" : 1, "aggregate" : 1, "all" : 1, 
"alter" : 1, "after" : 1, "and" : 1, "as" : 1, 
"asc" : 1, "avg" : 1, "avg_row_length" : 1, "auto_increment" : 1, 
"between" : 1, "bigint" : 1, "bit" : 0, "binary" : 1, 
"blob" : 1, "bool" : 1, "both" : 1, "by" : 1, 
"cascade" : 1, "case" : 1, "char" : 1, "character" : 1, 
"change" : 1, "check" : 1, "checksum" : 1, "column" : 1, 
"columns" : 1, "comment" : 1, "constraint" : 1, "create" : 1, 
"cross" : 1, "current_date" : 1, "current_time" : 1, "current_timestamp" : 1, 
"data" : 1, "database" : 1, "databases" : 1, "date" : 0, 
"datetime" : 1, "day" : 1, "day_hour" : 1, "day_minute" : 1, 
"day_second" : 1, "dayofmonth" : 1, "dayofweek" : 1, "dayofyear" : 1, 
"dec" : 1, "decimal" : 1, "default" : 1, "delayed" : 1, 
"delay_key_write" : 1, "delete" : 1, "desc" : 1, "describe" : 1, 
"distinct" : 1, "distinctrow" : 1, "double" : 1, "drop" : 1, 
"end" : 1, "else" : 1, "escape" : 1, "escaped" : 1, 
"enclosed" : 1, "enum" : 0, "explain" : 1, "exists" : 1, 
"fields" : 1, "file" : 1, "first" : 1, "float" : 1, 
"float4" : 1, "float8" : 1, "flush" : 1, "foreign" : 1, 
"from" : 1, "for" : 1, "full" : 1, "function" : 1, 
"global" : 1, "grant" : 1, "grants" : 1, "group" : 1, 
"having" : 1, "heap" : 1, "high_priority" : 1, "hour" : 1, 
"hour_minute" : 1, "hour_second" : 1, "hosts" : 1, "identified" : 1, 
"ignore" : 1, "in" : 1, "index" : 1, "infile" : 1, 
"inner" : 1, "insert" : 1, "insert_id" : 1, "int" : 1, 
"integer" : 1, "interval" : 1, "int1" : 1, "int2" : 1, 
"int3" : 1, "int4" : 1, "int8" : 1, "into" : 1, 
"if" : 1, "is" : 1, "isam" : 1, "join" : 1, 
"key" : 1, "keys" : 1, "kill" : 1, "last_insert_id" : 1, 
"leading" : 1, "left" : 1, "length" : 1, "like" : 1, 
"lines" : 1, "limit" : 1, "load" : 1, "local" : 1, 
"lock" : 1, "logs" : 1, "long" : 1, "longblob" : 1, 
"longtext" : 1, "low_priority" : 1, "max" : 1, "max_rows" : 1, 
"match" : 1, "mediumblob" : 1, "mediumtext" : 1, "mediumint" : 1, 
"middleint" : 1, "min_rows" : 1, "minute" : 1, "minute_second" : 1, 
"modify" : 1, "month" : 1, "monthname" : 1, "myisam" : 1, 
"natural" : 1, "numeric" : 1, "no" : 0, "not" : 1, 
"null" : 1, "on" : 1, "optimize" : 1, "option" : 1, 
"optionally" : 1, "or" : 1, "order" : 1, "outer" : 1, 
"outfile" : 1, "pack_keys" : 1, "partial" : 1, "password" : 1, 
"precision" : 1, "primary" : 1, "procedure" : 1, "process" : 1, 
"processlist" : 1, "privileges" : 1, "read" : 1, "real" : 1, 
"references" : 1, "reload" : 1, "regexp" : 1, "rename" : 1, 
"replace" : 1, "restrict" : 1, "returns" : 1, "revoke" : 1, 
"rlike" : 1, "row" : 1, "rows" : 1, "second" : 1, 
"select" : 1, "set" : 1, "show" : 1, "shutdown" : 1, 
"smallint" : 1, "soname" : 1, "sql_big_tables" : 1, "sql_big_selects" : 1, 
"sql_low_priority_updates" : 1, "sql_log_off" : 1, "sql_log_update" : 1, "sql_select_limit" : 1, 
"sql_small_result" : 1, "sql_big_result" : 1, "sql_warnings" : 1, "straight_join" : 1, 
"starting" : 1, "status" : 1, "string" : 1, "table" : 1, 
"tables" : 1, "temporary" : 1, "terminated" : 1, "text" : 0, 
"then" : 1, "time" : 0, "timestamp" : 0, "tinyblob" : 1, 
"tinytext" : 1, "tinyint" : 1, "trailing" : 1, "to" : 1, 
"type" : 1, "use" : 1, "using" : 1, "unique" : 1, 
"unlock" : 1, "unsigned" : 1, "update" : 1, "usage" : 1, 
"values" : 1, "varchar" : 1, "variables" : 1, "varying" : 1, 
"varbinary" : 1, "with" : 1, "write" : 1, "when" : 1, 
"where" : 1, "year" : 1, "year_month" : 1, "zerofill" : 1
}


def encodeDB(s='', DB_OBJ=DB_OBJ, DB_SRC=DB_SRC, RM_SRC=True):
	"""Write the encoded database settings to the file DB_OBJ.

	The plain text is s if given, otherwise the content of the file DB_SRC;
	in the latter case DB_SRC is removed afterwards unless RM_SRC is false.
	Nothing happens when s is empty and DB_SRC does not exist.
	"""
	if s:
		plain = s
	elif os.path.exists(DB_SRC):
		src = open(DB_SRC)
		try: plain = src.read()
		finally: src.close()
	else:
		return
	# encode before opening the target, so a failure cannot leave it truncated
	encoded = encodeStr(plain)
	obj = open(DB_OBJ, 'w')
	try: obj.write(encoded)
	finally: obj.close()
	if not s and RM_SRC: os.unlink(DB_SRC)

def decodeDB(s='', DB_OBJ=DB_OBJ, DB_SRC=DB_SRC, RM_OBJ=False):
	"""Write the decoded (plain text) database settings to the file DB_SRC.

	The encoded text is s if given, otherwise the content of the file DB_OBJ;
	in the latter case DB_OBJ is removed afterwards when RM_OBJ is true.
	Nothing happens when s is empty and DB_OBJ does not exist.
	"""
	if s:
		encoded = s
	elif os.path.exists(DB_OBJ):
		obj = open(DB_OBJ)
		try: encoded = obj.read()
		finally: obj.close()
	else:
		return
	# decode before opening the target, so a failure cannot leave it truncated
	plain = decodeStr(encoded)
	src = open(DB_SRC, 'w')
	try: src.write(plain)
	finally: src.close()
	if not s and RM_OBJ: os.unlink(DB_OBJ)

def get_dbstr(DB_SRC = DB_SRC, DB_OBJ = DB_OBJ, DB_EXAMPLE = DB_EXAMPLE, dbstr_encoded = True):		
	"""Return the text of the database settings.

	Search order:
	  1. DB_SRC (plain text), then DB_OBJ (encoded), as given -- relative
	     names are resolved against the current directory;
	  2. the same two names in the directory of this module, if that is a
	     different directory;
	  3. DB_EXAMPLE in the directory of this module.
	A plain-text file that is found gets encoded with encodeDB(), which
	removes the plain file; a failure of that step is ignored.
	dbstr_encoded is accepted for compatibility and not used.
	Raises IOError when no settings file exists (this used to surface as an
	UnboundLocalError on the return statement).
	NOTE(review): callers in this module execute the returned text as Python code.
	"""
	def read_file(path):
		f = open(path)
		try: return f.read()
		finally: f.close()

	def read_plain(src, obj):
		text = read_file(src)
		# best effort: keep only the encoded copy on disk
		try: encodeDB(DB_OBJ=obj, DB_SRC=src)
		except Exception: pass
		return text

	if os.path.exists(DB_SRC):
		return read_plain(DB_SRC, DB_OBJ)
	if os.path.exists(DB_OBJ):
		return decodeStr(read_file(DB_OBJ))
	# now search in the directory of tools.py
	wd = os.path.split(os.path.realpath(__file__))[0]
	if wd != os.path.realpath(os.getcwd()):
		src_last = os.path.join(wd, DB_SRC)
		obj_last = os.path.join(wd, DB_OBJ)
		if os.path.exists(src_last):
			return read_plain(src_last, obj_last)
		if os.path.exists(obj_last):
			return decodeStr(read_file(obj_last))
	example = os.path.join(wd, DB_EXAMPLE)
	if os.path.exists(example):
		return read_file(example)
	raise IOError('No database settings found: tried %s, %s and %s' % (DB_SRC, DB_OBJ, example))

def connectDB():
	"""Open a MySQLdb connection with the settings returned by get_dbstr().

	The settings text is executed as Python code inside this function; the
	connect call then reads the names host, db, port, user and passwd, so the
	text is expected to assign all of them.
	NOTE(review): anyone who can write the settings files can run code in this
	process -- confirm the permissions of those files.
	"""
	exec get_dbstr()
	return MySQLdb.connect(host=host, db=db, port=port, user=user, passwd=passwd)
	
def getConnectionCursor(host=None, db=None, port=None, user=None, passwd=None):
	"""Open a MySQLdb connection and return (connection, cursor).

	The settings text from get_dbstr() is executed as Python code inside this
	function and is expected to assign host, db, port, user and passwd. Any
	argument that is not None overrides the value from the settings.
	"""
	# keep the caller's values: executing the settings rebinds the parameter names
	phost, pdb, pport, puser, ppasswd = host, db, port, user, passwd
	exec get_dbstr()
	if phost is not None: host = phost
	if pdb is not None: db = pdb
	if pport is not None: port = pport
	if puser is not None: user = puser
	if ppasswd is not None: passwd = ppasswd
	connection = MySQLdb.connect(host=host, db=db, port=port, user=user, passwd=passwd)
	cursor = connection.cursor()
	return connection, cursor

def getCursor(host=None, db=None, port=None, user=None, passwd=None):
	"""Return only the cursor of a new connection made by getConnectionCursor()."""
	connection, cursor = getConnectionCursor(host=host, db=db, port=port, user=user, passwd=passwd)
	return cursor

# File passed to Log() by the database helpers below; with None, Log() ignores every write.
TOOLS_LOG = None #os.path.join(os.path.split(os.path.realpath(__file__))[0], 'tools.log')
#TOOLS_LOG = None #'/var/log/webarray/sql.log'

from sets import Set
from array import array
def inquireDB(sql_statement, cursor=None, connection=None, fetch=False, use_str=True):
	"""Execute one SQL statement, commit, and return its result.

	cursor / connection -- use the given cursor, or a new cursor on the given
	    connection, or a new connection from getConnectionCursor(). Whatever
	    is created here is closed again before returning, also on errors.
	fetch   -- when false the row count is returned; when true the fetched
	    rows are returned ([] if there are none).
	use_str -- with fetch: if any value is a sets.Set or an array, the rows
	    are returned as lists in which a Set is replaced by one of its
	    elements and an array by its string form ('' when empty).
	A failing statement is written to the TOOLS_LOG log and the exception
	is raised again.
	"""
	close_cursor = close_connection = False
	if not cursor:
		if connection:
			cursor = connection.cursor()
		else:
			connection, cursor = getConnectionCursor()
			close_connection = True
		close_cursor = True

	def plain_value(v):
		if type(v) is Set: return v and v.pop() or ''
		if type(v) is array: return v and v.tostring() or ''
		return v

	try:
		try:
			cursor.execute(sql_statement)
			rlt = cursor.rowcount
			cursor.connection.commit() # MySQL on windows need this !!!!!!!!!!!!!!!!!!!!!!!!!
		except:
			# save sql_statement
			Log(TOOLS_LOG).write('\n\n%s\n\t%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), sql_statement))
			raise
		if fetch:
			rlt = rlt and cursor.fetchall() or []
			if rlt and use_str:
				# look at every row: a column can be None in the first row and a Set in a later one
				convert = False
				for row in rlt:
					for v in row:
						if type(v) is Set or type(v) is array:
							convert = True
							break
					if convert: break
				if convert: rlt = [[plain_value(v) for v in row] for row in rlt]
	finally:
		# release what was opened here, also when the statement failed
		if close_cursor:
			cursor.close()
			if close_connection: connection.close()
	return rlt

#from sets import Set
#def getFieldVal(v):
#	if type(v) is Set: return v.pop()
#	return v

def killProcs(pids):
	"""Kill the given processes, then wait 10 seconds.

	Every entry of pids is an int or a string 'pid' or 'host:pid'. On posix
	the process gets signal 9; for a host other than '', 'localhost' or this
	machine the kill is run through ssh. On other systems win32api is tried,
	the host part is ignored and every error is swallowed.
	"""
	if os.name != 'posix':
		# try windows
		try:
			import win32api
			for pid in pids:
				if not isinstance(pid, int):
					pid = int(pid.split(':')[-1]) # only work on the local windows
				win32api.TerminateProcess(pid, 0)
		except: pass
	else:
		for pid in pids:
			if isinstance(pid, int):
				os.kill(pid, 9) # signal.SIGKILL
				continue
			parts = pid.split(':')
			if len(parts) == 1:
				os.kill(int(parts[0]), 9)
				continue
			hostname, target = parts[0].strip(), int(parts[-1])
			if hostname in ('', 'localhost', gethostname()):
				os.kill(target, 9)
			else:
				os.system('ssh %s kill -9 %d' % (hostname, target))
	time.sleep(10)

def getPids(req_id, cursor=None):
	"""Return the process entries stored for a request.

	They are read from the error_msg column of the request, split at ';'.
	Any failure (bad id, no such request, database error) gives [].
	"""
	try:
		rows = inquireDB('SELECT error_msg FROM requests WHERE req_id=%d' % req_id, cursor=cursor, fetch=True)
		return rows[0][0].split(';')
	except:
		return []

# SAVE pid 
def savePid(req_id, pid=None, cursor=None, append=True):
	"""Store a process entry in the error_msg column of a request.

	pid defaults to the current process; an int is stored as 'hostname:pid'.
	With append the entry is added to the ones already stored, otherwise it
	replaces them. Database errors are ignored.
	"""
	hostname = gethostname()
	if pid is None: pid = os.getpid()
	if isinstance(pid, int): pid = hostname + ':' + str(pid)
	entries = []
	if append: entries = getPids(req_id, cursor=cursor)
	entries.append(pid)
	joined = ';'.join(entries)
	try: inquireDB('UPDATE requests SET error_msg="%s" WHERE req_id=%d' % (joined, req_id), cursor=cursor)
	except: pass

# SAVE db event
def saveEvent(ev_time=None, user_id=None, ev_catcode=None, ev_valcode=None, ev_valstr=None, cur=None):
	"""Insert one row into the evlog table and return the number of rows added.

	ev_time    -- time string; defaults to the current UTC time
	user_id    -- numeric id, user name (str) or None for "no user"
	ev_catcode -- category name (str, looked up in db_vars.TYPE_EVENT_CODE,
	              'NULL' if unknown) or a value stored as it is
	ev_valcode, ev_valstr -- optional values; ev_valstr is quote-escaped
	cur        -- cursor to use; without it a connection is opened and
	              closed here
	Returns 0 without writing when freeze_demo is set and the user is 'demo'.
	NOTE(review): freeze_demo is not defined in the visible part of this
	module -- presumably it comes from the DAEMON_HOST settings; confirm.
	"""
	own_connection = None
	if cur: mycur = cur
	else: own_connection, mycur = getConnectionCursor()
	try:
		if ev_time is None: ev_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
		fields, vals = ['ev_time'], ['"%s"' % ev_time]
		username = ''
		if type(user_id) is str:
			username = user_id
			rows = inquireDB('SELECT id FROM users WHERE user_name="%s"' % escSql(user_id, esc_sql_1), cursor=mycur, fetch=True)
			if rows: user_id = rows[0][0]
			else: user_id = None
		elif user_id is not None: # user_id should be an int ID
			rows = inquireDB('SELECT user_name FROM users WHERE id=%d' % user_id, cursor=mycur, fetch=True)
			if rows: username = rows[0][0]
		if freeze_demo and username == 'demo': return 0

		if user_id:
			fields.append('user_id')
			vals.append(str(user_id))
		if ev_catcode is not None:
			fields.append('ev_catcode')
			if type(ev_catcode) is str:
				from db_vars import TYPE_EVENT_CODE
				vals.append(str(TYPE_EVENT_CODE.get(ev_catcode, 'NULL')))
			else:
				# Stored as given. The table lookup used to run for every type although
				# TYPE_EVENT_CODE is only imported for names, which raised UnboundLocalError.
				vals.append(str(ev_catcode))
		if ev_valcode is not None:
			fields.append('ev_valcode')
			vals.append(str(ev_valcode))
		if ev_valstr is not None:
			fields.append('ev_valstr')
			vals.append('"%s"' % escSql(ev_valstr))
		sql_statement = 'INSERT INTO evlog (%s) VALUES (%s)' % (', '.join(fields), ', '.join(vals) )
		Log(TOOLS_LOG).write('\n\n%s\n\t%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), sql_statement))
		mycur.execute(sql_statement)
		n = mycur.rowcount
		mycur.connection.commit()
		return n
	finally:
		# only release what was opened here
		if own_connection is not None:
			mycur.close()
			own_connection.close()

def stopReq(req_id=None, req_name=None, req_type=None, user_id=None, user_name=None, cur=None, max_wait=60):
	"""Stop a request that is waiting or being worked on.

	The request is given by req_id, or by req_name together with user_id or
	user_name. A waiting request is marked as stopped. For a working request
	(other than the database-filling categories) the daemon is told to stop
	the job and the state is polled every 2 seconds until it is no longer
	"working", the request disappears, or max_wait seconds have passed.
	req_type is accepted for compatibility and not used. Returns None.
	"""
	import db_vars
	own_connection = None
	if cur is None: own_connection, cur = getConnectionCursor()
	try:
		# get req_id
		if req_id is None:
			if req_name is None or (user_id is None and user_name is None): return
			name_sql = escSql(req_name, esc_sql_1)
			if user_id is None: # use user_name
				rows = inquireDB('SELECT r.req_id FROM requests r, users u WHERE u.user_name="%s" && u.id=r.user_id && r.req_name="%s"' % (escSql(user_name, esc_sql_1), name_sql), cursor=cur, fetch=True)
			else:
				rows = inquireDB('SELECT req_id FROM requests WHERE user_id=%d && req_name="%s"' % (user_id, name_sql), cursor=cur, fetch=True)
			if not rows: return
			req_id = rows[0][0]
		# get state
		rows = inquireDB('SELECT req_state, category FROM requests WHERE req_id=%d' % req_id, cursor=cur, fetch=True)
		if not rows: return
		req_state, req_cat = rows[0]
		if req_state != db_vars.STATE_WORKING: # set to STOP
			if req_state == db_vars.STATE_WAITING:
				# through inquireDB so that the change is committed
				inquireDB('UPDATE requests SET req_state=%d WHERE req_id=%d' % (db_vars.STATE_STOPPED, req_id), cursor=cur)
			return
		if req_cat in db_vars.TYPE_WEBARRAYDB_FILL: return
		# stop a working job
		tellDaemon('Stop jobs %d' % req_id)
		time.sleep(3)
		# bounded wait, so a daemon that never answers cannot block the caller forever
		deadline = time.time() + max_wait
		while req_state == db_vars.STATE_WORKING and time.time() < deadline:
			time.sleep(2)
			rows = inquireDB('SELECT req_state FROM requests WHERE req_id=%d' % req_id, cursor=cur, fetch=True)
			if not rows: return
			# first column of the first row; taking the row itself never equalled
			# STATE_WORKING, which ended the wait after a single poll
			req_state = rows[0][0]
	finally:
		if own_connection is not None:
			cur.close()
			own_connection.close()

# SAVE user_params to database
# SAVE user_params to database
def saveReq(req_type, req_name, req_params, username):
	"""Store a request of a user, replacing an older request of the same name.

	req_params is pickled into the req_info column. A job still running under
	this name is stopped first, then the old row is deleted and the new one
	inserted together with a "request" event.
	Returns the number of inserted rows (0 if the insert or the event failed),
	False if the user does not exist, and 0 without doing anything when
	freeze_demo is set and the user is 'demo'.
	NOTE(review): freeze_demo is not defined in the visible part of this
	module -- presumably it comes from the DAEMON_HOST settings; confirm.
	"""
	if freeze_demo and username == 'demo': return 0
	# escape \, ', " which may cause problem in SQL string
	req_str = esc_sql_1.sub(r'\\\1', cPickle.dumps(req_params))
	connection, cursor = getConnectionCursor()
	try:
		cursor.execute('SELECT id FROM users WHERE user_name="%s"' % escSql(username, esc_sql_1))
		if cursor.rowcount < 1: return False # no such user
		user_id = cursor.fetchone()[0]

		# stop a running job first
		stopReq(req_name=req_name, req_type=req_type, user_id=user_id, cur=cursor)

		date_time = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
		name_sql = escSql(req_name, esc_sql_1)
		# no category filter: the same name cannot be reused even if the requests are of
		# different type, because they may use the same folder
		cursor.execute('DELETE FROM requests WHERE user_id=%d AND req_name="%s"' % (user_id, name_sql))
		sql_statement = 'INSERT INTO requests (user_id, req_name, req_time, req_info, category) VALUES (%d, "%s", "%s", "%s", %d)' % (user_id, name_sql, date_time, req_str, req_type)
		try:
			cursor.execute(sql_statement)
			n = cursor.rowcount
			saveEvent(ev_time=date_time, user_id=user_id, ev_catcode="request", ev_valcode=req_type, ev_valstr=None, cur=cursor)
		except Exception:
			n = 0
			Log(TOOLS_LOG).write('\n\n%s\n\t%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), sql_statement))
		connection.commit()
		return n
	finally:
		# also reached on errors, so the connection is never left open
		cursor.close()
		connection.close()

def copyReq(req=None, obj_user='', src_user='', cur=None): 
	"""Copy a request row to another user and return the number of rows added.

	req      -- request id, or request name (str; src_user is then required)
	src_user -- owner of the request, as id (int) or name
	obj_user -- receiving user, as id or name (str)
	cur      -- cursor to use; without it a connection is opened and closed here
	Returns None when an argument is missing or the request or a user cannot
	be found.
	"""
	# req, src_user, obj_user can be name or id
	if not req or not obj_user: return None
	own_connection = None
	if not cur: own_connection, cur = getConnectionCursor()
	try:
		if type(req) is str:
			if not src_user: return None
			name_sql = escSql(req, esc_sql_1)
			if type(src_user) is int:
				# this statement used to end in a stray ')' and always failed
				sql = 'SELECT req_id FROM requests WHERE req_name="%s" AND user_id=%d' % (name_sql, src_user)
			else: # str
				sql = 'SELECT req_id FROM requests WHERE req_name="%s" AND user_id=(SELECT id FROM users WHERE user_name="%s")' % (name_sql, escSql(src_user, esc_sql_1))
			rows = inquireDB(sql, cursor=cur, fetch=True)
			if not rows: return None
			req = rows[0][0]
		if type(obj_user) is str:
			rows = inquireDB('SELECT id FROM users WHERE user_name="%s"' % escSql(obj_user, esc_sql_1), cursor=cur, fetch=True)
			if not rows: return None
			obj_user = rows[0][0]

		# through inquireDB so that the new row is committed; it returns the row count
		return inquireDB('INSERT INTO requests (user_id, req_time, solve_time, req_state, req_name, req_info, availability, category, error_msg) SELECT %d, req_time, solve_time, req_state, req_name, req_info, availability, category, error_msg FROM requests WHERE req_id=%d' % (obj_user, req), cursor=cur)
	finally:
		if own_connection is not None:
			cur.close()
			own_connection.close()


def getRltDir(req_ids, cursor): # now only return the result dir for the user logged on
	"""Return, for every id in req_ids, the parent of the request's result folder.

	The folder is read from the unpickled req_info of the request, under a
	key that depends on the request category ('result_dir' unless listed
	below). An entry is None when the request does not exist or has no such
	key. An empty req_ids gives [].
	NOTE(review): req_info is unpickled as stored; this is only safe while
	nothing untrusted can write that column.
	"""
	if not req_ids: return [] # "IN ()" is not valid SQL
	cursor.execute('SELECT category, req_info, req_id FROM requests WHERE req_id IN (%s) ORDER BY req_id' % ', '.join(map(str, req_ids)))
	if not cursor.rowcount: return [None] * len(req_ids)
	import db_vars

	def category(name):
		# A module global of that name wins, as before. Otherwise the value is read from
		# db_vars, the module imported above, instead of failing with NameError.
		if name in globals(): return globals()[name]
		return getattr(db_vars, name)

	tp_dic = {category('TYPE_LINEAR_DUAL'):'result.data.dir', category('TYPE_LINEAR_AFFY'):'result.data.dir', category('TYPE_NORMPCA'):'output_dir'} #else :'result_dir'}
	idir = {}
	for r_cat, r_info, r_id in cursor.fetchall():
		r_info = cPickle.loads(r_info)
		dirnm = tp_dic.get(r_cat, 'result_dir')
		if dirnm in r_info: idir[r_id] = os.path.split(r_info[dirnm])[0]
		else: idir[r_id] = None
	return [idir.get(a, None) for a in req_ids]


def delReqs(result_dir=None, r_ids=[], r_names=[], username=''): 
	"""Delete requests from the database together with their result folders.

	r_ids      -- ids of the requests to delete
	r_names    -- folder names of the requests; if given it should match
	              r_ids one to one, otherwise the names stored in the
	              database are used
	result_dir -- folder that holds the request folders; without it the
	              location is taken from each request (getRltDir)
	username   -- when given without result_dir, only folders that belong
	              to this user are removed
	Requests that are being worked on are not deleted here: their ids are
	sent to the daemon, except for database-filling jobs, which are left alone.
	Returns 0 without doing anything when freeze_demo is set and username is
	'demo', otherwise None.
	NOTE(review): freeze_demo is not defined in the visible part of this
	module -- presumably it comes from the DAEMON_HOST settings; confirm.
	"""
	# should not delete reqs on working
	if freeze_demo and username == 'demo': return 0
	import db_vars
	import shutil
	nm_working, id_working_del = [], [] # id_working_del will be deleted later
	names_by_id, dirs_by_id = {}, {}
	connection, cursor = getConnectionCursor()
	try:
		for k in r_ids:
			cursor.execute('SELECT r.req_state, r.req_name, r.category, u.user_name FROM requests r LEFT JOIN users u ON r.user_id=u.id WHERE r.req_id=%d' % k)
			if not cursor.rowcount: continue
			req_state, req_name, req_cat, unm = cursor.fetchone()
			names_by_id[k] = req_name
			# only delete not-working jobs here
			if req_state == db_vars.STATE_WORKING:
				nm_working.append(req_name)
				if req_cat not in db_vars.TYPE_WEBARRAYDB_FILL: # working database-filling jobs won't be deleted
					id_working_del.append(k)
				continue
			if not result_dir:
				# The folder has to be looked up before the row is deleted: it is stored in
				# the row itself. One lookup per id, so one bad row cannot block the others.
				try: dirs_by_id[k] = getRltDir([k], cursor)[0]
				except Exception: dirs_by_id[k] = None
			cursor.execute('DELETE FROM requests WHERE req_id=%d' % k)
		connection.commit()

		# collect (parent folder, request folder name) pairs to remove
		targets = []
		if result_dir:
			# in case that no r_names offered, use the names got from r_ids
			names = r_names or [names_by_id[k] for k in r_ids if k in names_by_id]
			targets = [(result_dir, nm) for nm in names]
		else:
			paired = len(r_names) == len(r_ids)
			for i, k in enumerate(r_ids):
				parent = dirs_by_id.get(k)
				if parent is None: continue
				if username:
					# the user name is expected three levels above the end of the path
					parts = [a for a in parent.split(os.sep) if a]
					if len(parts) < 3 or parts[-3] != username: continue # don't delete other users' files
				if paired: targets.append((parent, r_names[i]))
				else: targets.append((parent, names_by_id.get(k)))

		for parent, nm in targets:
			# never remove the parent folder itself or anything above it
			if not nm or nm in (os.curdir, os.pardir): continue
			if nm in nm_working: continue
			path = os.path.join(parent, nm)
			if os.path.exists(path): shutil.rmtree(path)
	finally:
		cursor.close()
		connection.close()

	# send request to analyze_dmp for deleting working jobs
	if id_working_del: tellDaemon('Delete jobs %s' % ';'.join(map(str, id_working_del)))

def delOldStuff(days=None): #31*6):
	"""Periodic housekeeping: optimize the databases and purge old requests/files.

	days -- age limit in days; when None, the module-level REQUEST_LIFE is
	        used, or 6 months if that is not set. Nothing is purged when the
	        limit is <= 0.

	The databases are optimized when the current day number (days since the
	epoch) is a multiple of OPTIMIZE_DAYS (default: 62).
	Requests older than the limit are deleted unless they belong to the demo
	user or are waiting, working or marked for deletion. Afterwards every file
	older than the limit below the module-level users_dir (except the 'demo'
	folder) is removed.
	"""
	def logEvent(msg):
		# logging is best-effort: a missing or unwritable log must not stop the cleanup
		try: Log(TOOLS_LOG).write('\n\n%s\n\t%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), msg))
		except Exception: pass

	# optimize database
	try: opt_days = int(OPTIMIZE_DAYS)
	except (NameError, TypeError, ValueError): opt_days = 31 * 2
	if opt_days > 0 and (not int(time.time()/60/60/24) % opt_days): optimizeDB()

	# deal with old requests and files
	if days is None:
		try: days = int(REQUEST_LIFE)
		except (NameError, TypeError, ValueError): days = 31*6 # 6 months
	if days <= 0: return
	dt = time.time() - 60*60*24*days 
	dtstr = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(dt))
	# del old requests
	import db_vars
	sql_request = 'SELECT r.req_id FROM requests r LEFT JOIN users u ON r.user_id=u.id WHERE u.user_name!="demo" AND r.req_state NOT IN (%d, %d, %d) and req_time < "%s" ORDER BY req_id' % (db_vars.STATE_WAITING, db_vars.STATE_WORKING, db_vars.STATE_TO_DELETE, dtstr)
	rids = inquireDB(sql_request, fetch=True)
	if rids: 
		delReqs(r_ids = [a[0] for a in rids])
		logEvent('Deleted old requests')

	# del old files
	# users_dir is a module-level setting loaded from DAEMON_HOST when this
	# module is imported, so it has to be looked up in globals(): it is never
	# a local of this function, and testing locals() skipped this step always.
	users_root = globals().get('users_dir')
	if users_root and os.path.isdir(users_root):
		cwd = os.getcwd()
		os.chdir(users_root)
		try:
			fdirs = os.listdir('.')
			if 'demo' in fdirs: fdirs.remove('demo')
			if fdirs:
				for fdir in fdirs: delOldFiles(fdir, dt)
				logEvent('Deleted old files')
		finally:
			# restore the working directory even if the cleanup fails
			os.chdir(cwd)

def delOldFiles(fn, ftime):
	"""Recursively delete everything below fn that was modified before ftime.

	fn    -- path of a file or directory (relative paths are resolved against
	         the current working directory).
	ftime -- cut-off time in seconds since the epoch.

	A regular file is removed when its mtime is older than ftime. A directory
	is processed recursively and then removed itself when it is empty and its
	own mtime (sampled before the recursion) is older than ftime. Failures to
	delete are written to TOOLS_LOG instead of being raised.
	"""
	if os.path.isfile(fn):
		if os.stat(fn).st_mtime < ftime: 
			try: os.unlink(fn)
			except OSError: 
				Log(TOOLS_LOG).write('\n\n%s\n\tFail to delete file "%s"' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), os.path.normpath(fn)))
	elif os.path.isdir(fn):
		cwd = os.getcwd()
		# sample the mtime first: deleting entries below updates the directory's mtime
		stm = os.stat(fn).st_mtime
		os.chdir(fn)
		try:
			for fntmp in os.listdir('.'): delOldFiles(fntmp, ftime)
		finally:
			# never leave the process inside a sub-folder if the recursion fails
			os.chdir(cwd)
		if stm < ftime and not os.listdir(fn): 
			try:
				# a symlink to a directory is removed as a link, not with rmdir
				if os.path.islink(fn): os.unlink(fn)
				else: os.rmdir(fn)
			except OSError: 
				Log(TOOLS_LOG).write('\n\n%s\n\tFail to delete dirctory "%s"' % (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()), os.path.normpath(fn)))

def optimizeDB(cur=None):
	"""Run OPTIMIZE TABLE on every table of every database named in MPMDB_INI.

	cur -- database cursor to use; a new one is obtained from getCursor()
	       when None.

	A line is appended to TOOLS_LOG afterwards; failures to log are ignored.
	"""
	db_names = readINI(MPMDB_INI).keys()
	if cur is None:
		cur = getCursor()
	for db_name in db_names:
		cur.execute('USE %s' % db_name)
		rows = inquireDB('SHOW TABLES FROM %s' % db_name, cursor=cur, fetch=True)
		# collect all table names before the cursor is reused for OPTIMIZE
		table_names = [row[0] for row in rows]
		for table_name in table_names:
			cur.execute('OPTIMIZE TABLE %s.%s' % (db_name, table_name))
	try:
		stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
		Log(TOOLS_LOG).write('\n\n%s\n\t%s' % (stamp, 'Optimized databases'))
	except:
		pass
	

def saveUsersDir(usersdir):
	"""Record usersdir as `users_dir` in the DAEMON_HOST settings file.

	The file is rewritten only when it has no users_dir assignment yet or when
	the stored value differs from usersdir. The saved value is used by
	analyze_dmp to delete old files.
	"""
	# save users dir to DAEMON_HOST, which will be used by analyze_dmp to delete old files
	fp = open(Daemon_Host)
	try: txt = fp.read()
	finally: fp.close()
	# NOTE: DAEMON_HOST is executed as Python code, so it must only ever hold
	# trusted content. The assignments are collected in a scratch dict instead
	# of being injected into this function's local variables.
	settings = {}
	exec(txt, globals(), settings)
	# %r yields a complete, correctly quoted and escaped Python string literal
	assignment = 'users_dir = %r' % (usersdir,)
	if 'users_dir' not in settings:
		txt = txt + '\n' + assignment
	elif settings['users_dir'] != usersdir:
		# double the backslashes so that re.sub inserts the replacement literally
		txt = re.sub(r'\busers_dir\s*=\s*\S+.*?\s*($|\n)', assignment.replace('\\', '\\\\') + '\n', txt)
	else:
		return # already up to date
	fp = open(Daemon_Host, 'w')
	try: fp.write(txt)
	finally: fp.close()

# functions for communicating with the WebArray Daemon:
def tellDaemon(info):
	"""Send the message info to the WebArray daemon as a single UDP datagram.

	The daemon address is taken from the module-level HOST and PORT settings.
	No reply is awaited, so the call does not tell whether the daemon is
	running (an earlier "are you there" handshake was disabled).
	"""
	from socket import socket, AF_INET, SOCK_DGRAM
	addr = (HOST, PORT)
	udpCliSock = socket(AF_INET, SOCK_DGRAM)
	try:
		# send message to Daemon
		udpCliSock.sendto(info, addr)
	finally:
		# release the socket even if sending fails
		udpCliSock.close()


#################### INI file functions ####################
# An INI file has the layout
# 	[section_name]
# 	var_name = var_value
# 	...
# and is held in memory as a dict {section_name : {var_name:var_value, ...}, ... }

def readINI(fn, getGO=False):
	"""Parse the INI file fn into a dict {section_name: {var_name: var_value}}.

	fn    -- path of the INI file; a missing file yields an empty dict.
	getGO -- the section named 'GO' is dropped from the result unless True.

	Section and variable names may contain letters, digits and '_' only.
	Values are returned as strings without surrounding whitespace. Lines that
	appear before the first section header, or that are neither a header nor
	a "name = value" pair, are ignored. A repeated section header starts that
	section afresh.
	"""
	ini_dict = {} # {tbname:{'data_dir':location,...}, ...}
	if not os.path.exists(fn): return ini_dict
	dbline = re.compile(r'^\s*\[\s*([a-zA-Z0-9_]+)\s*\]\s*$')
	vline = re.compile(r'\s*([a-zA-Z0-9_]+)\s*=\s*(.+?)\s*$')
	cur_dict = None # dict of the section being read; None before the first header
	fp = open(fn)
	try:
		for line in fp:
			# find dbname first
			m = dbline.match(line)
			if m:
				cur_dict = ini_dict[m.group(1)] = {}
				continue
			if cur_dict is None: continue
			# find values then
			m = vline.match(line)
			if m: cur_dict[m.group(1)] = m.group(2)
	finally:
		fp.close()
	if not getGO and 'GO' in ini_dict: del ini_dict['GO']
	return ini_dict

def saveINI(ini_dict, fn):
	"""Write ini_dict ({section_name: {var_name: var_value}}) to the file fn.

	Every section becomes a "[section_name]" line followed by one
	tab-indented "var_name = var_value" line per entry; sections are
	separated by a blank line. Returns ini_dict unchanged.
	"""
	out = open(fn, 'w')
	first_section = True
	for section, entries in ini_dict.items():
		if first_section:
			first_section = False
			header = '[%s]\n' % (section,)
		else:
			# blank line between consecutive sections
			header = '\n[%s]\n' % (section,)
		out.write(header)
		for name, value in entries.items():
			out.write('\t%s = %s\n' % (name, value))
	out.close()
	return ini_dict


def getTC(title_case=None):
	"""Return a one-argument function that converts the case of a string.

	title_case -- 'lower' or 'upper' (matched case-insensitively) selects the
	              conversion; any other value yields a function that returns
	              its argument unchanged.
	"""
	if isinstance(title_case, str): title_case = title_case.lower()
	# use the string methods; the string.lower/string.upper module functions are deprecated
	if title_case == 'lower': return lambda a: a.lower()
	if title_case == 'upper': return lambda a: a.upper()
	return lambda a: a

def rmDelim(s, r=re.compile(r'\s*(["\'])(.*)\1\s*$')):
	"""Strip one pair of matching quotes (" or ') that encloses the whole of s.

	s -- the string to unquote.
	r -- compiled pattern whose second group is the unquoted content.

	Whitespace outside the quotes is dropped together with them. s is returned
	unchanged when it is not quoted as a whole; the pattern is anchored at the
	end so that text following a closing quote is never silently discarded.
	"""
	o = r.match(s)
	if o: return o.group(2)
	return s

def readTable(f, colnames = None, skip_blank=False, stop_at=None, use_list=True, title_case=None, sep_str='\t', autodelim=True, delim='', NULL_str=[], NULL_fill='NULL', rmdelim=rmDelim):
	"""Read a delimited text table.

	f          -- file name or an object that yields text lines. A file opened
	              here from a name is closed again before returning.
	colnames   -- list of column names; when None, the first line read supplies
	              them (converted with title_case).
	skip_blank -- skip blank lines instead of ending the table at the first one.
	stop_at    -- compiled pattern marking the start of the next table; its
	              first group is taken as the name of that table.
	use_list   -- True: rows are lists and the first item of the result is the
	              list of column names; False: rows are dicts keyed by column name.
	title_case -- 'lower'/'upper' case conversion for names (see getTC).
	sep_str    -- column separator.
	autodelim  -- True: remove paired quotes around each field with rmdelim and
	              ignore delim; False: fields are wrapped in delim, so the
	              effective separator is delim+sep_str+delim.
	NULL_str   -- field values to be replaced by NULL_fill.
	NULL_fill  -- replacement for NULL_str values and filler for short rows.
	rmdelim    -- function that unquotes one field.

	Rows shorter than colnames are padded with NULL_fill, longer rows are cut.
	If stop_at is not given the table content is returned; otherwise the return
	value is a tuple of the table content and the name of the next table (or
	None when no further table was found).
	"""
	# if autodelim is True, delim will not be used
	# if delim is used, '\t' might be used in a string, and the separator will be delim+'\t'+delim
	opened_here = isinstance(f, str)
	if opened_here: f = open(f)
	tc = getTC(title_case)
	tb = []
	if autodelim: delim =  '' # automatically remove paired leading-tailing " or '
	else: sep_str = delim+sep_str+delim
	ndelim = len(delim)

	try:
		for line in f:
			if stop_at:
				rlt = stop_at.search(line) # "match" doesn't work here (why?), so use "search"
				# f.seek(1, -len(line)) # cannot seek correctly in text mode
				if rlt: return tb, tc(rlt.groups()[0])
			if not line.strip():
				if skip_blank: continue
				# a blank line ends the table
				if stop_at: return tb, None
				return tb
			cols = line.replace('\n','').replace('\r','')
			if delim:
				# drop the delimiter that opens the first and closes the last field
				if cols[:ndelim] == delim: cols = cols[ndelim:]
				if cols[-ndelim:] == delim: cols = cols[:-ndelim]
			cols = cols.split(sep_str)
			if autodelim: cols = [rmdelim(a) for a in cols]
			cols = [descNT(a) for a in cols] # recover escaped chars

			if colnames is None: 
				colnames = [tc(a.strip()) for a in cols]
			else: 
				if NULL_str:
					for i, a in enumerate(cols):
						if a in NULL_str: cols[i] = NULL_fill
				# make the row exactly as long as the header
				lc = len(cols)
				ln = len(colnames)
				if lc < ln: cols.extend([NULL_fill]*(ln-lc))
				elif lc > ln: del cols[ln:]
				if use_list:
					if not tb: tb.append(colnames) # the first item is a list of column names
					tb.append(cols)
				else: # use dict
					tb.append(dict(zip(colnames, cols)))

		if stop_at: return tb, None
		return tb
	finally:
		# only close what was opened here; a caller-supplied file stays open
		if opened_here: f.close()

def getPfPairs(fn, sep='\t'):
	"""Read the probe pairs from the probe-mapping file fn.

	fn  -- name of a probe-mapping file.
	sep -- column separator.

	The file may start with blank lines and lines without sep; such lines of
	the form "name = value" define replacements for platform names. The first
	non-blank line containing sep is the header. It must contain either the
	columns platform_a, unique_id_a, platform_b, unique_id_b or the columns
	"a array name", "a probe set name", "b array name", "b probe set name"
	(compared case-insensitively; the former set wins if both are present).

	Returns a list of [platform_a, id_a, platform_b, id_b] lists, one per
	non-blank data line, with platform names replaced where a replacement is
	defined. Returns None if there is no header or it lacks the columns.
	"""
	head_other = ['platform_a', 'unique_id_a', 'platform_b', 'unique_id_b']
	head_affy = ['a array name', 'a probe set name', 'b array name', 'b probe set name']
	fp = open(fn)
	try:
		# preamble: collect the platform-name replacements and find the header
		pfnmdic = {}
		head = None
		for line in fp:
			if not line.strip(): continue
			if sep in line: # has sep, should be the beginning row for mapping
				head = [h.lower() for h in line.rstrip('\r\n').split(sep)]
				break
			if '=' in line:
				i = line.index('=')
				k, v = line[:i].strip(), line[i+1:].strip()
				if k and v: pfnmdic[k] = v
		if head is None: return None

		if len([h for h in head_other if h in head]) == 4: head_use = head_other # head_other will override head_affy
		elif len([h for h in head_affy if h in head]) == 4: head_use = head_affy
		else: return None
		idx = [head.index(h) for h in head_use]

		lines = []
		for line in fp:
			if not line.strip(): continue # e.g. an empty line at the end of the file
			cells = line.rstrip('\r\n').split(sep)
			row = [cells[i] for i in idx]
			if pfnmdic:
				# only the two platform columns are subject to renaming
				for c in (0, 2): row[c] = pfnmdic.get(row[c], row[c])
			lines.append(row)
		return lines
	finally:
		fp.close()

def ProbeFileType(fn):
	"""Guess the format of the probe annotation file fn.

	Returns a tuple (type, ann_ln): type is 'affy' or 'tsv', and ann_ln is the
	number of leading lines that are blank or start with '#', '^' or '!'.

	The type is 'affy' when one of those leading lines carries an Affymetrix
	marker, or when the line at which scanning stopped names both a
	"probe set id" and a "genechip array" column; otherwise it is 'tsv'.
	An empty file yields ('tsv', 0).
	"""
	ann_ln = 0
	tp = 'tsv'
	re_affy = re.compile(r'Annotation_platform_title\s*=\s*Affymetrix\s+GeneChip|netaffx-annotation-tabular-format-version\s*=\s*\d+', re.I)
	s = '' # keeps the check after the loop valid when the file has no lines at all
	fp = open(fn)
	try:
		for s in fp:
			s = s.strip()
			if not s:
				ann_ln += 1
				continue
			if s[0] in '#^!': # these may found in Affy annotation files
				ann_ln += 1
				if re_affy.search(s): tp = 'affy' # GPL4685.annot
				continue
			break
	finally:
		fp.close()
	header = s.lower()
	if 'probe set id' in header and 'genechip array' in header: return 'affy', ann_ln
	return tp, ann_ln


