#!/usr/bin/env python
# IMPORTANT: the parameter (variable) names used in the web pages should be consistent with those used in Python and R.
# The line above is no longer needed, since a name map is used.

import sys, os, cgi, re, string, types, sets #, tempfile
import tools

import cgitb; cgitb.enable(display=0, logdir='/tmp')

from cgitools import *
#home_dir = '/home/xxia/public_html' # user data will based on this dir
#users_relative_dir = "users"
#users_dir = os.path.join(home_dir, users_relative_dir)
#R_code_dir = 'R_code' # under cgi-bin

# When true, the parsed form parameters are dumped and the script stops
# right after the form has been read.
__DEBUG__ = False

# Validation messages collected while the submitted form is checked.
error_msg = []

# Reference table of parameter names with their default values.
# It is built section by section; the resulting dict is the same as a
# single literal would produce.
src_params = {}

# arrays and groups
src_params.update({
	'arrays': [],
	'arrays_text': '',
	'groups': [],
	'groups_text': [],
	'grp_num': 1,
})

# probe matching / mapping
src_params.update({
	'match_probe': True,
	'by_col': None,
	'merge_method': 'log mean',
	'mapf': '',
	'map_db_pf': [],
	'mapcols': [],
})

# background correction and normalization
src_params.update({
	'db_pf': [],
	'bg_correct': [],
	'norm_in_array': [],
	'norm_in_pf': [],
	'norm_x_pf': '',
	'nbin': 8,
	'use_ratio': True,
	'ratio_by': 'array',
})

# analysis
src_params.update({
	'analysis_method': 'ANOVA',
	'contrasts': [], # ['group2-group1', 'group3 - group1']
	'anova_platform': True,
	'anova_array': True,
	'anova_dye': True,
	'anova_individual': True,
	'anova_sample': True,
	'anova_factor': 'factors_in_db',
	'uf_name': [],
	'uf_type': [],
	'uf_dtype': [],
	'uf_value': [],
	'user_model': '',
	'ref_name': '', # create by program
})

# output options
src_params.update({
	'use_rma': False,
	'save_data': False,
	'plot_chart': True,
	'plot_pdf': True,
	'sort_rlt': False,
	'sum_rlt': False,
	'sum_rlt_by': 'gene_symbol',
	'screen_gene': 'all', # all, by_pvalue, by_number
	'p_threshold': 0.01,
	'gene_num': 100,
	'clust_chs': False,
	'clust_grps': False,
	'plot_heatmap_chs': False,
	'plot_heatmap_grps': False,
	'plot_genome': False,
	'result_dir': '.',
	'req_name': '',
})

#user_params = src_params.copy() # these params will be passed to R

# Parameters accepted from the web user. user_params is intentionally an
# alias of form_params: both names refer to one and the same dict.
form_params = {}
user_params = form_params

# Required form fields, keyed by the complaint reported when one is missing.
#necessary_data = {'cannot be empty':['intensity_file', 'gal_file'], 'cannot be zero':[]}
necessary_data = {
	'cannot be empty': [],
	'cannot be zero': [],
}

# Map from the names used by Python to the names used by R:
# the key is the name used by Python, the value is the name used by R.
# Names not included here are simply translated with the ('_' to '.') rule.

# This map is now imported from cgitools.
namemap = namemap_limma_linear
#namemap = {'intensity_file':'slides', 'gal_file':'gal.file.name', 'norm_in_array':'WithinArray.Nor',
#	'norm_bt_array':'BetweenArra.Nor', 'output_format':'Output.Format', 'bg_substraction':'BG.Sub',
#	'intensity_format':'MA.File.Format', 'replicates_in_array':'Num.of.Dup', 'ch1':'design.ch1', 'ch2':'design.ch2',
#	'contrast':'contrast.design'} 


# Names of parameters that are always stored as a list, even when the form
# submitted a single value. Only key membership is tested; every value is 1.
MultiVars = dict.fromkeys([
	'arrays', 'arrays_text',
	'db_pf', 'bg_correct', 'norm_in_array', 'norm_in_pf',
	'mapcols', 'map_db_pf',
	'uf_name', 'uf_type', 'uf_dtype', 'uf_value',
], 1)

# mkdir temp dir for users
#users_dir = home_dir+'users'
#user_dir = tempfile.mkdtemp( , , users_dir)
#user_id = os.path.basename(user_dir)

#user_id = tools.UniqueNameRand(prefix='', suffix='', dir=os.path.abspath(users_dir))
#user_dir = os.path.abspath(os.path.join(users_dir, user_id))
#user_relative_dir = os.path.join(users_relative_dir, user_id)
#os.mkdir(user_dir)

# Set default values
# transform relative path to absolute path, then create these paths

#for k in ['graphDir', 'work.dir', 'raw.data.dir', 'result.data.dir', 'chart.dir']:
#	user_params[k] = tmpdir = os.path.normpath(os.path.join(user_dir, user_params[k]))
#	if k == 'graphDir': user_params[k] = tmpdir+os.sep
#	if not os.path.exists(tmpdir): os.makedirs(tmpdir)

# Names of the form fields the user may submit, grouped by how each value
# is converted when the form is read.

# plain strings (stored stripped)
data_user_str = [
	'arrays', 'arrays_text', 'by_col', 'merge_method',
	'db_pf', 'bg_correct', 'norm_in_array', 'norm_in_pf', 'norm_x_pf',
	'analysis_method', 'contrasts', 'ratio_by', 'request_name',
	'mapf', 'map_db_pf', 'mapcols',
	'screen_gene', 'anova_factor',
	'uf_name', 'uf_type', 'uf_dtype', 'uf_value', 'user_model',
	'probe_info', 'plot_genome_by', 'fun_smooth', 'gn_probefile',
	'sum_rlt_by', 'cgh_m1', 'cgh_m2', 'cgh_m3',
]

# integers
data_user_int = [
	'grp_num', 'nbin', 'gene_num', 'nucleotide_num', 'probe_num',
	'max_section', 'coa_num', 'bga_num', 'n_smooth', 'row_num', 'max_chart',
]

# floats
data_user_float = [
	'p_threshold', 'weak_perc', 'gn_p_threshold',
	'tn_threshold_M', 'tn_threshold_A',
]

# for check buttons
data_user_logic = [
	'anova_platform', 'anova_array', 'anova_dye', 'anova_individual', 'anova_sample',
	'clust_chs', 'clust_grps', 'plot_heatmap_chs', 'plot_heatmap_grps',
	'screen_for_cluster', 'screen_for_heatmap', 'screen_for_coa',
	'screen_for_bga', 'screen_for_genome',
	'tn_analysis', 'gn_p_label', 'debug_mode', 'sum_rlt', 'bac_cgh',
]

# for yes-or-no radio buttons
data_user_str2log = [
	'use_ratio', 'match_probe', 'use_rma', 'save_data',
	'plot_chart', 'plot_pdf', 'sort_rlt',
	'coa_analysis', 'bga_analysis', 'plot_genome',
]

# uploaded files: currently none are accepted
data_user_file = {
	# with format "file:info", and "info" is a dictionary with keys "need_entry", "location", etc.
	#'raw.data.files' : {'need_entry':0, 'location':user_params['raw.data.dir']}, 
	#'gal.file.name' : {'need_entry':1, 'location':user_params['work.dir']},
	#'sample.info.file' : {'need_entry':1, 'location':user_params['work.dir']}
}

# Retrieve data from user
form = cgi.FieldStorage()

# String parameters: keep the stripped text. A field submitted more than
# once arrives as a list, whose items are stripped one by one.
for name_html in data_user_str:
	if form.has_key(name_html):
		a = form.getvalue(name_html)
		if isinstance(a, str): a = a.strip()
		else: # should be a seq
			a = [b.strip() for b in a]
		form_params[name_html] = a

# Integer parameters. The rejected value is echoed back inside an HTML
# page, so it is escaped first: it is untrusted user input.
for name_html in data_user_int:
	if form.has_key(name_html):
		raw = form.getvalue(name_html)
		try:
			form_params[name_html] = int(raw)
		except (TypeError, ValueError): # TypeError: the field was submitted more than once
			error_msg.append('Invalid value "%s" for integer variable "%s"!' % (cgi.escape(str(raw), True), name_html))

# Yes-or-no radio buttons: anything but "no" counts as yes, a missing
# field counts as no. getfirst() keeps a repeated field from crashing.
for name_html in data_user_str2log:
	if form.has_key(name_html):
		form_params[name_html] = form.getfirst(name_html, '').lower()!='no'
	else: form_params[name_html] = False

# Float parameters: a trailing '%' means the number is a percentage.
for name_html in data_user_float:
	if form.has_key(name_html):
		v = form.getvalue(name_html)
		try:
			v = v.strip() # AttributeError: the field was submitted more than once
			if v and v[-1]=='%': form_params[name_html] = float(v[:-1])*0.01
			else: form_params[name_html] = float(v)
		except (AttributeError, TypeError, ValueError):
			error_msg.append('Invalid value "%s" for float variable "%s"!' % (cgi.escape(str(v), True), name_html))

# Check buttons: the mere presence of the field means "checked".
for name_html in data_user_logic:
	if form.has_key(name_html):
		form_params[name_html] = True
	else: form_params[name_html] = False

# Save uploaded files into the directory configured for each file field.
for name_html, info in data_user_file.items(): # won't use this recently, need to revise if use
	if not form.has_key(name_html): continue
	#files = form.getlist(name) # raw_data_files should have several files
	files = form[name_html]
	if not isinstance(files, list): files = [files]
	for upload in files:
		if not upload.file or not upload.value or not upload.filename: continue
		# The file name is chosen by the client. Keep only its last path
		# component (some browsers send a full Windows path) so that it
		# cannot point outside of the target directory.
		safe_name = os.path.basename(upload.filename.replace('\\', '/'))
		if safe_name in ('', '.', '..'): continue
		filename = os.path.join(info["location"], safe_name)
		fp = open(filename, "wb")
		try:
			fp.write(upload.value)
		finally:
			fp.close() # release the handle even when the write fails
		if info['need_entry']: form_params[name_html] = filename # microarray data file need no entry in this dictionary

# convert multivars to list
for k, v in form_params.items():
	if k in MultiVars and not isinstance(v, list): form_params[k] = [v]

# get grp data
form_params['groups'] = grps = []
form_params['groups_text'] = grps_text = []

# grp_num is absent when the field was not submitted or when it was not a
# valid integer. Do not fail with a KeyError here: go on with zero groups
# so that the collected error messages can still be reported to the user.
grp_num = form_params.get('grp_num')
if grp_num is None:
	if not form.has_key('grp_num'):
		error_msg.append('Missing value for integer variable "grp_num"!')
	grp_num = 0

for i in range(1, grp_num+1):
	# fields "grps_<i>" and "grps_<i>_text" hold the members of group i
	for suffix, dest in (('', grps), ('_text', grps_text)):
		v = form.getvalue('grps_%d%s' % (i, suffix), [])
		if not isinstance(v, list): v = [v]
		dest.append(list(v))

if __DEBUG__:
	print cgi_token
	fmitems = form_params.items()
	fmitems.sort()
	for k,v in fmitems:
		print '<br>', k, '\t:\t', v
	sys.exit(0)

# Check validation of form data here

# check contrasts
grp_n = len(form_params.get('groups', []))
grp_nms = ['group%d' % a for a in range(1, grp_n+1)]
contrasts = form_params.get('contrasts', '')
# A "contrasts" field submitted more than once arrives as a list; read it
# as one comma-separated specification instead of failing on .strip().
if isinstance(contrasts, list): contrasts = ', '.join(contrasts)
contrasts = contrasts.strip()
contrasts_raw = contrasts
contrasts = contrasts.lower()
if contrasts: 
	# tokens of the specification that look neither like "group<N>" nor like a bare number
	contnms_raw = filter(lambda b:b, map(lambda a:a.strip(), re.split(r'[,;\-\+\(\)/]', contrasts_raw)))
	contnms_raw = filter(lambda a,m=re.compile(r'^(group)?\d+$'):not m.match(a), contnms_raw)
	contnms_raw.sort()

	# group names used in the contrasts that do not belong to a submitted group
	contnms = sets.Set(re.findall(r'group\d+', contrasts))
	df_nm = list(sets.Set(contnms).difference(grp_nms))
	df_nm.sort()
	df_nm = contnms_raw + df_nm
	# The names are user input shown inside an HTML page: escape them.
	# .get() avoids a KeyError when no analysis method was submitted.
	if df_nm and form_params.get('analysis_method') != 'none': error_msg.append('contrasts contain invalid group names: %s' % cgi.escape(', '.join(df_nm), True))

	# the reference is a group that no contrast mentions, group1 otherwise
	ref_nm = sets.Set(grp_nms).difference(contnms)
	if ref_nm: ref_nm = list(ref_nm)[0]
	else: ref_nm = 'group1'
	contrasts = re.split(r'[,;]\s*', contrasts )
	form_params['ref_name'] = ref_nm
if not contrasts:
	# no contrasts given: compare every other group with group1
	contrasts = []
	for i in range(2, grp_n+1):
		contrasts.append('group%d-group1' % i)
	form_params['ref_name'] = 'group1'
form_params['contrasts'] = contrasts
form_params['contrasts_simple'] = filter(lambda a,r=re.compile(r'^\s*group\d+\s*-\s*group\d+\s*$'):r.match(a), contrasts)

# An empty request name is accepted; a non-empty one may only consist of
# letters, digits and underscores.
reqname = form_params.get('request_name', '')
if reqname and not re.match(r'^[a-zA-Z0-9_]+$', reqname):
	error_msg.append('Error: Invalid characters in the user ID!<br>')

# Report every required parameter that is missing or empty.
for tp, names in necessary_data.items():
	missing = [name for name in names if not form_params.get(name, None)]
	error_msg.extend([name + ' ' + tp + '!<br>' for name in missing])

def _plural(items):
	"""Return 's' when items holds more than one element, '' otherwise."""
	if len(items) > 1: return 's'
	return ''

def _join_html(names):
	"""Join names with ', ' and HTML-escape the result.

	The names come from the submitted form and end up inside an HTML error
	page, so they must not be written out verbatim.
	"""
	return cgi.escape(', '.join(names), True)

# check params for ANOVA
# (.get() is used throughout so that a missing form field is handled as
# "not selected" instead of raising a KeyError)
if form_params.get('analysis_method') == 'ANOVA':
	chk_uf = chk_uf_in_model = False
	anova_factor = form_params.get('anova_factor')
	if anova_factor == 'user_model': # check user model, and add group
		mdl = form_params.get('user_model', '')
		if not isinstance(mdl, str): mdl = '' # field submitted more than once: rejected just below
		mdl = mdl.lower()
		# NOTE(review): this pattern is not anchored at the end, so on its own
		# it only validates a prefix of the model; the undefined-factor check
		# further down also looks at every name used in the model.
		if not re.match(r'[+-]?\s*\w+(\s*\*\s*\w+)*([+-]\s*\w+(\s*\*\s*\w+)*)*(\s*,\s*random\s*=\s*.+)?', mdl): error_msg.append('Invalid user-defined model!')

		# every name used in the model, except the constants 0 / 1 and the keyword "random"
		uf_in_model = sets.Set(filter(lambda a:a and a not in ('0', '1', 'random'), [a.strip() for a in re.split(r'[-+*,=~|/]', mdl)]))
		form_params['uf_in_model'] = list(uf_in_model)
		# display form: capitalize the first letter for all factor names
		mdl_disp = re.sub(r'(^|(?<=\W))[a-z]', lambda a:a.group().capitalize(), mdl)
		if re.search(r',\s*random\s*=\s*\S+', mdl): # mixed-effect model
			form_params['user_model_parsed'] = 'm <- lme(x ~ %s, data=uf_dt)' % mdl
			form_params['user_model_disp'] = 'lme(x ~ %s)' % mdl_disp
			form_params['is_mixed_model'] = True
		else: # fixed-effect model 
			form_params['user_model_parsed'] = 'm <- lm(x ~ %s, data=uf_dt)' % mdl
			form_params['user_model_disp'] = 'lm(x ~ %s)' % mdl_disp
			form_params['is_mixed_model'] = False
		form_params['has_group'] = bool(re.search(r'\bgroup\b', mdl))
		form_params['has_intercept'] = not re.search(r'\b0\b|-\s*1\b', mdl)
		chk_uf = chk_uf_in_model = True
	elif anova_factor == 'factors_by_user': 
		chk_uf = True
	# else: 'groups_only' or 'factors_in_db' -- nothing user-defined to check

	if chk_uf:
		# work on copies; all names are converted to lower case
		ufnms = [nm.lower() for nm in form_params.get('uf_name', [])]
		uftps = list(form_params.get('uf_type', []))
		ufdtps = list(form_params.get('uf_dtype', []))
		ufvals = list(form_params.get('uf_value', []))
		# only keep factors to be used: drop empty names and, for a user
		# model, names the model does not mention. The four lists may differ
		# in length, so an entry is deleted only where it exists.
		for i in range(len(ufnms)-1,-1,-1): 
			if not ufnms[i] or (chk_uf_in_model and ufnms[i] not in uf_in_model):
				for col in (ufnms, uftps, ufdtps, ufvals):
					if i < len(col): del col[i]
		# check if names are valid
		nm_str = re.compile(r'^[a-zA-Z]\w*$')
		bad_nms = [nm for nm in ufnms if not nm_str.match(nm)]
		if bad_nms: error_msg.append('Invalid name%s for user-defined factor%s: %s' % (_plural(bad_nms), _plural(bad_nms), _join_html(bad_nms)) )
		form_params['uf_name_parsed'] = ufnms
		# check replicate names: removing one occurrence of every distinct
		# name leaves exactly the surplus (repeated) ones
		ufnm_set = sets.Set(ufnms)
		if len(ufnms) > len(ufnm_set):
			ufnm_tmp = ufnms[:]
			for nm in ufnm_set: ufnm_tmp.remove(nm)
			error_msg.append('Defined replicated name%s for user-defined factor%s: %s' % (_plural(ufnm_tmp), _plural(ufnm_tmp), _join_html(ufnm_tmp)) )
		# check undefined names
		uf_all = ufnms + ['group']
		if chk_uf_in_model:
			uf_all = uf_all + ['sample','array','platform','individual','dye']
			uf_more = uf_in_model.difference(uf_all)
			if uf_more:
				error_msg.append('Undefined factor name%s: %s' % (_plural(uf_more), _join_html(uf_more)) )
		form_params['uf_nmdic'] = dict([(nm, nm.capitalize()) for nm in uf_all])
		# check if the number of values are equal to data channels, and check the value type, value levels should be more than 1
		n_chs = sum(map(len, grps))
		bad_n = [] 
		bad_val = []
		less_level = []
		ops = {'integer':int, 'float':float, 'string':str}
		val_str = re.compile(r'[,;]')
		for i in range(len(ufvals)):
			# a value list without a matching name is reported by its position
			if i < len(ufnms): nm = ufnms[i]
			else: nm = 'factor #%d' % (i+1)
			val = [a.strip() for a in val_str.split(ufvals[i])]
			if len(val) != n_chs: bad_n.append(nm)
			try: val = map(ops[ufdtps[i]], val)
			except (KeyError, IndexError, TypeError, ValueError): # unknown/missing data type or unconvertible value
				bad_val.append(nm)
			if len(sets.Set(val)) < 2: less_level.append(nm)
			ufvals[i] = val
		if bad_n: error_msg.append('Incorrect length of factor%s: %s (doesn\'t equal to the number of data channels - %d !)' % (_plural(bad_n), _join_html(bad_n), n_chs) )
		if bad_val: error_msg.append('Invalid values for factor%s: %s' % (_plural(bad_val), _join_html(bad_val)) )
		if less_level: error_msg.append('At least two levels are required for factor%s: %s' % (_plural(less_level), _join_html(less_level)) )
		form_params['uf_value_parsed'] = ufvals

# Stop here and show the collected messages when the form data is invalid.
# (This check used to be repeated a few lines further down with nothing
# but comments in between; a single check is enough.)
# NOTE(review): assumes exitWithInfo() ends the request -- confirm in cgitools.
if error_msg: 
	error_msg.append('<p><A href ="javascript:history.go(-1)" >Please correct it</a>')
	exitWithInfo('<br>'.join(error_msg))

# prepare necessary directories.
result_subdirs = prepareResultDirs(form_params.get('request_name', ''))

# The request is known by the last path component of its result directory.
request_dir = result_subdirs['request']['name_full']
user_params['result_dir'] = request_dir
user_params['req_name'] = os.path.basename(os.path.normpath(request_dir))

#has_ref = None

#print cgi_token
#print 'Requests submitted!'
#sys.exit(0)

req_type = TYPE_DBS_ANALYSIS
req_name = user_params['req_name'] #os.path.basename(os.path.normpath(result_subdirs['request']['name_full']))
n = saveReq(req_type, req_name, user_params, username)
if not n: 
	#exitMsg('An error ocurred when accessing database!<p><A href ="javascript:history.go(-1)" >Go back</a>') 
	exitMsg('An error ocurred when accessing database!<p><input type="button" value="Go back" onClick="javascript:history.go(-1)"/></p>') 
	

tellDaemon('New requests')

import time
time.sleep(1)

print cgi_token

#print '<body><iframe width=%s, height=%s frameborder=0 src="%s/ui/browse"</body>' % ('100%', '100%', script_path_url)
#from PythonInsideHTML import PIH
#exec PIH(script_path_file + '/pages/ui_browse.pih').pythonCode()
#sys.exit(0)

print '''<body onload=javascript:window.location.href="%s/ui/browse?page_type='webarraydb_analysis'"></body>''' % (script_path_url)

