#!/usr/bin/env python

import sys, os, cgi, re, string, types #, tempfile
import tools

import cgitb; cgitb.enable(display=0, logdir='/tmp')

from cgitools import *
#home_dir = '/home/xxia/public_html' # user data will based on this dir
#users_relative_dir = "users"
#users_dir = os.path.join(home_dir, users_relative_dir)
#R_code_dir = 'R_code' # under cgi-bin

# get db name: get_dbstr() (star-imported from cgitools) returns a code
# string that is executed here; it is expected to bind at least 'db'
# (used below to pick the sampVars variant).
# NOTE(review): exec of generated code -- presumably the string is produced
# server-side and never contains form data; confirm in cgitools.
exec get_dbstr()

# accumulated validation messages; reported to the user in one page below
err_msg = []

# Retrieve data from user (CGI POST/GET fields)
form = cgi.FieldStorage()

# debugging aid: dump the raw form and stop (leave commented out in production)
#print "Content-type:text/html\n\n"
#print form, '<p>'
#for a in form.keys(): print a, ' : ', form.getvalue(a), ' --- ', form[a], '<br>'
#sys.exit(0)

monoVars = {'new_project':1, 'request_name':1, 'prj_name':1, 'prj_kw':1, 'factor':1, 'tissue':1, 'design':1, 'QC':1, 'authors':1, 'journal':1, 'publish_year':1, 'pubmed_id':1, 'data_link':1, 'release_date':1, 'release_year':1, 'release_month':1, 'release_day':1, 'prj_description':1, 'prj_usercols':1, 'cur_db':1, 'new_dyes':1}
# reCh = re.compile(r'') # for channel names
#def isList(k):
#	pass

# webpage_name : DB_col_name for filldbs.py (not for DB directly)
prjVars = {'prj_name':'name', 'prj_kw':'keyword', 'factor':'factor', 'tissue':'tissue', 'design':'design', 'QC':'QC', 'authors':'authors', 'journal':'journal', 'publish_year':'publish_year', 'pubmed_id':'pubmed_id', 'data_link':'data_link', 'release_date':'release_date', 'prj_description':'description', 'prj_usercols':'user_added_cols', 'prjanno_file':'related_files'} #, 'prj_files':'related_files'}

arrayVars = {'identifier':'identifier', 'platform':'platform', 'platform_other':'platform_other', 'channel_num':'channel_num', 'intensity_file':'intensity_file', 'data_type':'data_type', 'intensity_format':'intensity_format', 'hyb_date':'hyb_date', 'protocol_hyb':'protocol_hyb', 'protocol_image':'protocol_image', 'protocol_data':'protocol_data', 'array_kw':'keyword', 'array_description':'description', 'array_usercols':'user_added_cols'}

sampVars = {
	db : {'samp_name':'name', 'organism':'organism', 'samp_tissue':'tissue', 'gender':'gender', 'age':'age', 'individual':'individual_id', 'samp_kw':'keyword', 'samp_description':'description', 'samp_usercols':'user_added_cols'},
	'bac' : {'samp_name':'name', 'organism':'organism', 'subspecie':'subspecie', 'variant':'variant', 'serotype':'serotype', 'strain':'strain', 'samp_kw':'keyword', 'samp_description':'description', 'samp_usercols':'user_added_cols'},
	'prostate' : {'samp_name':'name', 'organism':'organism', 'samp_tissue':'tissue', 'gender':'gender', 'age':'age', 'individual':'individual_id', 'samp_kw':'keyword', 'samp_description':'description',    'provider':'provider', 'state_develop':'state_develop', 'state_disease':'state_disease', 'relapse_status':'relapse_status', 'relapse_time':'relapse_time', 'pre_op_PSA':'pre_op_PSA', 'patient_id':'patient_id', 'cell_type':'cell_type', 'sample_type':'sample_type', 'GLSN':'GLSN', 'meta_site':'meta_site', 'tissue_perc_T':'tissue_perc_T', 'tissue_perc_S':'tissue_perc_S', 'tissue_perc_B':'tissue_perc_B', 'tissue_perc_G':'tissue_perc_G', 'tissue_perc_SMS':'tissue_perc_SMS', 'tissue_perc_NSS':'tissue_perc_NSS', 'capsularlnv':'capsularlnv', 'SM':'SM', 'TNN_1992':'TNN_1992', 'race':'race', 'samp_usercols':'user_added_cols'}
	}
sampVars['bacteria'] = sampVars['bac']

protVars = {'protocol_name':'name', 'protocol_cat':'category', 'prot_kw':'keyword', 'protocol_description':'description', 'prot_usercols':'user_added_cols', 'prot_file':'prot_file'}

pfVars = {'pf_name':'name', 'pf_cat':'category', 'probe_num':'probe_num', 'replicate':'replicate', 'space':'space', 'manufactuer':'manufactuer', 'pf_org':'organism', 'pf_kw':'keyword', 'pf_description':'description', 'pf_avail':'availability', 'probe_file':'probe_file', 'pf_usercols':'user_added_cols'}

chVars = {'samp_array_':'sample_', 'dye_array_':'dye_', 'prot_proc_array_':'protocol_process_', 'prot_tech_array_':'protocol_tech_', 'prot_label_array_':'protocol_label_', 'exp_factor_array_':'exp_factor_', 'image_file_array_':'image_file_', 'image_format_array_':'image_format_'}

secKeys = {'project':['prj_name'], 'array':['identifier'], 'sample':['samp_name'], 'protocol':['protocol_name'], 'platform':['pf_name']} # if the secKeys are not all available, this section will be omitted.

#requiredVars = {'prj_name':'Project name', 'identifier':'Array identifier', 'request_name':'Request name'} # these key names should be offered at least once, values are used for error msg
requiredVars = {'prj_name':'Project name'} #, 'request_name':'Request name'} # these key names should be offered at least once, values are used for error msg
requireOneAtLeast = {'identifier':'array', 'samp_name':'sample', 'protocol_name':'protocol', 'pf_name':'platform'} # one or more of them should be offered
jointVars = {'array':{'identifier':{'platform':'Platform', 'intensity_file':'Intensity file', 'intensity_format':'Intensity file source (format)'}}, 'platform':{'pf_name':{'probe_num':'Probe number', 'replicate':'Replicate', 'space':'Space', 'probe_file':'Probe file'}} } # for each value dict, if the key name is valid, and joint names are not all offered, errors occur.

# put all data into a dict:
# page_values maps every form field to either a single stripped string (for
# fields listed in monoVars) or a list of stripped strings (everything else).
# Leading and trailing whitespace is removed from every value.
page_values = {}
for key in form.keys():
	raw = form.getvalue(key)
	if isinstance(raw, list):
		page_values[key] = [item.strip() for item in raw]
	elif monoVars.has_key(key):
		page_values[key] = raw.strip()
	else:
		page_values[key] = [raw.strip()]

# check platform names for arrays first: an empty 'platform' choice falls
# back to the free-text 'platform_other' entry for the same row.
# NOTE(review): assumes 'platform_other' is at least as long as 'platform'
# (IndexError otherwise) -- presumably the form submits them in parallel;
# TODO confirm.
pfs = page_values.get('platform', [])
pfs_other = page_values.get('platform_other', [])
for i in range(len(pfs)):
	if not pfs[i]:
		pfs[i] = pfs_other[i]

cur_db = page_values['cur_db']
# NOTE(review): selects by the server-side 'db', not the submitted cur_db;
# the commented alternative suggests this is deliberate -- confirm.
sampVars = sampVars[db] #sampVars.get(cur_db, sampVars[db])

# section name -> webpage/DB column mapping used to build the output tables
secVars = {'project':prjVars, 'array':arrayVars, 'sample':sampVars, 'protocol':protVars, 'platform':pfVars}

################ validation ##################
#err_msg = []

# check required names first: each key in requiredVars must carry at least
# one non-blank value, otherwise an error message is queued.
for k,webname in requiredVars.items():
	# use .get() so a field missing from the form entirely produces the
	# "should be offered" message instead of a KeyError / HTTP 500
	pgvs = page_values.get(k, '')
	if monoVars.has_key(k): pgvs = [pgvs]
	for i in range(len(pgvs)):
		if pgvs[i].strip(): break
	else: err_msg.append('%s should be offered!' % webname)

# check if one or more of the following items are available:
# at least one array/sample/protocol/platform must be named somewhere.
for k in requireOneAtLeast.keys():
	# .get() so an absent field simply counts as "not offered" instead of
	# raising KeyError before the validation report can be shown
	v = page_values.get(k, '')
	if type(v) is types.ListType:
		if filter(lambda a:a, v): break
	elif v: break
else: # no one offered
	err_msg.append('One of %s should be defined!' % ', '.join(requireOneAtLeast.values()) )

#print "Content-type:text/html\n\n"
# check jointVars then: for every named entity (e.g. an array with a
# non-blank identifier), all of its dependent fields must be filled in too.
for sec, kv in jointVars.items(): # sec - 'array',..., kv - {}
	for k,kv1 in kv.items(): # k - 'identifier', ..., kv1 - {}
		# skip the whole check when the key field never appeared on the page
		# (previously a plain page_values[k] would raise KeyError here)
		pgvs = page_values.get(k)
		if pgvs is None: continue
		if monoVars.has_key(k): pgvs = [pgvs]
		for i in range(len(pgvs)):
			pgv = pgvs[i]
			if not pgv.strip(): continue
			for jk, webname in kv1.items(): # jk - dependent names - 'probe_file', .... webname - web info to display
				# guard the index: the dependent list can be missing or
				# shorter than the key list if the field was omitted
				jvs = page_values.get(jk, [])
				jv = jvs[i] if i < len(jvs) else ''
				if not jv.strip(): err_msg.append('A value should be offered for %s for the %s named "%s"!' % (webname, sec, pgv) )

# every channel of a named array should be offered a sample; one message
# per offending array is enough, hence the break.
# .get() defaults keep this check from raising KeyError when the identifier
# or a samp_array_<i> field is absent from the form.
array_nms = page_values.get('identifier', [])
for i in range(len(array_nms)):
	if not array_nms[i].strip(): continue
	for v in page_values.get('samp_array_'+str(i+1), []):
		if not v.strip():
			err_msg.append('A sample should be selected for each channel of Array %d' % (i+1))
			break

# request name
#if page_values.get('request_name', '') and not re.match(r'^[a-zA-Z0-9_]+$', page_values['request_name']): err_msg.append('Error: Invalid characters in the user ID!<br>') 
#################### read some special values ####################

# Request name: any non-blank value the user typed is accepted as-is
# (no character restrictions for data depositing); when blank or absent,
# the project name serves as the request name.
req_name = page_values.get('request_name','').strip()
if not req_name:
	req_name = page_values['prj_name']

# check user-added cols: each "user_added_cols" field holds comma/semicolon
# separated "name = value" pairs; column names must be valid identifiers and
# must not collide with SQL reserved words.
nm_valid = re.compile(r'^[a-zA-Z][a-zA-Z0-9_]*$')
sep_re = re.compile(r'[,;]')
nms_bad = []
mysql_keys = MYSQL_KEYS
usercol_fields = {'project':'prj_usercols', 'array':'array_usercols', 'sample':'samp_usercols', 'protocol':'prot_usercols', 'platform':'pf_usercols'}
for tbnm, field in usercol_fields.items():
	entries = page_values.get(field, None)
	if not entries: continue
	# the project field is a single string; wrap it so the loop below works
	if tbnm=='project': entries = [entries]
	for kws in entries:
		stripped = [s.strip() for s in sep_re.split(kws)]
		for item in [s for s in stripped if s]:
			pair = [s.strip() for s in item.split('=')]
			if len(pair) != 2:
				err_msg.append('Invalid "column_value = value" pair: "%s" in the "user_added_cols" in "%s" section' % (item, tbnm))
				continue
			k, v = pair
			if mysql_keys.get(k, False): nms_bad.append('"%s" (in section: "%s", column: "user_added_cols", SQL reserved word!)' % (k, tbnm) )
			if not nm_valid.match(k): nms_bad.append('"%s" in (section: "%s", column: "user_added_cols")' % (k, tbnm) )
if nms_bad: err_msg.append('Bad column names: %s' % '; '.join(nms_bad))

# Any validation failures: show them all at once with a back link and stop.
if err_msg:
	err_msg.append('<p><A href ="javascript:history.go(-1)" >Please correct it</a>')
	full_msg = '<br>'.join(err_msg)
	exitWithInfo(full_msg)


# make path dict for fillall.py #add path to selected files

# back up page_values before change it
import copy
page_values_orig = copy.deepcopy(page_values)

src_dir = {}

joint_chs = ' /// '
for d, f in [('project', 'prj_files'), ('prjanno', 'prjanno_file'), ('intensity', 'intensity_file'), ('image', 'image_file'), ('probe', 'probe_file'), ('protocol', 'prot_file'), ('other', 'other_file')]:
	dir_name = data_subdirs[d].get('name_full','')
	src_dir[d] = dir_name
	# page_values[f] = map(lambda a:(a and os.path.join(dir_name, a)), page_values[f])
	#if f=='prj_files': page_values[f] = joint_chs.join(page_values[f]) # prj_files should in one line
src_dir['platform'] = src_dir['probe'] # filldbs.py use platform instead of "probe"

#if 'prj_files' in page_values: page_values['prj_files'] = joint_chs.join(page_values['prj_files']) # prj_files should in one line
for fnm in ('prj_files', 'prjanno_file'):
	if fnm in page_values: page_values[fnm] = joint_chs.join(page_values[fnm]) # prj_files should in one line



################ make tbs ###################
# Build the result tables: tbs[section] = [title_row, data_row, ...].
# The title row holds lower-cased DB column names; each data row is a list of
# cell strings aligned with the title row. A row is emitted only when all of
# the section's key fields (secKeys) are non-blank.
tbs = {}
new_project = page_values.get('new_project', 'yes') == 'yes'
if not new_project:
	# updating an existing project: no project section is built
	del secVars['project']
# idx_dic[section] maps a page field name to its column index in that table
idx_dic = {}
for k, v in secVars.items(): 
	# NOTE: pgnms and head come from .keys()/.values() of the same unmodified
	# dict, so their order matches (guaranteed in CPython 2).
	pgnms, head = v.keys(), v.values()
	secList = [map(lambda a:a.lower(), head)] # title_line use lowercase
	# template row of empty cells, copied for each data row below
	ModLine = [''] * len(head)
	idx_dic[k] = idx = dict(zip(pgnms, range(len(pgnms))))
	keynames = secKeys[k]
	if type(page_values[pgnms[0]]) is not types.ListType: # is a single value (for project only)
		# check validity first: all key fields must be non-blank
		valid = True
		for knm in keynames:
			if not page_values[knm].strip():
				valid = False
				break
		if valid:
			line = ModLine[:]
			for nm in pgnms:
				if page_values.has_key(nm): line[idx[nm]] = page_values[nm]
			secList.append(line)
	else: # values are lists; one candidate row per list position
		n = len(page_values[pgnms[0]]) #len(page_values[head[0]])
		for i in range(n):
			# check validity first: all key fields at this position non-blank
			valid = True
			for knm in keynames:
				if not page_values[knm][i].strip():
					valid = False
					break
			if valid:
				line = ModLine[:]
				for nm in pgnms:
					if page_values.has_key(nm): line[idx[nm]] = page_values[nm][i]
				secList.append(line)
	# put it in tbs
	tbs[k] = secList

#################### amend ####################

# amend project: record the submitting user and assemble release_date from
# the separate year/month/day form fields (project table = title row + 1 row).
prj_tb = tbs.get('project', [])
if len(prj_tb) == 2:
	prj_tb[0].append('user_name')
	prj_tb[1].append(username)
	release = '/'.join((page_values['release_year'], page_values['release_month'], page_values['release_day']))
	prj_tb[1][idx_dic['project']['release_date']] = release

# when updating an existing project there is no project section, so tag every
# data row of every other section with the project name instead
if not new_project:
	for tb in tbs.values():
		tb[0].append('project')
		for data_row in tb[1:]:
			data_row.append(page_values['prj_name'])

# amend array: compose each array's hyb_date and append its per-channel
# columns (sample, dye, protocols, ...) to the array table.
# The form always submits per-array lists here, so the old single-value path
# (dead "if False:" branch) and the unreachable scalar 'else' were removed.
n = len(page_values['hyb_year'])
tb = tbs['array']
line0 = tb[0]	# title line; extended lazily as arrays with more channels appear

anm = 'samp_array_'
ch_max = 0	# widest channel count whose columns are already in the title line
# Row cursor into tb: make_tbs only emitted rows for arrays with a non-blank
# identifier, so tb cannot be indexed by the raw form index i. The previous
# 'line = tb[i+1]' raised IndexError (or paired the wrong row) whenever an
# unnamed array preceded a named one.
row = 0
for i in range(n):
	# only valid (named) arrays got a row in tbs; skip the others
	array_name = page_values['identifier']
	if type(array_name) is types.ListType: # there are a list of arrays
		if not array_name[i].strip(): continue
	elif not array_name.strip(): # is a single value, but not a valid array
		continue
	row += 1
	line = tb[row]
	# per-array page field names, e.g. 'samp_array_3', 'dye_array_3', ...
	# NOTE: chVars.keys()/.values() pairing is relied on below (same
	# unmodified dict, so the order matches).
	pgnm = map(lambda a:a+str(i+1), chVars.keys())

	# append one group of channel columns per non-blank channel
	ch_samps = page_values[anm + str(i+1)]
	m = len(ch_samps)
	valid_ch = 0
	for j in range(m):
		# only channels with a sample selected are used
		if not ch_samps[j].strip(): continue
		valid_ch += 1
		if valid_ch > ch_max: # first array to reach this depth: extend title line
			ch_max += 1
			line0.extend(map(lambda a:a+'ch'+str(valid_ch), chVars.values()))
		line.extend(map(lambda a:page_values.has_key(a) and page_values[a][j] or '', pgnm))

	# compose the hybridization date from its year/month/day fields
	line[idx_dic['array']['hyb_date']] = '/'.join((page_values['hyb_year'][i], page_values['hyb_month'][i], page_values['hyb_day'][i]))

__DEBUG__ = True
__DEBUG__ = False
if __DEBUG__:
	f = open('input.txt', 'w')
	print "Content-type:text/html\n\n"
	for k,v in tbs.items(): 
		f.write('[%s]\n\n' % k)
		print '<p><b>', k, ':</b><br>'
		print '<table>'
		for line in v: 
			f.write('\t'.join(line)+'\n')
			print '<tr>', ''.join(map(lambda a:'<td>'+a+'</td>', line)), '</tr>'
		f.write('\n\n\n')
		print '</table>'
	f.close()
	sys.exit(0)

# prepare necessary directories.
#result_subdirs = prepareResultDirs(form_params.get('request_name', ''))

req_type = TYPE_DBS_FILL_SINGLE #TYPE_FILL_SINGLE 
#req_name = page_values['request_name'] #os.path.basename(os.path.normpath(result_subdirs['request']['name_full']))
user_params = {'fill_type':'By table', 'tbs':tbs, 'src_dir':src_dir, 'user_name':username, 'page_values':page_values_orig, 'cur_db':cur_db}
n = saveReq(req_type, req_name, user_params, username)
#if n is False: exitMsg('An error ocurred when accessing database!')
if not n: exitMsg('An error ocurred on server when submitting the request!<p><input type="button" value="Go back" onClick="javascript:history.go(-1)"/></p>')

tellDaemon('New requests')

import time
time.sleep(1)

print cgi_token
print '''<body onload=javascript:window.location.href="%s/ui/browse?page_type='webarraydb_fill'"></body>''' % (script_path_url)
sys.exit(0)

#print '<body><iframe width=%s, height=%s frameborder=0 src="%s/ui/browse"</body>' % ('100%', '100%', script_path_url)
# NOTE(review): unreachable -- sys.exit(0) above always runs first; kept as
# an alternative (embedded PIH page) rendering path.
from PythonInsideHTML import PIH
exec PIH(script_path_file + "/pages/ui_browse.pih?page_type='webarraydb_fill'").pythonCode()