TRexDataAnalysis.py revision f6641c89
1#!/scratch/Anaconda2.4.0/bin/python
2import pandas as pd
3import numpy as np
4import matplotlib
5matplotlib.use('Agg')
6from matplotlib import pyplot as plt
7import os
8
9PATH_FOR_GRAPHS = 'Z:/trex/trex-doc/images/'
10
11
def convert_dict_to_dframe(data, categories, index=''):
	"""Build a DataFrame out of selected entries of a dict.

	:param data: mapping from key to the values of one column.
	:param categories: keys of ``data`` that become the columns.
	:param index: optional key of ``data`` whose values label the rows;
		when empty, the DataFrame gets pandas' default index.
	:return: DataFrame with one column per requested category.
	"""
	selected = {name: data[name] for name in categories}
	if not index:
		return pd.DataFrame(selected)
	return pd.DataFrame(selected, index=data[index])
21
22
def plot_bar_by_category(data_frame, category, index='', graph_name='graph.png', show='', gtitle='', save_path=''):
	"""Draw a bar chart of the selected columns of a DataFrame.

	The frame (or, when ``index`` is given, the sorted ``index`` column) is
	also printed to stdout before plotting.

	:param data_frame: DataFrame holding the data to plot.
	:param category: list of column names to plot; values are cast to float.
	:param index: optional column name to sort the rows by before plotting.
	:param graph_name: file name appended to ``save_path`` when saving.
	:param show: when truthy, the figure is displayed with ``plt.show()``.
	:param gtitle: title placed on the figure.
	:param save_path: when non-empty, the figure is saved to
		``save_path + graph_name`` (no separator is inserted).
	"""
	if index:
		# sort_values() replaces the deprecated sort_index(by=...) spelling.
		data_frame = data_frame.sort_values(by=index)
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print(data_frame[index])
	else:
		print(data_frame)
	data_frame = pd.DataFrame(data_frame[category], columns=category).astype(float)
	data_frame.plot(kind='bar')
	plt.xticks(rotation='horizontal')
	plt.title(gtitle)
	if save_path:
		plt.savefig(save_path + graph_name)
	if show:
		plt.show()
37
38
def generate_csv(data_frame, file_name, save_path=(os.getcwd() + "/")):
	"""Write a DataFrame to a CSV file.

	:param data_frame: DataFrame to serialise with ``to_csv``.
	:param file_name: name of the file to create.
	:param save_path: prefix prepended to ``file_name`` (no separator is
		inserted). The default is the working directory as it was when this
		function was defined, not when it is called.
	"""
	# The context manager closes the file even if to_csv() raises.
	with open(save_path + file_name, 'w') as csv_file:
		data_frame.to_csv(csv_file)
43
44
def plot_bar_by_test_name(data_frame, test_name, category, graph_name='graph.png', show='', gtitle='', save_path=''):
	"""Plot a bar chart for the rows belonging to a single test.

	:param data_frame: DataFrame with a 'Test_name' column.
	:param test_name: only rows whose 'Test_name' equals this value are plotted.
	:param category: array of category (column) names that will appear as metrics.
	:param graph_name: file name used when the figure is saved.
	:param show: when truthy, the figure is displayed.
	:param gtitle: figure title; when empty, ``test_name`` is used as the title.
	:param save_path: when non-empty, prefix under which the figure is saved.
	"""
	rows_for_test = data_frame[data_frame['Test_name'] == test_name]
	# Honour an explicit title; previously gtitle was accepted but ignored.
	title = gtitle or test_name
	plot_bar_by_category(rows_for_test, category, 'Test_name', graph_name, show, gtitle=title, save_path=save_path)
49
50
def generate_dframe_for_test(test_name, test_data):
	"""Collect the results of one test into a DataFrame plus summary stats.

	Every entry of ``test_data`` is read positionally: item [3] is the result
	value, items [4] and [5] are that entry's min and max values. All three
	must be convertible to float. ``test_data`` must be non-empty, because
	min()/max() are taken over the collected values.

	:param test_name: name given to the single column of the DataFrame.
	:param test_data: iterable of indexable result entries.
	:return: ``(df, stats)`` where ``df`` holds the results in a column named
		``test_name`` and ``stats`` is the tuple (avg_mpps, min, max).
	"""
	test_results = []
	test_mins = []
	test_maxs = []
	for query in test_data:
		test_results.append(float(query[3]))
		test_mins.append(float(query[4]))
		test_maxs.append(float(query[5]))
	df = pd.DataFrame({test_name: test_results})
	# Average the column itself: calling float() on the one-element Series
	# returned by df.mean() is deprecated in recent pandas releases.
	avg_result = float(df[test_name].mean())
	stats = (avg_result, min(test_mins), max(test_maxs))  # stats = (avg_mpps,min,max)
	return df, stats
62
63
def generate_dframe_arr_and_stats_of_tests_per_setup(date, setup_name, setup_dict):
	"""Build the trend frames, stats table and latest-run table of one setup.

	:param date: value repeated in the 'Date' column of the latest-run table.
	:param setup_name: value repeated in the 'Setup' column of that table.
	:param setup_dict: mapping from test name to that test's list of result
		entries (as consumed by generate_dframe_for_test).
	:return: tuple of
		- list with one trend DataFrame per test,
		- DataFrame of per-test stats with columns 'Avg MPPS', 'Golden Min',
		  'Golden Max', indexed by test name,
		- DataFrame with columns 'Date', 'Setup', 'Test Name', 'MPPS' holding
		  item [3] of each test's last entry, indexed from 1.
	"""
	test_names = setup_dict.keys()
	trend_frames = []
	stats_rows = []
	latest_values = []
	for name in test_names:
		runs = setup_dict[name]
		trend_df, test_stats = generate_dframe_for_test(name, runs)
		trend_frames.append(trend_df)
		stats_rows.append(test_stats)
		# The last entry of a test is treated as its latest run.
		latest_values.append(float(runs[-1][3]))
	total = len(latest_values)
	latest_columns = {
		'Date': [date] * total,
		'Setup': [setup_name] * total,
		'Test Name': test_names,
		'MPPS': latest_values,
	}
	latest_df = pd.DataFrame(latest_columns, index=range(1, total + 1))
	stats_df = pd.DataFrame(stats_rows, index=test_names, columns=['Avg MPPS', 'Golden Min', 'Golden Max'])
	stats_df.index.name = 'Test Name'
	return trend_frames, stats_df, latest_df
83
84
def create_plot_for_dframe_arr(dframe_arr, setup_name, start_date, end_date, show='no', save_path='',
		file_name='trend_graph'):
	"""Plot the trend lines of all tests of a setup on one figure.

	:param dframe_arr: list of DataFrames, concatenated column-wise and cast
		to float before plotting.
	:param setup_name: used in the title and in the saved file name.
	:param start_date: string shown as the start of the time period label.
	:param end_date: string shown as the end of the time period label.
	:param show: the figure is displayed only when this equals 'yes'.
	:param save_path: when non-empty, the figure is saved to
		``save_path + setup_name + file_name + '.png'``.
	:param file_name: suffix of the saved file name (without extension).
	"""
	dframe_all = pd.concat(dframe_arr, axis=1).astype(float)
	dframe_all.plot()
	plt.legend(fontsize='small', loc='best')
	plt.ylabel('MPPS')
	plt.title('Setup: ' + setup_name)
	# Hide x ticks and their labels. Booleans are used instead of the legacy
	# 'off' strings, which current matplotlib treats as truthy (ticks shown).
	plt.tick_params(
		axis='x',
		which='both',
		bottom=False,
		top=False,
		labelbottom=False)
	plt.xlabel('Time Period: ' + start_date + ' - ' + end_date)
	if save_path:
		plt.savefig(save_path + setup_name + file_name + '.png')
	if show == 'yes':
		plt.show()
104
105
def create_bar_plot_for_latest_runs_per_setup(dframe_all_tests_latest, setup_name, show='no', save_path=''):
	"""Bar-plot the 'MPPS' column of the latest runs of a setup.

	:param dframe_all_tests_latest: DataFrame with the columns 'Test Name',
		'Setup', 'Date' and 'MPPS'.
	:param setup_name: used in the title and in the output file names.
	:param show: the figure is displayed only when this equals 'yes'.
	:param save_path: when non-empty, the figure is saved to
		``save_path + setup_name + '_latest_test_runs.png'`` and the table,
		rounded to 2 decimals, to
		``save_path + setup_name + '_latest_test_runs_stats.csv'``.
	"""
	plt.figure()
	mpps_values = dframe_all_tests_latest['MPPS']
	mpps_values.plot(kind='bar', legend=False)
	# Fixed column order for the CSV export.
	ordered_table = dframe_all_tests_latest[['Test Name', 'Setup', 'Date', 'MPPS']]
	plt.xticks(rotation='horizontal')
	plt.xlabel('Index of Tests')
	plt.ylabel('MPPS')
	plt.title("Test Runs for Setup: " + setup_name)
	if save_path:
		output_prefix = save_path + setup_name
		plt.savefig(output_prefix + '_latest_test_runs.png')
		ordered_table.round(2).to_csv(output_prefix + '_latest_test_runs_stats.csv')
	if show == 'yes':
		plt.show()
120
121
def create_all_data_per_setup(setup_dict, setup_name, start_date, end_date, show='no', save_path='', add_stats=''):
	"""Produce all graphs (and optionally the stats CSV) of a single setup.

	:param setup_dict: mapping from test name to that test's result entries.
	:param setup_name: name of the setup, used in titles and file names.
	:param start_date: start of the reported period (label only).
	:param end_date: end of the reported period; also used as the date of
		the latest runs.
	:param show: forwarded to the plotting functions.
	:param save_path: prefix under which the outputs are saved.
	:param add_stats: when truthy, the trend stats rounded to 2 decimals are
		written to ``save_path + setup_name + '_trend_stats.csv'``.
	"""
	generated = generate_dframe_arr_and_stats_of_tests_per_setup(end_date, setup_name, setup_dict)
	trend_frames, trend_stats, latest_runs = generated
	create_bar_plot_for_latest_runs_per_setup(latest_runs, setup_name, show=show, save_path=save_path)
	create_plot_for_dframe_arr(trend_frames, setup_name, start_date, end_date, show, save_path)
	if add_stats:
		trend_stats.round(2).to_csv(save_path + setup_name + '_trend_stats.csv')
	# Close the figures opened while plotting this setup.
	plt.close('all')
131
132
def create_all_data(ga_data, setup_names, start_date, end_date, save_path='', add_stats=''):
	"""Generate the outputs of every listed setup, without displaying figures.

	:param ga_data: mapping from setup name to that setup's test results.
	:param setup_names: iterable of setup names to process; each must be a
		key of ``ga_data``.
	:param start_date: start of the reported period.
	:param end_date: end of the reported period.
	:param save_path: prefix under which the outputs are saved.
	:param add_stats: forwarded; when truthy the trend stats CSV is written.
	"""
	for name in setup_names:
		setup_results = ga_data[name]
		create_all_data_per_setup(
			setup_results, name, start_date, end_date,
			show='no', save_path=save_path, add_stats=add_stats)
137