trex_perf.py revision 8b52a31e
1#!/router/bin/python-2.7.4
2import h_avc
3
4
5from trex_control_plane.client.trex_client import CTRexClient
6import ConfigParser
7import threading
8import time,signal
9import argparse
10import sys
11import os
12import subprocess
13from time import sleep
14import signal
15import textwrap
16import getpass
17import random
18import datetime
19from datetime import timedelta
20import traceback
21import math
22import re
23import termios
24import errno
25import smtplib
26from email.MIMEMultipart import MIMEMultipart
27from email.MIMEBase import MIMEBase
28from email.MIMEText import MIMEText
29from email.Utils import COMMASPACE, formatdate
30from email import Encoders
31from email.mime.image import MIMEImage
32
33from distutils.version import StrictVersion
34
class TrexRunException(Exception):
    """Raised when a TRex run fails; carries console/stderr logs for reporting."""

    def __init__ (self, reason, cmd = None, std_log = None, err_log = None):
        self.reason = reason
        self.std_log = std_log
        self.err_log = err_log

        # assemble the human readable error message once, at construction time
        parts = ["\nSummary of error:\n\n %s\n" % (reason)]

        if std_log:
            parts.append("\nConsole Log:\n\n %s\n" % (self.std_log))

        if err_log:
            parts.append("\nStd Error Log:\n\n %s\n" % (self.err_log))

        self.message = "".join(parts)

    def __str__(self):
        return self.message
51
52
53############################# utility functions start #################################
54
def verify_glibc_version ():
    """Verify the system glibc (queried via ldd) is at least 2.5.

    Graph plotting (phantomjs) needs a modern glibc; raises Exception when
    the version cannot be parsed or is too old.
    """
    # 'ldd --version' prints e.g. "ldd (GNU libc) 2.17" on its first line
    x = subprocess.check_output("/usr/bin/ldd --version", shell=True)
    m = re.match("ldd \(GNU libc\) (.*)", x)
    if not m:
        raise Exception("Cannot determine LDD version")
    current_version = m.group(1)

    # StrictVersion compares numerically (so "2.10" > "2.5", unlike a string compare)
    if StrictVersion(current_version) < StrictVersion("2.5"):
        raise Exception("GNU ldd version required for graph plotting is at least 2.5, system is %s - please run simple 'find'" % current_version)
64
def get_median(numericValues):
    """Return the median of *numericValues*.

    Odd-sized input: the middle element is returned as-is.
    Even-sized input: the mean of the two middle elements, as a float.

    Raises:
        IndexError: if *numericValues* is empty.
    """
    theValues = sorted(numericValues)
    # Use floor division for the indices: plain '/' yields a float under
    # Python 3 (and 'from __future__ import division'), which breaks indexing.
    if len(theValues) % 2 == 1:
        return theValues[(len(theValues) + 1) // 2 - 1]
    else:
        lower = theValues[len(theValues) // 2 - 1]
        upper = theValues[len(theValues) // 2]
    return (float(lower + upper)) / 2
72    return (float(lower + upper)) / 2
73
def list_to_clusters(l, n):
    """Yield successive chunks (sub-lists) of at most *n* items from *l*.

    The final chunk may be shorter than *n*; an empty input yields nothing.
    """
    # range() instead of xrange(): identical behavior here, and it keeps the
    # helper working on Python 3 where xrange no longer exists.
    for i in range(0, len(l), n):
        yield l[i:i+n]
77
def cpu_histo_to_str (cpu_histo):
    """Render the router CPU samples as text, one row per 10-second period."""
    out = "\nCPU Samplings:\n\n"
    elapsed = 0

    for chunk in list(list_to_clusters(cpu_histo, 10)):
        elapsed += 10
        row = "%3s Seconds: [" % elapsed

        # pad a short final chunk with placeholders so every row is 10 cells
        padded = list(chunk) + (10 - len(chunk)) * [None]

        for sample in padded:
            if sample is not None:
                row += "%5.1f%%, " % sample
            else:
                row += " " * 8

        # drop the trailing ", " then restore a single closing space
        row = row[:-2] + " " + "]\n"

        out += row

    return out
104
105# Terminal Manager Class
class TermMng:
    """Context manager that disables terminal echo on stdin for its scope."""

    def __enter__(self):
        self.fd = sys.stdin.fileno()
        self.old = termios.tcgetattr(self.fd)

        # operate on a copy of the attributes with the ECHO bit cleared
        quiet = self.old[:]
        quiet[3] &= ~termios.ECHO

        self.tcsetattr_flags = termios.TCSAFLUSH
        if hasattr(termios, 'TCSASOFT'):
            # BSD-style soft update flag, when the platform supports it
            self.tcsetattr_flags |= termios.TCSASOFT

        termios.tcsetattr(self.fd, self.tcsetattr_flags, quiet)

    def __exit__(self, type, value, traceback):
        # restore the original terminal attributes on the way out
        termios.tcsetattr(self.fd, self.tcsetattr_flags, self.old)
123
124############################# utility functions stop #################################
125
def send_mail(send_from, send_to, subject, html_text, txt_attachments=None, images=None, server="localhost"):
    """Send an HTML e-mail report with optional attachments and inline images.

    Args:
        send_from:       sender address.
        send_to:         list of recipient addresses.
        subject:         message subject line.
        html_text:       HTML body; an <img> tag is appended for each image.
        txt_attachments: optional list of file paths attached as octet-stream.
        images:          optional list of image paths embedded via Content-ID.
        server:          SMTP relay host.
    """
    # avoid mutable default arguments - shared lists would leak state across calls
    if txt_attachments is None:
        txt_attachments = []
    if images is None:
        images = []

    assert isinstance(send_to, list)
    assert isinstance(txt_attachments, list)
    assert isinstance(images, list)

    # create a multi part message
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = COMMASPACE.join(send_to)
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject
    msg['Cc'] = "imarom@cisco.com"

    # add all images to the text as embedded images (referenced by Content-ID)
    for image in images:
        html_text += '<br><img src="cid:{0}"><br>'.format(image)
        with open(image, 'rb') as fp:
            image_object = MIMEImage(fp.read())
        image_object.add_header('Content-ID', image)
        msg.attach(image_object)

    # attach the main report as embedded HTML
    msg.attach( MIMEText(html_text, 'html') )

    # attach regular txt files (use 'with' so the handles are always closed)
    for f in txt_attachments:
        part = MIMEBase('application', "octet-stream")
        with open(f, "rb") as attachment_file:
            part.set_payload(attachment_file.read())
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(f))
        msg.attach(part)

    smtp = smtplib.SMTP(server)
    smtp.sendmail(send_from, send_to, msg.as_string())
    smtp.close()
162
163# convert HTML to image - returning a image file as a string
def html2image (html_filename, image_filename):
    """Rasterize *html_filename* to *image_filename* using phantomjs.

    Returns the image filename on success; raises Exception if the image
    was not produced.
    """
    # pass the arguments as a list (no shell) so filenames containing spaces
    # or shell metacharacters cannot break or inject into the command
    cmd = ["./phantom/phantomjs", "./phantom/rasterize.js", html_filename, image_filename]
    subprocess.call(cmd)

    # an explicit raise instead of 'assert': asserts are stripped under 'python -O'
    if not os.path.exists(image_filename):
        raise Exception("Failed to convert HTML '%s' to image '%s'" % (html_filename, image_filename))

    return (image_filename)
170    return (image_filename)
171
172# convert results of run to a string
def run_results_to_str (results, cond_type):
    """Format a single run's result dict as a human readable, aligned block."""
    verdict = "Passed" if check_condition(cond_type, results) else "Failed"

    lines = [
        "M:                  {0:<12.6f}\n".format(results['m']),
        "BW:                 {0:<12,.2f} [Mbps]\n".format(results['tx']),
        "PPS:                {0:<12,} [pkts]\n".format(int(results['total-pps'])),
        "CPU:                {0:.4f} %\n".format(results['cpu_util']),
        "Maximum Latency:    {0:<12,} [usec]\n".format(int(results['maximum-latency'])),
        "Average Latency:    {0:<12,} [usec]\n".format(int(results['average-latency'])),
        "Pkt Drop:           {0:<12,} [pkts]\n".format(int(results['total-pkt-drop'])),
        "Condition:          {0:<12} ({1})\n".format(verdict, cond_type_to_str(cond_type)),
    ]

    return "".join(lines)
186
187############################# classes #################################
class ErrorHandler(object):
    """Logs a fatal error (by exception type) and signals the main flow.

    Constructing an instance is the whole action: it writes a report to the
    global logger and then raises SIGUSR1 against this process, which the
    registered error_signal_handler turns into a full cleanup + exit.
    """
    def __init__ (self, exception, traceback):

        if isinstance(exception, TrexRunException):
            # TRex-specific failure: the exception already formats its own report
            logger.log("\n*** Script Terminated Due To Trex Failure")
            logger.log("\n********************** TRex Error - Report **************************\n")
            logger.log(str(exception))
            logger.flush()

        elif isinstance(exception, IOError):
            # bad router address / TRex config / missing file
            logger.log("\n*** Script Terminated Due To IO Error")
            logger.log("\nEither Router address or the Trex config is bad or some file is missing - check traceback below")
            logger.log("\n********************** IO Error - Report **************************\n")
            logger.log(str(exception))
            logger.log(str(traceback))
            logger.flush()


        else:
            # anything else is treated as an internal error: dump both the
            # exception text and the formatted traceback
            logger.log("\n*** Script Terminated Due To Fatal Error")
            logger.log("\n********************** Internal Error - Report **************************\n")
            logger.log(str(exception) + "\n")
            logger.log(str(traceback))
            logger.flush()


        # call the handler
        # NOTE(review): this assigns a *local* variable - without a 'global'
        # statement it never reaches module scope, so the value is dropped.
        # Confirm whether a module-level kill-cause flag was intended here.
        g_kill_cause = "error"
        os.kill(os.getpid(), signal.SIGUSR1)
217
218
219# simple HTML table
class HTMLTable:
    """Minimal two-column (Parameter / Results) HTML table builder."""

    def __init__ (self):
        self.table_rows = []

    def add_row (self, param, value):
        # rows are rendered in insertion order as [parameter, value] pairs
        self.table_rows.append([param, value])

    def generate_table(self):
        """Return the accumulated rows as a single HTML <table> string."""
        body = "".join(
            "<tr><td>{0}</td><td>{1}</td></tr>".format(name, val)
            for name, val in self.table_rows
        )

        return ('<table class="myWideTable" style="width:50%">'
                "<tr><th>Parameter</th><th>Results</th></tr>"
                + body
                + "</table>")
237
238# process results and dispatch it
class JobReporter:
    """Renders and dispatches a completed job's results.

    Consumes the 'job_summary' dict built by the main flow (job metadata,
    optional 'find_results' dict and 'plot_results' list) and produces a
    console summary, an HTML report and an e-mail with embedded graph images.
    """
    def __init__ (self, job_summary):
        # job_summary: dict carrying job metadata and results; keys used here
        # include job_name, yaml, job_type_str, cond_name/cond_type, job_dir,
        # log_filename, email, total_run_time, find_results, plot_results,
        # graph_filename, user.
        self.job_summary = job_summary
        pass

    def __plot_results_to_str (self, plot_results):
        """Format each plotted point (one run) as a single aligned text line."""
        output = "\nPlotted Points: \n\n"
        for p in plot_results:
            output += "BW  : {0:8.2f},  ".format(p['tx'])
            output += "PPS : {0:8,}  ".format(int(p['total-pps']))
            output += "CPU : {0:8.2f} %,  ".format(p['cpu_util'])
            output += "Max Latency : {0:10,},  ".format(int(p['maximum-latency']))
            output += "Avg Latency : {0:10,},  ".format(int(p['average-latency']))
            output += "Pkt Drop    : {0:12,},  \n".format(int(p['total-pkt-drop']))

        return (output + "\n")

    def __summary_to_string (self):
        """Build the full plain-text job summary (metadata + optional results)."""
        output = ""

        output += "\n-== Job Completed Successfully ==-\n\n"
        output += "Job Report:\n\n"
        output +=  "Job Name:          {0}\n".format(self.job_summary['job_name'])
        output +=  "YAML file:         {0}\n".format(self.job_summary['yaml'])
        output +=  "Job Type:          {0}\n".format(self.job_summary['job_type_str'])
        output +=  "Condition:         {0}\n".format(self.job_summary['cond_name'])
        output +=  "Job Dir:           {0}\n".format(self.job_summary['job_dir'])
        output +=  "Job Log:           {0}\n".format(self.job_summary['log_filename'])
        output +=  "Email Report:      {0}\n".format(self.job_summary['email'])
        output +=  "Job Total Time:    {0}\n\n".format(self.job_summary['total_run_time'])

        # 'find' results (max BW search) are optional
        if (self.job_summary.get('find_results') != None):
            find_results = self.job_summary['find_results']
            output += ("Maximum BW Point Details:\n\n")
            output += run_results_to_str(find_results, self.job_summary['cond_type'])

        # plotted points (graph job) are optional as well
        if (self.job_summary.get('plot_results') != None):
            plot_results = self.job_summary['plot_results']
            output += self.__plot_results_to_str(plot_results)

        return output


    # simple print to screen of the job summary
    def print_summary (self):
        summary = self.__summary_to_string()
        logger.log(summary)

    def __generate_graph_report (self, plot_results):
        """Fill graph_template.html with the plotted points; return the file name."""
        # stringified lists are spliced into the template with the outer
        # brackets stripped (hence the [1:len-1] slices below)
        graph_data = str( [ [x['tx'], x['cpu_util']/100, x['maximum-latency'], x['average-latency']] for x in plot_results ] )
        table_data = str( [ [x['tx'], x['total-pps'], x['cpu_util']/100, x['norm_cpu'], x['maximum-latency'], x['average-latency'], x['total-pkt-drop']] for x in plot_results ] )

        with open ("graph_template.html", "r") as myfile:
            data = myfile.read()
            data = data.replace("!@#$template_fill_head!@#$", self.job_summary['yaml'])
            data = data.replace("!@#$template_fill_graph!@#$", graph_data[1:(len(graph_data) - 1)])
            data = data.replace("!@#$template_fill_table!@#$", table_data[1:(len(table_data) - 1)])

        # generate HTML report
        graph_filename = self.job_summary['graph_filename']
        text_file = open(graph_filename, "w")
        text_file.write(str(data))
        text_file.close()

        return graph_filename

    def __generate_body_report (self):
        """Fill report_template.html with the setup and summary tables."""
        job_setup_table = HTMLTable()

        job_setup_table.add_row("User Name", self.job_summary['user'])
        job_setup_table.add_row("Job Name", self.job_summary['job_name'])
        job_setup_table.add_row("Job Type", self.job_summary['job_type_str'])
        job_setup_table.add_row("Test Condition", self.job_summary['cond_name'])
        job_setup_table.add_row("YAML File", self.job_summary['yaml'])
        job_setup_table.add_row("Job Total Time", "{0}".format(self.job_summary['total_run_time']))

        job_summary_table = HTMLTable()

        find_results = self.job_summary['find_results']

        # results table is only populated when a find job actually ran
        if find_results != None:
            job_summary_table.add_row("Maximum Bandwidth", "{0:,.2f} [Mbps]".format(find_results['tx']))
            job_summary_table.add_row("Maximum PPS", "{0:,} [pkts]".format(int(find_results['total-pps'])))
            job_summary_table.add_row("CPU Util.", "{0:.2f}%".format(find_results['cpu_util']))
            job_summary_table.add_row("Maximum Latency", "{0:,} [usec]".format(int(find_results['maximum-latency'])))
            job_summary_table.add_row("Average Latency", "{0:,} [usec]".format(int(find_results['average-latency'])))
            job_summary_table.add_row("Total Pkt Drop", "{0:,}  [pkts]".format(int(find_results['total-pkt-drop'])))

        with open ("report_template.html", "r") as myfile:
            data = myfile.read()
            data = data.replace("!@#$template_fill_job_setup_table!@#$", job_setup_table.generate_table())
            data = data.replace("!@#$template_fill_job_summary_table!@#$", job_summary_table.generate_table())

        return data

    # create an email report and send to the user
    def send_email_report (self):
        """Build the HTML report (+ optional graph image) and e-mail it."""
        images = []

        logger.log("\nCreating E-Mail Report...\n")

        # generate main report
        report_str = self.__generate_body_report()

        # generate graph report (if exists)
        plot_results = self.job_summary['plot_results']
        if plot_results:
            logger.log("Generating Plot Results HTML ...\n")
            graph_filename  = self.__generate_graph_report(plot_results)
            logger.log("Converting HTML to image ...\n")
            images.append(html2image(graph_filename, graph_filename + ".png"))

        else:
            graph_filename = None

        # create email
        from_addr = 'TrexReporter@cisco.com'
        to_addr = []
        to_addr.append(self.job_summary['email'])
        to_addr.append('imarom@cisco.com')

        # the job log is always attached; the graph HTML only when it exists
        attachments = []
        attachments.append(self.job_summary['log_filename'])
        logger.log("Attaching log {0}...".format(self.job_summary['log_filename']))

        if graph_filename:
            attachments.append(graph_filename)
            logger.log("Attaching plotting report {0}...".format(graph_filename))

        logger.flush()

        send_mail(from_addr, to_addr, "TRex Performance Report", report_str, attachments, images)
        logger.log("\nE-mail sent successfully to: " + self.job_summary['email'])
372
373# dummy logger in case logger creation failed
class DummyLogger(object):
    """Fallback logger that simply echoes to stdout.

    Used as the module-wide logger until (or in case) a real file-backed
    logger is installed; mirrors the MyLogger interface.
    """

    def __init__(self):
        pass

    def log(self, text, force = False, newline = True):
        # 'force' is accepted for interface compatibility but has no effect here
        suffix = "\n" if newline else ""
        sys.stdout.write(text + suffix)

    def console(self, text, force = False, newline = True):
        # console output is identical to log output for the dummy logger
        self.log(text, force, newline)

    def flush (self):
        # nothing is buffered - stdout is written to directly
        pass
387
388# logger object
# logger object
class MyLogger(object):
    """File-backed logger that mirrors output to the console.

    Keeps a duplicate of the original stdout file descriptor so that console
    output can be silenced (redirected to /dev/null) and later restored,
    while 'forced' messages can still reach the real terminal.
    """

    def __init__(self, log_filename):
        # Store the original stdout and stderr
        sys.stdout.flush()
        sys.stderr.flush()

        # duplicate the real stdout fd before any redirection, so forced
        # console writes and restore() can always reach the terminal
        self.stdout_fd = os.dup(sys.stdout.fileno())
        self.devnull = os.open('/dev/null', os.O_WRONLY)
        self.log_file = open(log_filename, 'w')
        self.silenced = False
        self.pending_log_file_prints = 0   # counter used to batch log-file flushes
        self.active = True                 # when False, log()/console() are no-ops

    def shutdown (self):
        self.active = False

    def reactive (self):
        self.active = True

    # silence all prints from stdout
    def silence(self):
        # point the process-wide stdout fd at /dev/null
        os.dup2(self.devnull, sys.stdout.fileno())
        self.silenced = True

    # restore stdout status
    def restore(self):
        sys.stdout.flush()
        sys.stderr.flush()
        # Restore normal stdout
        os.dup2(self.stdout_fd, sys.stdout.fileno())
        self.silenced = False

    #print a message to the log (both stdout / log file)
    def log(self, text, force = False, newline = True):
        if not self.active:
            return

        self.log_file.write((text + "\n") if newline else text)
        self.pending_log_file_prints += 1

        # flush the log file every 10 writes to bound data loss on a crash
        if (self.pending_log_file_prints >= 10):
             self.log_file.flush()
             self.pending_log_file_prints = 0

        self.console(text, force, newline)

    # print a message to the console alone
    def console(self, text, force = False, newline = True):
        if not self.active:
            return

        _text = (text + "\n") if newline else text

        # if we are silenced and not forced - go home
        if self.silenced and not force:
            return

        if self.silenced:
            # forced message while silenced: bypass the redirected stdout and
            # write straight to the saved terminal fd.
            # NOTE(review): os.write() with a str works on Python 2 (str is
            # bytes); a Python 3 port would need to encode here - confirm.
            os.write(self.stdout_fd, _text)
        else:
            sys.stdout.write(_text)

        sys.stdout.flush()

    # flush
    def flush(self):
        sys.stdout.flush()
        self.log_file.flush()

    def __del__(self):
        # best-effort cleanup of the fds/file opened in __init__
        os.close(self.devnull)
        if self.log_file:
            self.log_file.flush()
            self.log_file.close()
464
465
466# simple progress bar
# simple progress bar
class ProgressBar(threading.Thread):
    """Background thread drawing a textual progress bar with router CPU util."""

    def __init__(self, time_sec, router):
        super(ProgressBar, self).__init__()
        self.active = True
        # pad the expected duration so the bar never completes ahead of the run
        self.time_sec = time_sec + 15
        self.router = router

    def run (self):
        global g_stop

        width = 40
        step_per_sec = float(width) / self.time_sec
        filled = 0.0

        for _ in range(self.time_sec):
            if not self.active:
                # stopped externally - jump straight to a full 100% bar
                done_bar = "\r[" + ('#' * width) + "] {0:.2f} %".format(100)
                logger.console(done_bar, force = True, newline = False)
                break

            if g_stop:
                break

            sleep(1)
            filled += step_per_sec
            hashes = int(filled)
            bar = "\r[" + ('#' * hashes) + (' ' * (width - hashes)) + "] {0:.2f} %".format( (filled / width) * 100 )
            bar += " / Router CPU: {0:.2f} %".format(self.router.get_last_cpu_util())
            logger.console(bar, force = True, newline = False)

        logger.console("\r\n", force = True, newline = False)
        logger.flush()

    def stop (self):
        # ask run() to finish, then wait for the thread to exit
        self.active = False
        self.join()
504
505# global vars
506
g_stop = False            # set to True to ask all worker threads to terminate
logger = DummyLogger()    # stand-in logger; presumably swapped for MyLogger by the main flow - confirm

# cleanup list is a list of callables to be run when cntrl+c is caught
cleanup_list = []
512
513################ threads ########################
514
515# sampler
# sampler
class Sample_Thread (threading.Thread):
    """Polls router statistics once per second until asked to stop."""

    def __init__(self, threadID, router):

        threading.Thread.__init__(self)
        self.threadID = threadID
        self.router = router
        self.stop = False   # set via do_stop() to end the sampling loop

    def run(self):
        self.router.clear_sampling_stats()

        try:
            # sample until either this thread or the whole script is stopped
            while not (self.stop or g_stop):
                self.router.sample_stats()
                time.sleep(1)
        except Exception as e:
            # route any sampling failure through the global error handler
            ErrorHandler(e, traceback.format_exc())

    def do_stop(self):
        self.stop = True
536
537
def general_cleanup_on_error ():
    """Stop all threads, flush the logger, run registered cleanups and exit(-1)."""
    global g_stop
    global cleanup_list

    # ask every worker thread to terminate
    g_stop = True

    # stop further logging, then flush whatever is buffered
    logger.shutdown()
    if logger:
        logger.flush()

    # run the callables registered for Ctrl+C / fatal-error cleanup
    for cleanup_callback in cleanup_list:
        cleanup_callback()

    # crude grace period for threads to wind down (TODO: make this more smart)
    time.sleep(2)
    exit(-1)
557
558# just a dummy for preventing chain calls
def signal_handler_dummy (sig_id, frame):
    """No-op signal handler installed to prevent handler re-entry chains."""
    return None
561
def error_signal_handler (sig_id, frame):
    """SIGUSR1 handler: run the fatal-error cleanup exactly once."""
    # swap in no-op handlers first so the cleanup cannot retrigger this path
    signal.signal(signal.SIGUSR1, signal_handler_dummy)
    signal.signal(signal.SIGINT, signal_handler_dummy)

    general_cleanup_on_error()
568
def int_signal_handler(sig_id, frame):
    """SIGINT (Ctrl+C) handler: log the interruption, then clean up once."""
    # neutralize both handlers to avoid re-entrant signal handling
    signal.signal(signal.SIGINT, signal_handler_dummy)
    signal.signal(signal.SIGUSR1, signal_handler_dummy)

    logger.log("\n\nCaught Cntrl+C... Cleaning up!\n\n")

    general_cleanup_on_error()
577
578
579# Trex with sampling
# Trex with sampling
class CTRexWithRouter:
    """Couples a TRex client with a router whose stats are sampled during runs.

    trex_params keys used: router_type ("ASR"/"ISR"), router_interface,
    router_port, router_password, trex_cores, trex_yaml_file, trex_latency,
    trex_limit_ports.
    """

    def __init__(self, trex, trex_params):
        self.trex = trex;
        self.trex_params = trex_params

        # instantiate the matching router driver from h_avc
        if self.trex_params['router_type'] == "ASR":
            self.router = h_avc.ASR1k(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
        elif self.trex_params['router_type'] == "ISR":
            self.router = h_avc.ISR(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
        else:
            raise Exception("unknown router type in config file")

        self.router.connect()

    def get_router (self):
        return self.router

    def run(self, m, duration):
        """Run TRex at multiplier *m* for *duration* seconds while sampling
        the router once a second in a background thread.

        Returns a dict with 'status', 'trex_results' and 'avc_results'
        (the router's accumulated stats).
        """

        # start the router sampling thread before launching traffic
        self.sample_thread = Sample_Thread(1, self.router)
        self.sample_thread.start();

        # launch trex
        try:
#           trex_res = self.trex.run(m, duration);
            self.trex.start_trex(c = self.trex_params['trex_cores'],
                m = m,
                d = duration,
                f = self.trex_params['trex_yaml_file'],
                nc = True,
                l = self.trex_params['trex_latency'],
                limit_ports = self.trex_params['trex_limit_ports'])
            self.trex.sample_to_run_finish(20)      # collect trex-sample every 20 seconds.
        except Exception:
            # make sure the sampler is stopped even when the run fails
            self.sample_thread.do_stop()  # signal to stop
            self.sample_thread.join()     # wait for it to realy stop
            raise

        self.sample_thread.do_stop()  # signal to stop
        self.sample_thread.join()     # wait for it to realy stop

        self.res = self.trex.get_result_obj()

        results = {}
        results['status'] = True
        results['trex_results'] = self.res
        results['avc_results']  = self.router.get_stats()

        return (results)
        #return(trex_res.get_status() == STATUS_OK);
630
631# sanity checks to see run really went well
def sanity_test_run (trex_r, avc_r):
    """Placeholder for post-run sanity checks; currently none are enforced."""
    # historical check kept for reference:
    #if (sum(avc_r['cpu_histo']) == 0):
        #raise h_trex.TrexRunException("CPU utilization from router is zero, check connectivity")
    pass
636
def _trex_run (job_summary, m, duration):
    """Run a single TRex iteration at multiplier *m* for *duration* seconds.

    Shows a progress bar while the test runs, sanity-checks the run and
    condenses the TRex / router statistics into a flat result dict with
    keys: m, tx [Mbps], cpu_util, norm_cpu, maximum-latency,
    average-latency, total-pkt-drop, expected-bps, total-pps, m_total_pkt,
    latency_condition.

    Raises:
        Exception: if the run produced no results.
    """
    trex_thread = job_summary['trex_thread']

    # the progress bar runs in its own thread for the duration of the test
    p = ProgressBar(duration, trex_thread.get_router())
    p.start()

    try:
        results = trex_thread.run(m, duration)
    except Exception:
        # was 'except Exception,e' (Python 2 only syntax, and 'e' was unused);
        # the bare re-raise preserves the original traceback
        p.stop()
        raise

    p.stop()

    if results is None:
        raise Exception("Failed to run Trex")

    # fetch values
    trex_r = results['trex_results']
    avc_r  = results['avc_results']

    sanity_test_run(trex_r, avc_r)

    res_dict = {}

    res_dict['m']  = m
    total_tx_bps = trex_r.get_last_value("trex-global.data.m_tx_bps")
    res_dict['tx'] = total_tx_bps / (1000 * 1000)  # EVENTUALLY CONTAINS IN MBPS (EXTRACTED IN BPS)

    res_dict['cpu_util'] = avc_r['cpu_util']

    # normalized CPU = Mbps per 100% CPU; guard against dividing by zero
    if int(res_dict['cpu_util']) == 0:
        res_dict['norm_cpu'] = 1
    else:
        res_dict['norm_cpu'] = (res_dict['tx'] / res_dict['cpu_util']) * 100

    res_dict['maximum-latency']  = max ( trex_r.get_max_latency().values() )
    res_dict['average-latency']  = trex_r.get_avg_latency()['all']

    logger.log(cpu_histo_to_str(avc_r['cpu_histo']))

    res_dict['total-pkt-drop']  = trex_r.get_total_drops()
    res_dict['expected-bps']    = trex_r.get_expected_tx_rate()['m_tx_expected_bps']
    # median of the sampled pps values is more robust than a single sample
    res_dict['total-pps']       = get_median( trex_r.get_value_list("trex-global.data.m_tx_pps") )
    res_dict['m_total_pkt']     = trex_r.get_last_value("trex-global.data.m_total_tx_pkts")

    res_dict['latency_condition'] = job_summary['trex_params']['trex_latency_condition']

    return res_dict
687
def trex_run (job_summary, m, duration):
    """Public wrapper around a single TRex run (see _trex_run)."""
    return _trex_run(job_summary, m, duration)
691
692
def m_to_mbps (job_summary, m):
    """Convert a TRex multiplier *m* to Mbps using the job's base M unit."""
    return m * job_summary['base_m_unit']
695
696# find the correct range of M
# find the correct range of M
def find_m_range (job_summary):
    """Determine the [min, max] search range in M units plus the base M unit.

    When the user supplied an explicit Mbps range it is converted to M;
    otherwise the range is guessed from the machine type (1G/10G, dual).
    Returns a dict with 'm_range' and 'base_m_unit' (Mbps per one M).
    """
    trex = job_summary['trex']
    trex_config = job_summary['trex_params']

    # if not provided - guess the correct range of bandwidth
    if not job_summary['m_range']:
        m_range = [0.0, 0.0]
        # default guess: 1 Mbps -> 1 Gbps
        LOW_TX = 1.0 * 1000 * 1000
        MAX_TX = 1.0 * 1000 * 1000 * 1000

        # for 10g go to 10g
        if trex_config['trex_machine_type'] == "10G":
            MAX_TX *= 10

        # dual injection can potentially reach X2 speed
        if trex_config['trex_is_dual'] == True:
            MAX_TX *= 2
    else:
        # user supplied the range in Mbps - convert to bps
        m_range = job_summary['m_range']
        LOW_TX = m_range[0] * 1000 * 1000
        MAX_TX = m_range[1] * 1000 * 1000

    logger.log("\nSystem Settings - Min: {0:,} Mbps / Max: {1:,} Mbps".format(LOW_TX / (1000 * 1000), MAX_TX / (1000 * 1000)))
    logger.log("\nTrying to get system minimum M and maximum M...")

    # calibration run at M=1 for 30 seconds to learn the expected bps per M
    calib = trex_run(job_summary, 1, 30)

    # translate the bps bounds into M units
    m_range[0] = LOW_TX / calib['expected-bps']
    m_range[1] = MAX_TX / calib['expected-bps']

    # return both the m_range and the base m unit for future calculation
    return {
        'm_range': m_range,
        'base_m_unit': calib['expected-bps'] / (1000 * 1000),
    }
739
740# calculate points between m_range[0] and m_range[1]
# calculate points between m_range[0] and m_range[1]
def calculate_plot_points (job_summary, m_range, plot_count):
    """Run *plot_count* TRex iterations evenly spaced across *m_range*.

    Returns the list of per-point result dicts (see _trex_run output).
    """
    cond_type = job_summary['cond_type']
    delta_m = (m_range[1] - m_range[0]) / plot_count

    m_current = m_range[0]
    m_end = m_range[1]

    logger.log("\nStarting Plot Graph Task ...\n")
    logger.log("Plotting Range Is From: {0:.2f} [Mbps] To: {1:.2f} [Mbps] Over {2} Points".format(m_to_mbps(job_summary, m_range[0]),
                                                                                                  m_to_mbps(job_summary, m_range[1]),
                                                                                                  plot_count))
    logger.log("Delta Between Points is {0:.2f} [Mbps]".format(m_to_mbps(job_summary, delta_m)))

    plot_points = []
    duration = 180
    point_index = 1
    trex = job_summary['trex']

    while point_index <= plot_count:
        logger.log("\nPlotting Point [{0}/{1}]:\n".format(point_index, plot_count))
        logger.log("Estimated BW   ~= {0:,.2f} [Mbps]\n".format(m_to_mbps(job_summary, m_current)))
        logger.log("M               = {0:.6f}".format(m_current))
        logger.log("Duration        = {0} seconds\n".format(duration))

        res_dict = trex_run(job_summary, m_current, duration)
        print_trex_results(res_dict, cond_type)

        plot_points.append(dict(res_dict))

        m_current += delta_m
        point_index += 1

        # force the final iteration to land exactly on the top of the range
        if point_index == plot_count:
            m_current = m_range[1]

    return plot_points
783
784
def cond_type_to_str (cond_type):
    """Human readable name of a success-condition type ('latency' or 'drop')."""
    if cond_type == 'latency':
        return "Max Latency"
    return "Pkt Drop"
787
788# success condition (latency or drop)
# success condition (latency or drop)
def check_condition (cond_type, res_dict):
    """Return True when the run in *res_dict* satisfies the success condition.

    'latency': pass when the maximum latency stayed strictly below the
    configured latency condition.
    'drop': fail only when the run shows BOTH very high latency (2000 usec
    above the latency condition) AND more than 0.2% packet drop.

    Raises:
        ValueError: for an unknown *cond_type*.
    """
    if cond_type == 'latency':
        if res_dict['maximum-latency'] < res_dict['latency_condition']:
            return True
        else:
            return False

    # drop condition is a bit more complex - it should create high latency in addition to 0.2% drop
    elif cond_type == 'drop':
        if (res_dict['maximum-latency'] > (res_dict['latency_condition']+2000) ) and (res_dict['total-pkt-drop'] > (0.002 * res_dict['m_total_pkt'])):
            return False
        else:
            return True

    # was 'assert(0)': asserts are stripped under 'python -O', which would make
    # an unknown condition type silently return None - raise explicitly instead
    raise ValueError("unknown condition type: %s" % cond_type)
804
def print_trex_results (res_dict, cond_type):
    """Log a formatted summary of a single run's results."""
    logger.log("\nRun Results:\n")
    logger.log(run_results_to_str(res_dict, cond_type))
809
810
811######################## describe a find job ########################
class FindJob:
    """Bisection-style search for the maximum M that still meets the condition.

    Repeatedly runs TRex at the midpoint of [min, max], narrowing the range
    on each iteration until the relative range width drops below 2% with a
    passing run.
    """

    # init a job object with min / max
    def __init__ (self, min, max, job_summary):
        # note: the 'min'/'max' parameter names shadow the builtins inside
        # this method only; the builtins are still used in other methods
        self.min = float(min)
        self.max = float(max)
        self.job_summary = job_summary
        self.cond_type = job_summary['cond_type']
        self.success_points = []     # copies of every passing run's results
        self.iter_num = 1
        self.found = False
        self.iter_duration = 300     # seconds per search iteration

    def _distance (self):
        # relative width of the current search range
        return ( (self.max - self.min) / min(self.max, self.min) )

    def time_to_end (self):
        """Estimated remaining wall time, as a timedelta."""
        time_in_sec = (self.iters_to_end() * self.iter_duration)
        return timedelta(seconds = time_in_sec)

    def iters_to_end (self):
        """Estimate how many bisection steps remain until the 2% point.

        The direction of each simulated step is chosen at random (the real
        direction depends on run outcomes), so this is a rough estimate and
        may vary between calls.
        """
        # find 2% point
        ma = self.max
        mi = self.min
        iter = 0

        while True:
            dist = (ma - mi) / min(ma , mi)
            if dist < 0.02:
                break
            # simulate a bisection step in a random direction
            if random.choice(["up", "down"]) == "down":
                ma = (ma + mi) / 2
            else:
                mi = (ma + mi) / 2

            iter += 1

        return (iter)

    def _cur (self):
        # midpoint of the current search range - the M used for the next run
        return ( (self.min + self.max) / 2 )

    def _add_success_point (self, res_dict):
        # keep a private copy so later iterations cannot mutate it
        self.success_points.append(res_dict.copy())

    def _is_found (self):
        return (self.found)

    def _next_iter_duration (self):
        return (self.iter_duration)

    # execute iteration
    def _execute (self):
        """Run one iteration at the current midpoint and narrow the range."""
        # reset the found var before running
        self.found = False

        # run and print results
        res_dict = trex_run(self.job_summary, self._cur(), self.iter_duration)

        self.iter_num += 1
        cur = self._cur()

        if (self._distance() < 0.02):
            if (check_condition(self.cond_type, res_dict)):
                # distance < 2% and success - we are done
                self.found = True
            else:
                # lower to 90% of current and retry
                self.min = cur * 0.9
                self.max = cur
        else:
            # standard bisection: success raises the floor, failure lowers the ceiling
            if (check_condition(self.cond_type, res_dict)):
                self.min = cur
            else:
                self.max = cur

        # record every passing run so a plot job can reuse the points later
        if (check_condition(self.cond_type, res_dict)):
            self._add_success_point(res_dict)

        return res_dict

    # find the max M before
    def find_max_m (self):
        """Iterate until convergence; return the final run's results plus 'max_m'."""

        res_dict = {}
        while not self._is_found():

            logger.log("\n-> Starting Find Iteration #{0}\n".format(self.iter_num))
            logger.log("Estimated BW         ~=  {0:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self._cur())))
            logger.log("M                     =  {0:.6f}".format(self._cur()))
            logger.log("Duration              =  {0} seconds".format(self._next_iter_duration()))
            logger.log("Current BW Range      =  {0:,.2f} [Mbps] / {1:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self.min), m_to_mbps(self.job_summary, self.max)))
            logger.log("Est. Iterations Left  =  {0} Iterations".format(self.iters_to_end()))
            logger.log("Est. Time Left        =  {0}\n".format(self.time_to_end()))

            res_dict = self._execute()

            print_trex_results(res_dict, self.cond_type)

        find_results = res_dict.copy()
        find_results['max_m'] = self._cur()
        return (find_results)
913        return (find_results)
914
915######################## describe a plot job ########################
class PlotJob:
    """Plot data derived from a completed FindJob.

    Collects the successful iterations of the find phase and orders them
    by transmitted bandwidth so they can be rendered as a graph.
    """

    def __init__(self, findjob):
        """Snapshot the success points of *findjob*, sorted by 'tx'."""
        self.job_summary = findjob.job_summary

        self.plot_points = list(findjob.success_points)
        self.plot_points.sort(key = lambda item: item['tx'])

    def plot (self, duration = 300):
        """Return the pre-collected plot points.

        `duration` is kept for interface compatibility only: re-running
        each point for `duration` seconds is currently disabled, and the
        points gathered during the find phase are returned as-is.
        """
        return self.plot_points
947
948
def generate_job_id ():
    """Return a random 32-bit job identifier as a decimal string."""
    return str(random.getrandbits(32))
951
def print_header ():
    """Log the startup banner."""
    logger.log("--== T-Trex Performance Tool v1.0 (2014) ==--")
954
# print startup summary
def log_startup_summary (job_summary):
    """Log the setup and job details of a work request before it runs."""

    trex = job_summary['trex']
    trex_config = job_summary['trex_params']

    messages = [
        "\nWork Request Details:\n",
        "Setup Details:\n",
        "T-Rex Config File:   {0}".format(job_summary['config_file']),
        "Machine Name:        {0}".format(trex_config['trex_name']),
        "T-Rex Type:          {0}".format(trex_config['trex_machine_type']),
        "T-Rex Dual Int. Tx:  {0}".format(trex_config['trex_is_dual']),
        "Router Interface:    {0}".format(trex_config['router_interface']),
        "\nJob Details:\n",
        "Job Name:            {0}".format(job_summary['job_name']),
        "YAML file:           {0}".format(job_summary['yaml']),
        "Job Type:            {0}".format(job_summary['job_type_str']),
        "Condition Type:      {0}".format(job_summary['cond_name']),
        "Job Log:             {0}".format(job_summary['log_filename']),
        "Email Report:        {0}".format(job_summary['email']),
    ]

    for message in messages:
        logger.log(message)
978
def load_trex_config_params (filename, yaml_file):
    """Read the T-Rex/router pair settings from the INI file *filename*.

    Returns a flat dict of configuration values.  Optional keys default
    to None (router port defaults to telnet, 23).  Any parsing problem
    is re-raised as TrexRunException.
    """
    parser = ConfigParser.ConfigParser()
    config = {}

    try:
        parser.read(filename)

        # mandatory trex section
        config['trex_name'] = parser.get("trex", "machine_name")
        config['trex_port'] = parser.get("trex", "machine_port")
        config['trex_hisory_size'] = parser.getint("trex", "history_size")

        config['trex_latency_condition'] = parser.getint("trex", "latency_condition")
        config['trex_yaml_file'] = yaml_file

        # support legacy data
        config['trex_latency'] = parser.getint("trex", "latency")
        config['trex_limit_ports'] = parser.getint("trex", "limit_ports")
        config['trex_cores'] = parser.getint("trex", "cores")
        config['trex_machine_type'] = parser.get("trex", "machine_type")
        config['trex_is_dual'] = parser.getboolean("trex", "is_dual")

        # optional Trex parameters
        config['trex_config_file'] = parser.get("trex", "config_file") if parser.has_option("trex", "config_file") else None
        config['trex_misc_params'] = parser.get("trex", "misc_params") if parser.has_option("trex", "misc_params") else None

        # router section - default to the simple telnet port when absent
        config['router_port'] = parser.get("router", "port") if parser.has_option("router", "port") else 23

        config['router_interface'] = parser.get("router", "interface")
        config['router_password'] = parser.get("router", "password")
        config['router_type'] = parser.get("router", "type")

    except Exception as exc:
        raise TrexRunException("\nBad configuration file: '{0}'\n\n{1}".format(filename, exc))

    return config
1028
def prepare_for_run (job_summary):
    """Prepare the environment for a job run.

    Creates the job directory and log files, initializes the global
    logger, loads the trex configuration, builds the trex client/thread
    objects and installs the signal handlers.  Mutates *job_summary* in
    place with all the derived fields.
    """
    global logger

    # generate unique id
    job_summary['job_id'] = generate_job_id()
    job_summary['job_dir'] = "trex_job_{0}".format(job_summary['job_id'])

    job_summary['start_time'] = datetime.datetime.now()

    # default the report address to <user>@cisco.com when not provided
    if not job_summary['email']:
        job_summary['user'] = getpass.getuser()
        job_summary['email'] = "{0}@cisco.com".format(job_summary['user'])

    # create dir for reports
    try:
        job_summary['job_dir'] = os.path.abspath( os.path.join(os.getcwd(), 'logs', job_summary['job_dir']) )
        print(job_summary['job_dir'])
        os.makedirs( job_summary['job_dir'] )

    except OSError as err:
        if err.errno == errno.EACCES:
            # permission denied - fall back to creating the dir under /tmp
            job_summary['job_dir'] = os.path.join("/tmp/", "trex_job_{0}".format(job_summary['job_id']) )
            os.makedirs(job_summary['job_dir'])
        else:
            # bug fix: any other OSError (e.g. path already exists) used to
            # be silently swallowed, leaving the job without a usable dir
            raise

    job_summary['log_filename'] = os.path.join(job_summary['job_dir'], "trex_log_{0}.txt".format(job_summary['job_id']))
    job_summary['graph_filename'] = os.path.join(job_summary['job_dir'], "trex_graph_{0}.html".format(job_summary['job_id']))

    # init logger
    logger = MyLogger(job_summary['log_filename'])

    # mark those as not populated yet
    job_summary['find_results'] = None
    job_summary['plot_results'] = None

    # create trex client instance
    trex_params = load_trex_config_params(job_summary['config_file'],job_summary['yaml'])
    trex = CTRexClient(trex_host = trex_params['trex_name'],
        max_history_size = trex_params['trex_hisory_size'],
        trex_daemon_port = trex_params['trex_port'])

    job_summary['trex'] = trex
    job_summary['trex_params'] = trex_params

    # create trex task thread
    job_summary['trex_thread'] = CTRexWithRouter(trex, trex_params)

    # in case of an error we need to call the remote cleanup
    cleanup_list.append(trex.stop_trex)

    # signal handlers: SIGINT for user interrupt, SIGUSR1 for worker errors
    signal.signal(signal.SIGINT, int_signal_handler)
    signal.signal(signal.SIGUSR1, error_signal_handler)
1082
1083
def after_run (job_summary):
    """Finalize a job: record total runtime, then print and email the report."""
    job_summary['total_run_time'] = datetime.datetime.now() - job_summary['start_time']

    report = JobReporter(job_summary)
    report.print_summary()
    report.send_email_report()
1090
def launch (job_summary):
    """Run a complete job: prepare, locate the max M, optionally plot,
    then report."""

    prepare_for_run(job_summary)
    print_header()
    log_startup_summary(job_summary)

    # find the correct M range if not provided
    range_results = find_m_range(job_summary)
    job_summary['base_m_unit'] = range_results['base_m_unit']

    # a user-provided range takes precedence over the auto-detected one
    m_range = job_summary['m_range'] if job_summary['m_range'] else range_results['m_range']

    logger.log("\nJob Bandwidth Working Range:\n")
    logger.log("Min M = {0:.6f} / {1:,.2f} [Mbps] \nMax M = {2:.6f} / {3:,.2f} [Mbps]".format(m_range[0], m_to_mbps(job_summary, m_range[0]), m_range[1], m_to_mbps(job_summary, m_range[1])))

    # binary-search the range for the maximal working M
    findjob = FindJob(m_range[0], m_range[1], job_summary)
    job_summary['find_results'] = findjob.find_max_m()

    # 'all' additionally plots the successful points to a graph
    if job_summary['job_type'] == "all":
        plotjob = PlotJob(findjob)
        job_summary['plot_results'] = plotjob.plot()

    after_run(job_summary)
1122
1123
# populate the fields for run
def populate_fields (job_summary, args):
    """Fill *job_summary* from the parsed command-line arguments."""
    job_summary['config_file']  = args.config_file
    job_summary['job_type']     = args.job
    job_summary['cond_type']    = args.cond_type
    job_summary['yaml']         = args.yaml

    # optional arguments fall back to their defaults
    job_summary['job_name'] = args.n if args.n else "Nameless"
    job_summary['email']    = args.e if args.e else None
    job_summary['m_range']  = args.m if args.m else None

    # some pretty shows
    job_summary['cond_name'] = 'Drop Pkt' if (args.cond_type == 'drop') else 'High Latency'

    pretty_job_types = {'find': "Find Max BW", 'plot': "Plot Graph"}
    job_summary['job_type_str'] = pretty_job_types.get(args.job, "Find Max BW & Plot Graph")

    # plotting requires a recent glibc for the graph tool
    if args.job != "find":
        verify_glibc_version()
1160
1161
1162
# argparse helper: verify the given file exists
def is_valid_file (parser, err_msg, filename):
    """Return *filename* if it exists; otherwise abort argument parsing
    via parser.error with *err_msg* (note: returns the path, not a handle)."""
    if os.path.exists(filename):
        return filename
    parser.error("{0}: '{1}'".format(err_msg, filename))
1169
def entry ():
    """Command-line entry point: parse arguments, run the job, report errors."""
    # bug fix: without the global declaration the assignment at the end of
    # this function created a dead local instead of setting the module flag.
    # NOTE(review): assumes g_stop is a module-level stop flag read by
    # worker threads - confirm against the rest of the file.
    global g_stop

    job_summary = {}

    parser = argparse.ArgumentParser()

    parser.add_argument("-n", help="Job Name",
                        type = str)

    parser.add_argument("-m", help="M Range [default: auto calcuation]",
                        nargs = 2,
                        type = float)

    parser.add_argument("-e", help="E-Mail for report [default: whoami@cisco.com]",
                        type = str)

    parser.add_argument("-c", "--cfg", dest = "config_file", required = True,
                        help = "Configuration File For Trex/Router Pair",
                        type = lambda x: is_valid_file(parser, "config file does not exists",x))

    parser.add_argument("job", help = "Job type",
                        type = str,
                        choices = ['find', 'plot', 'all'])

    parser.add_argument("cond_type", help="type of failure condition",
                        type = str,
                        choices = ['latency','drop'])

    parser.add_argument("-f", "--yaml", dest = "yaml", required = True,
                        help="YAML file to use", type = str)

    args = parser.parse_args()

    # TermMng restores the terminal state even on abnormal exit
    with TermMng():
        try:
            populate_fields(job_summary, args)
            launch(job_summary)

        except Exception as e:
            ErrorHandler(e, traceback.format_exc())

    logger.log("\nReport bugs to imarom@cisco.com\n")
    g_stop = True
1213
def dummy_test ():
    # Developer smoke test: exercises prepare_for_run / PlotJob / after_run
    # with canned result data instead of a real trex run.
    job_summary = {}
    find_results = {}

    # minimal fields required by prepare_for_run
    job_summary['config_file'] = 'config/trex01-1g.cfg'
    job_summary['yaml'] = 'dummy.yaml'
    job_summary['email']        = 'imarom@cisco.com'
    job_summary['job_name'] = 'test'
    job_summary['job_type_str'] = 'test'

    prepare_for_run(job_summary)

    time.sleep(2)
    # fields normally filled by populate_fields
    job_summary['yaml'] = 'dummy.yaml'
    job_summary['job_type']  = 'find'
    job_summary['cond_name'] = 'Drop'
    job_summary['cond_type'] = 'drop'
    job_summary['job_id']= 94817231


    # canned "find" phase results
    find_results['tx'] = 210.23
    find_results['m'] = 1.292812
    find_results['total-pps'] = 1000
    find_results['cpu_util'] = 74.0
    find_results['maximum-latency'] = 4892
    find_results['average-latency'] = 201
    find_results['total-pkt-drop'] = 0


    # build a PlotJob from an (empty) FindJob, then override its output
    # with a canned set of plot points below
    findjob = FindJob(1,1,job_summary)
    plotjob = PlotJob(findjob)
    job_summary['plot_results'] = plotjob.plot()

    job_summary['find_results'] = find_results
    job_summary['plot_results'] = [{'cpu_util': 2.0,'norm_cpu': 1.0,  'total-pps': 1000, 'expected-bps': 999980.0, 'average-latency': 85.0, 'tx': 0.00207*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 221.0},
                                   {'cpu_util': 8.0,'norm_cpu': 1.0,  'total-pps': 1000,'expected-bps': 48500000.0, 'average-latency': 87.0, 'tx': 0.05005*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 279.0},
                                   {'cpu_util': 14.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 95990000.0, 'average-latency': 92.0, 'tx': 0.09806*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 273.0},
                                   {'cpu_util': 20.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 143490000.0, 'average-latency': 95.0, 'tx': 0.14613*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 271.0},
                                   {'cpu_util': 25.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 190980000.0, 'average-latency': 97.0, 'tx': 0.1933*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 302.0},
                                   {'cpu_util': 31.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 238480000.0, 'average-latency': 98.0, 'tx': 0.24213*1000, 'total-pkt-drop': 1.0, 'maximum-latency': 292.0},
                                   {'cpu_util': 37.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 285970000.0, 'average-latency': 99.0, 'tx': 0.29011*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 344.0},
                                   {'cpu_util': 43.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 333470000.0, 'average-latency': 100.0, 'tx': 0.3382*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 351.0},
                                   {'cpu_util': 48.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 380970000.0, 'average-latency': 100.0, 'tx': 0.38595*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 342.0},
                                   {'cpu_util': 54.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 428460000.0, 'average-latency': 19852.0, 'tx': 0.43438*1000, 'total-pkt-drop': 1826229.0, 'maximum-latency': 25344.0}]



    # emit the summary / email report from the canned data
    after_run(job_summary)
1262
# run the CLI entry point when executed as a script
if __name__ == "__main__":
    entry ()
1265
1266