# trex_perf.py revision 31597431
1#!/router/bin/python-2.7.4
2import h_avc
3
4
5import ConfigParser
6import threading
7import time,signal
8import argparse
9import sys
10import os
11sys.path.append(os.path.join('trex_control_plane', 'stf', 'trex_stf_lib'))
12from trex_client import CTRexClient
13import subprocess
14from time import sleep
15import signal
16import textwrap
17import getpass
18import random
19import datetime
20from datetime import timedelta
21import traceback
22import math
23import re
24import termios
25import errno
26import smtplib
27from email.MIMEMultipart import MIMEMultipart
28from email.MIMEBase import MIMEBase
29from email.MIMEText import MIMEText
30from email.Utils import COMMASPACE, formatdate
31from email import Encoders
32from email.mime.image import MIMEImage
33
34from distutils.version import StrictVersion
35
class TrexRunException(Exception):
    """Raised when a TRex run fails; carries a pre-formatted error summary."""

    def __init__ (self, reason, cmd = None, std_log = None, err_log = None):
        self.reason = reason
        self.std_log = std_log
        self.err_log = err_log

        # build the human-readable message once, appending only the
        # logs that were actually provided
        parts = ["\nSummary of error:\n\n %s\n" % (reason)]

        if std_log:
            parts.append("\nConsole Log:\n\n %s\n" % (self.std_log))

        if err_log:
            parts.append("\nStd Error Log:\n\n %s\n" % (self.err_log))

        self.message = "".join(parts)

    def __str__(self):
        return self.message
52
53
54############################# utility functions start #################################
55
def verify_glibc_version ():
    """Ensure the system glibc (queried via ldd) is recent enough for plotting.

    Raises Exception when the version cannot be parsed or is below 2.5.
    """
    output = subprocess.check_output("/usr/bin/ldd --version", shell=True)
    match = re.match("^ldd \([^\)]+\) (.*)", output)
    if match is None:
        raise Exception("Cannot determine LDD version")

    current_version = match.group(1)

    if StrictVersion(current_version) < StrictVersion("2.5"):
        raise Exception("GNU ldd version required for graph plotting is at least 2.5, system is %s - please run simple 'find'" % current_version)
65
def get_median(numericValues):
    """Return the median of *numericValues* (must be non-empty).

    Uses floor division (//) for the index math so the function is correct
    on both Python 2 and Python 3; the original '/' would produce float
    indices under Python 3.
    """
    theValues = sorted(numericValues)
    mid = len(theValues) // 2
    if len(theValues) % 2 == 1:
        # odd count - the middle element is the median
        return theValues[mid]
    # even count - average the two middle elements
    lower = theValues[mid - 1]
    upper = theValues[mid]
    return (float(lower + upper)) / 2
73    return (float(lower + upper)) / 2
74
def list_to_clusters(l, n):
    """Yield successive chunks of *l*, each of size *n* (last may be shorter).

    Uses range() instead of the Python-2-only xrange() so the helper works
    on both Python 2 and Python 3 with identical behavior.
    """
    for i in range(0, len(l), n):
        yield l[i:i+n]
78
def cpu_histo_to_str (cpu_histo):
    """Render the router CPU samples as an aligned table, 10 samples per row."""
    s = "\nCPU Samplings:\n\n"
    period = 0

    for chunk in list(list_to_clusters(cpu_histo, 10)):
        # each row covers 10 more seconds of sampling
        period += 10
        row = "%3s Seconds: [" % period

        # pad the row to exactly 10 cells so the columns stay aligned
        padded = list(chunk) + [None] * (10 - len(chunk))

        for sample in padded:
            row += ("%5.1f%%, " % sample) if sample is not None else "        "

        row = row[:-2] # trim the comma and space
        row += " "     # return the space

        row += "]\n"

        s += row

    return s
105
# Terminal Manager Class
class TermMng:
    """Context manager that disables terminal echo on stdin for its scope.

    On exit the terminal attributes captured on entry are restored.
    """
    def __enter__(self):
        self.fd = sys.stdin.fileno()
        # remember the current attributes so __exit__ can restore them
        self.old = termios.tcgetattr(self.fd)

        # copy new and remove echo
        new = self.old[:]
        new[3] &= ~termios.ECHO  # index 3 holds the local-mode flags (lflag)

        self.tcsetattr_flags = termios.TCSAFLUSH
        if hasattr(termios, 'TCSASOFT'):
            # TCSASOFT only exists on some platforms (e.g. BSD/macOS)
            self.tcsetattr_flags |= termios.TCSASOFT

        termios.tcsetattr(self.fd, self.tcsetattr_flags, new)

    def __exit__ (self ,type, value, traceback):
        # restore the attributes saved in __enter__
        termios.tcsetattr(self.fd, self.tcsetattr_flags, self.old)
124
125############################# utility functions stop #################################
126
def send_mail(send_from, send_to, subject, html_text, txt_attachments=[], images=[], server="localhost"):
    """Compose and send an HTML report mail via SMTP.

    send_from       -- sender address
    send_to         -- list of recipient addresses
    subject         -- mail subject line
    html_text       -- HTML body; images are appended as embedded <img> tags
    txt_attachments -- list of file paths attached as octet-stream parts
    images          -- list of image file paths embedded inline via Content-ID
    server          -- SMTP relay host

    NOTE(review): the mutable default arguments ([]) are never mutated here,
    so behavior is unaffected, but fresh lists per call would be safer.
    """
    assert isinstance(send_to, list)
    assert isinstance(txt_attachments, list)
    assert isinstance(images, list)

    # create a multi part message
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = COMMASPACE.join(send_to)
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject
    msg['Cc'] = "imarom@cisco.com"

    # add all images to the text as embbeded images
    for image in images:
        html_text += '<br><img src="cid:{0}"><br>'.format(image)
        # 'with' guarantees the file handle is closed even on error
        with open(image, 'rb') as fp:
            image_object = MIMEImage(fp.read())
        image_object.add_header('Content-ID', image)
        msg.attach(image_object)

    # attach the main report as embedded HTML
    msg.attach( MIMEText(html_text, 'html') )

    # attach regular txt files (the original leaked these file handles)
    for f in txt_attachments:
        part = MIMEBase('application', "octet-stream")
        with open(f, "rb") as fh:
            part.set_payload(fh.read())
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(f))
        msg.attach(part)

    smtp = smtplib.SMTP(server)
    try:
        smtp.sendmail(send_from, send_to, msg.as_string())
    finally:
        # close the SMTP session even when sendmail raises
        smtp.close()
163
164# convert HTML to image - returning a image file as a string
def html2image (html_filename, image_filename):
    """Rasterize an HTML file to an image via phantomjs; return the image path."""
    subprocess.call(
        "./phantom/phantomjs ./phantom/rasterize.js {0} {1}".format(html_filename, image_filename),
        shell=True)

    # verify the rasterized output file was actually produced
    assert os.path.exists(image_filename)

    return (image_filename)
172
173# convert results of run to a string
def run_results_to_str (results, cond_type):
    """Format a single run's result dict as a human readable text block."""
    verdict = "Passed" if check_condition(cond_type, results) else "Failed"

    lines = [
        "M:                  {0:<12.6f}\n".format(results['m']),
        "BW:                 {0:<12,.2f} [Mbps]\n".format(results['tx']),
        "PPS:                {0:<12,} [pkts]\n".format(int(results['total-pps'])),
        "CPU:                {0:.4f} %\n".format(results['cpu_util']),
        "Maximum Latency:    {0:<12,} [usec]\n".format(int(results['maximum-latency'])),
        "Average Latency:    {0:<12,} [usec]\n".format(int(results['average-latency'])),
        "Pkt Drop:           {0:<12,} [pkts]\n".format(int(results['total-pkt-drop'])),
        "Condition:          {0:<12} ({1})\n".format(verdict, cond_type_to_str(cond_type)),
    ]

    return ("".join(lines))
187
188############################# classes #################################
class ErrorHandler(object):
    """Logs a fatal exception report and aborts the script via SIGUSR1.

    Instantiating this class IS the error path: it writes a categorized
    report to the module logger and then signals the process so the
    installed SIGUSR1 handler performs the cleanup.
    """
    def __init__ (self, exception, traceback):

        if isinstance(exception, TrexRunException):
            logger.log("\n*** Script Terminated Due To Trex Failure")
            logger.log("\n********************** TRex Error - Report **************************\n")
            logger.log(str(exception))
            logger.flush()

        elif isinstance(exception, IOError):
            logger.log("\n*** Script Terminated Due To IO Error")
            logger.log("\nEither Router address or the Trex config is bad or some file is missing - check traceback below")
            logger.log("\n********************** IO Error - Report **************************\n")
            logger.log(str(exception))
            logger.log(str(traceback))
            logger.flush()

        else:
            logger.log("\n*** Script Terminated Due To Fatal Error")
            logger.log("\n********************** Internal Error - Report **************************\n")
            logger.log(str(exception) + "\n")
            logger.log(str(traceback))
            logger.flush()

        # record the kill cause at module scope; the original assigned a
        # plain local variable here, which silently had no effect outside
        # __init__ - the 'global' declaration makes the assignment stick
        global g_kill_cause
        g_kill_cause = "error"

        # call the handler
        os.kill(os.getpid(), signal.SIGUSR1)
218
219
220# simple HTML table
class HTMLTable:
    """Minimal two-column (Parameter / Results) HTML table builder."""

    def __init__ (self):
        self.table_rows = []

    def add_row (self, param, value):
        # each row is stored as a [parameter, value] pair
        self.table_rows.append([param, value])

    def generate_table(self):
        """Return the accumulated rows as a complete HTML <table> string."""
        header = '<table class="myWideTable" style="width:50%">'
        header += "<tr><th>Parameter</th><th>Results</th></tr>"

        body = "".join("<tr><td>{0}</td><td>{1}</td></tr>".format(name, val)
                       for name, val in self.table_rows)

        return header + body + "</table>"
238
239# process results and dispatch it
class JobReporter:
    """Formats and dispatches the results of a completed job.

    Produces a console summary, HTML reports rendered from
    report_template.html / graph_template.html (by replacing the
    !@#$...!@#$ markers), and optionally an e-mail with the graph
    embedded as an image.
    """
    def __init__ (self, job_summary):
        # job_summary: dict produced by the main flow; keys read here include
        # job_name, yaml, job_type_str, cond_name, cond_type, job_dir,
        # log_filename, graph_filename, email, user, total_run_time,
        # find_results and plot_results
        self.job_summary = job_summary
        pass

    def __plot_results_to_str (self, plot_results):
        # render every plotted point as one aligned text line
        output = "\nPlotted Points: \n\n"
        for p in plot_results:
            output += "BW  : {0:8.2f},  ".format(p['tx'])
            output += "PPS : {0:8,}  ".format(int(p['total-pps']))
            output += "CPU : {0:8.2f} %,  ".format(p['cpu_util'])
            output += "Max Latency : {0:10,},  ".format(int(p['maximum-latency']))
            output += "Avg Latency : {0:10,},  ".format(int(p['average-latency']))
            output += "Pkt Drop    : {0:12,},  \n".format(int(p['total-pkt-drop']))

        return (output + "\n")

    def __summary_to_string (self):
        # build the plain-text job summary shown on the console
        output = ""

        output += "\n-== Job Completed Successfully ==-\n\n"
        output += "Job Report:\n\n"
        output +=  "Job Name:          {0}\n".format(self.job_summary['job_name'])
        output +=  "YAML file:         {0}\n".format(self.job_summary['yaml'])
        output +=  "Job Type:          {0}\n".format(self.job_summary['job_type_str'])
        output +=  "Condition:         {0}\n".format(self.job_summary['cond_name'])
        output +=  "Job Dir:           {0}\n".format(self.job_summary['job_dir'])
        output +=  "Job Log:           {0}\n".format(self.job_summary['log_filename'])
        output +=  "Email Report:      {0}\n".format(self.job_summary['email'])
        output +=  "Job Total Time:    {0}\n\n".format(self.job_summary['total_run_time'])

        # the find/plot sections are optional - only present when those
        # phases actually ran
        if (self.job_summary.get('find_results') != None):
            find_results = self.job_summary['find_results']
            output += ("Maximum BW Point Details:\n\n")
            output += run_results_to_str(find_results, self.job_summary['cond_type'])

        if (self.job_summary.get('plot_results') != None):
            plot_results = self.job_summary['plot_results']
            output += self.__plot_results_to_str(plot_results)

        return output


    # simple print to screen of the job summary
    def print_summary (self):
        summary = self.__summary_to_string()
        logger.log(summary)

    def __generate_graph_report (self, plot_results):
        # serialize the points as Python-literal lists; the surrounding
        # brackets are stripped below when filling the template
        graph_data = str( [ [x['tx'], x['cpu_util']/100, x['maximum-latency'], x['average-latency']] for x in plot_results ] )
        table_data = str( [ [x['tx'], x['total-pps'], x['cpu_util']/100, x['norm_cpu'], x['maximum-latency'], x['average-latency'], x['total-pkt-drop']] for x in plot_results ] )

        with open ("graph_template.html", "r") as myfile:
            data = myfile.read()
            data = data.replace("!@#$template_fill_head!@#$", self.job_summary['yaml'])
            data = data.replace("!@#$template_fill_graph!@#$", graph_data[1:(len(graph_data) - 1)])
            data = data.replace("!@#$template_fill_table!@#$", table_data[1:(len(table_data) - 1)])

        # generate HTML report
        graph_filename = self.job_summary['graph_filename']
        text_file = open(graph_filename, "w")
        text_file.write(str(data))
        text_file.close()

        return graph_filename

    def __generate_body_report (self):
        # table 1: static job setup parameters
        job_setup_table = HTMLTable()

        job_setup_table.add_row("User Name", self.job_summary['user'])
        job_setup_table.add_row("Job Name", self.job_summary['job_name'])
        job_setup_table.add_row("Job Type", self.job_summary['job_type_str'])
        job_setup_table.add_row("Test Condition", self.job_summary['cond_name'])
        job_setup_table.add_row("YAML File", self.job_summary['yaml'])
        job_setup_table.add_row("Job Total Time", "{0}".format(self.job_summary['total_run_time']))

        # table 2: measured results (only when a find phase ran)
        job_summary_table = HTMLTable()

        find_results = self.job_summary['find_results']

        if find_results != None:
            job_summary_table.add_row("Maximum Bandwidth", "{0:,.2f} [Mbps]".format(find_results['tx']))
            job_summary_table.add_row("Maximum PPS", "{0:,} [pkts]".format(int(find_results['total-pps'])))
            job_summary_table.add_row("CPU Util.", "{0:.2f}%".format(find_results['cpu_util']))
            job_summary_table.add_row("Maximum Latency", "{0:,} [usec]".format(int(find_results['maximum-latency'])))
            job_summary_table.add_row("Average Latency", "{0:,} [usec]".format(int(find_results['average-latency'])))
            job_summary_table.add_row("Total Pkt Drop", "{0:,}  [pkts]".format(int(find_results['total-pkt-drop'])))

        with open ("report_template.html", "r") as myfile:
            data = myfile.read()
            data = data.replace("!@#$template_fill_job_setup_table!@#$", job_setup_table.generate_table())
            data = data.replace("!@#$template_fill_job_summary_table!@#$", job_summary_table.generate_table())

        return data

    # create an email report and send to the user
    def send_email_report (self):
        images = []

        logger.log("\nCreating E-Mail Report...\n")

        # generate main report
        report_str = self.__generate_body_report()

        # generate graph report (if exists)
        plot_results = self.job_summary['plot_results']
        if plot_results:
            logger.log("Generating Plot Results HTML ...\n")
            graph_filename  = self.__generate_graph_report(plot_results)
            logger.log("Converting HTML to image ...\n")
            images.append(html2image(graph_filename, graph_filename + ".png"))

        else:
            graph_filename = None

        # create email
        from_addr = 'TrexReporter@cisco.com'
        to_addr = []
        to_addr.append(self.job_summary['email'])
        to_addr.append('imarom@cisco.com')

        # the run log is always attached; the graph only when it was produced
        attachments = []
        attachments.append(self.job_summary['log_filename'])
        logger.log("Attaching log {0}...".format(self.job_summary['log_filename']))

        if graph_filename:
            attachments.append(graph_filename)
            logger.log("Attaching plotting report {0}...".format(graph_filename))

        logger.flush()

        send_mail(from_addr, to_addr, "TRex Performance Report", report_str, attachments, images)
        logger.log("\nE-mail sent successfully to: " + self.job_summary['email'])
373
374# dummy logger in case logger creation failed
class DummyLogger(object):
    """Fallback logger used before (or if) the real MyLogger is created.

    Writes everything straight to stdout; flush is a no-op.
    """

    def __init__(self):
        pass

    def log(self, text, force = False, newline = True):
        suffix = "\n" if newline else ""
        sys.stdout.write(text + suffix)

    def console(self, text, force = False, newline = True):
        # console output is identical to log output for the dummy logger
        self.log(text, force, newline)

    def flush (self):
        pass
388
389# logger object
class MyLogger(object):
    """Logger that tees output to a log file and to the real stdout.

    A dup of the original stdout fd is kept so console output can still be
    forced through while sys.stdout is redirected to /dev/null
    (see silence()/restore()).
    """

    def __init__(self, log_filename):
        # Store the original stdout and stderr
        sys.stdout.flush()
        sys.stderr.flush()

        # duplicate the real stdout fd before any redirection happens
        self.stdout_fd = os.dup(sys.stdout.fileno())
        self.devnull = os.open('/dev/null', os.O_WRONLY)
        self.log_file = open(log_filename, 'w')
        self.silenced = False
        # counts un-flushed writes; the file is flushed every 10 log() calls
        self.pending_log_file_prints = 0
        self.active = True

    def shutdown (self):
        # once inactive, log() and console() become no-ops
        self.active = False

    def reactive (self):
        self.active = True

    # silence all prints from stdout
    def silence(self):
        os.dup2(self.devnull, sys.stdout.fileno())
        self.silenced = True

    # restore stdout status
    def restore(self):
        sys.stdout.flush()
        sys.stderr.flush()
        # Restore normal stdout
        os.dup2(self.stdout_fd, sys.stdout.fileno())
        self.silenced = False

    #print a message to the log (both stdout / log file)
    def log(self, text, force = False, newline = True):
        if not self.active:
            return

        self.log_file.write((text + "\n") if newline else text)
        self.pending_log_file_prints += 1

        # batch file flushes to avoid one syscall per line
        if (self.pending_log_file_prints >= 10):
             self.log_file.flush()
             self.pending_log_file_prints = 0

        self.console(text, force, newline)

    # print a message to the console alone
    def console(self, text, force = False, newline = True):
        if not self.active:
            return

        _text = (text + "\n") if newline else text

        # if we are silenced and not forced - go home
        if self.silenced and not force:
            return

        if self.silenced:
            # bypass the redirected sys.stdout via the saved fd
            # NOTE(review): os.write() requires bytes on Python 3; this
            # path assumes the Python 2 interpreter from the shebang
            os.write(self.stdout_fd, _text)
        else:
            sys.stdout.write(_text)

        sys.stdout.flush()

    # flush
    def flush(self):
        sys.stdout.flush()
        self.log_file.flush()

    def __del__(self):
        # best-effort cleanup of the devnull fd and the log file
        os.close(self.devnull)
        if self.log_file:
            self.log_file.flush()
            self.log_file.close()
465
466
467# simple progress bar
class ProgressBar(threading.Thread):
    """Background thread drawing a textual progress bar on the console.

    Runs for the requested duration plus a 15 second grace period and
    also shows the router's last sampled CPU utilization.
    """

    def __init__(self, time_sec, router):
        super(ProgressBar, self).__init__()
        self.active = True
        # extra grace so the bar outlives the actual traffic run
        self.time_sec = time_sec + 15
        self.router = router

    def run (self):
        global g_stop

        col = 40
        step = float(col) / self.time_sec
        filled = 0.0

        for _ in range(self.time_sec):
            if not self.active:
                # stopped externally - print the 100% bar and leave
                logger.console("\r[" + ('#' * col) + "] {0:.2f} %".format(100),
                               force = True, newline = False)
                break

            if g_stop:
                break

            sleep(1)
            filled += step
            done = int(filled)
            bar = "\r[" + ('#' * done) + (' ' * (col - done)) + "] {0:.2f} %".format( (filled/col) * 100 )
            bar += " / Router CPU: {0:.2f} %".format(self.router.get_last_cpu_util())
            logger.console(bar, force = True, newline = False)

        logger.console("\r\n", force = True, newline = False)
        logger.flush()

    def stop (self):
        # mark inactive and wait for run() to drain
        self.active = False
        self.join()
505
# global vars

# set to True to request that all worker threads stop
g_stop = False
# module-wide logger; starts as a stdout-only dummy until a log file exists
logger = DummyLogger()

# cleanup list is a list of callables to be run when cntrl+c is caught
cleanup_list = []
513
514################ threads ########################
515
516# sampler
class Sample_Thread (threading.Thread):
    """Thread that polls the router statistics once per second.

    Stops when do_stop() is called or when the global g_stop flag is set.
    """

    def __init__(self, threadID, router):

        threading.Thread.__init__(self)
        self.threadID = threadID
        self.router = router
        self.stop = False

    def run(self):
        self.router.clear_sampling_stats()

        try:
            while not (self.stop or g_stop):
                self.router.sample_stats()
                time.sleep(1)
        except Exception as e:
            # route any failure through the central error handler
            ErrorHandler(e, traceback.format_exc())

    def do_stop(self):
        # cooperative stop - run() checks this flag each second
        self.stop = True
537
538
def general_cleanup_on_error ():
    """Stop all threads, flush logs, run registered cleanups and exit(-1)."""
    global g_stop
    global cleanup_list

    # mark all the threads to finish
    g_stop = True

    # shutdown and flush the logger
    logger.shutdown()
    if logger:
        logger.flush()

    # execute the registered callables
    for callback in cleanup_list:
        callback()

    # dummy wait for threads to finish (TODO: make this more smart)
    time.sleep(2)
    exit(-1)
558
# just a dummy for preventing chain calls
def signal_handler_dummy (sig_id, frame):
    # intentionally empty - installed so repeated signals during cleanup
    # are ignored instead of re-entering the real handlers
    pass
562
def error_signal_handler (sig_id, frame):
    """SIGUSR1 handler: run the generic cleanup exactly once."""
    # make sure no chain of calls
    signal.signal(signal.SIGINT, signal_handler_dummy)
    signal.signal(signal.SIGUSR1, signal_handler_dummy)

    general_cleanup_on_error()
569
def int_signal_handler(sig_id, frame):
    """SIGINT handler: announce the interrupt, then clean up and exit."""
    # disarm both handlers so repeated signals cannot re-enter cleanup
    for sig in (signal.SIGINT, signal.SIGUSR1):
        signal.signal(sig, signal_handler_dummy)

    logger.log("\n\nCaught Cntrl+C... Cleaning up!\n\n")

    general_cleanup_on_error()
578
579
580# Trex with sampling
class CTRexWithRouter:
    """Couples a TRex traffic run with router statistics sampling.

    While TRex generates traffic, a Sample_Thread polls the router so the
    final result contains both TRex counters and router (AVC) statistics.
    """
    def __init__(self, trex, trex_params):
        self.trex = trex;
        self.trex_params = trex_params

        # pick the router driver according to the config file
        if self.trex_params['router_type'] == "ASR":
            self.router = h_avc.ASR1k(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
        elif self.trex_params['router_type'] == "ISR":
            self.router = h_avc.ISR(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
        else:
            raise Exception("unknown router type in config file")

        self.router.connect()

    def get_router (self):
        # expose the underlying router object (used e.g. by the progress bar)
        return self.router

    def run(self, m, duration):
        """Run TRex at multiplier m for duration seconds while sampling.

        Returns a dict with 'status', 'trex_results' and 'avc_results'.
        """

        self.sample_thread = Sample_Thread(1, self.router)
        self.sample_thread.start();

        # launch trex
        try:
#           trex_res = self.trex.run(m, duration);
            self.trex.start_trex(c = self.trex_params['trex_cores'],
                m = m,
                d = duration,
                f = self.trex_params['trex_yaml_file'],
                nc = True,
                l = self.trex_params['trex_latency'],
                limit_ports = self.trex_params['trex_limit_ports'])
            self.trex.sample_to_run_finish(20)      # collect trex-sample every 20 seconds.
        except Exception:
            # make sure the sampler thread is stopped before propagating
            self.sample_thread.do_stop()  # signal to stop
            self.sample_thread.join()     # wait for it to realy stop
            raise

        self.sample_thread.do_stop()  # signal to stop
        self.sample_thread.join()     # wait for it to realy stop

        self.res = self.trex.get_result_obj()

        results = {}
        results['status'] = True
        results['trex_results'] = self.res
        results['avc_results']  = self.router.get_stats()

        return (results)
        #return(trex_res.get_status() == STATUS_OK);
631
632# sanity checks to see run really went well
def sanity_test_run (trex_r, avc_r):
    # placeholder for post-run sanity checks - currently disabled
    pass
    #if (sum(avc_r['cpu_histo']) == 0):
        #raise h_trex.TrexRunException("CPU utilization from router is zero, check connectivity")
637
def _trex_run (job_summary, m, duration):
    """Execute one TRex run and distill the results into a flat dict.

    Shows a progress bar while the run is in flight, then extracts the
    metrics used by the find/plot flows: m, tx (Mbps), cpu_util, norm_cpu,
    latencies, drops, expected-bps, total-pps and m_total_pkt.
    """

    trex_thread = job_summary['trex_thread']

    p = ProgressBar(duration, trex_thread.get_router())
    p.start()

    try:
        results = trex_thread.run(m, duration)
    except Exception as e:
        # stop the progress bar thread before propagating the failure
        p.stop()
        raise

    p.stop()

    if (results == None):
        raise Exception("Failed to run Trex")

    # fetch values
    trex_r = results['trex_results']
    avc_r  = results['avc_results']

    sanity_test_run(trex_r, avc_r)

    res_dict = {}

    res_dict['m']  = m
    total_tx_bps = trex_r.get_last_value("trex-global.data.m_tx_bps")
    res_dict['tx'] = total_tx_bps / (1000 * 1000)  # EVENTUALLY CONTAINS IN MBPS (EXTRACTED IN BPS)

    res_dict['cpu_util'] = avc_r['cpu_util']

    # guard against division by zero when the router reports no CPU load
    if int(res_dict['cpu_util']) == 0:
        res_dict['norm_cpu']=1;
    else:
        res_dict['norm_cpu'] = (res_dict['tx'] / res_dict['cpu_util']) * 100

    res_dict['maximum-latency']  = max ( trex_r.get_max_latency().values() ) #trex_r.res['maximum-latency']
    res_dict['average-latency']  = trex_r.get_avg_latency()['all'] #trex_r.res['average-latency']

    logger.log(cpu_histo_to_str(avc_r['cpu_histo']))

    res_dict['total-pkt-drop']  = trex_r.get_total_drops()
    res_dict['expected-bps']    = trex_r.get_expected_tx_rate()['m_tx_expected_bps']
    res_dict['total-pps']       = get_median( trex_r.get_value_list("trex-global.data.m_tx_pps") )#trex_r.res['total-pps']
    res_dict['m_total_pkt']     = trex_r.get_last_value("trex-global.data.m_total_tx_pkts")

    res_dict['latency_condition'] = job_summary['trex_params']['trex_latency_condition']

    return res_dict
688
def trex_run (job_summary, m, duration):
    """Public entry point for a single TRex run (thin wrapper of _trex_run)."""
    return _trex_run(job_summary, m, duration)
692
693
def m_to_mbps (job_summary, m):
    """Convert an M multiplier to Mbps using the job's calibrated base unit."""
    return m * job_summary['base_m_unit']
696
697# find the correct range of M
def find_m_range (job_summary):
    """Determine the [min, max] M multiplier range for the search.

    If the user supplied 'm_range' (in Mbps) it is converted to M units;
    otherwise a range is guessed (1 Mbps up to 1/10/20 Gbps depending on
    machine type and dual injection). A short 30 second calibration run at
    M=1 provides the bps-per-M conversion factor.

    Returns a dict with 'm_range' and 'base_m_unit' (Mbps per 1.0 M).
    """

    trex = job_summary['trex']
    trex_config = job_summary['trex_params']

    # if not provided - guess the correct range of bandwidth
    if not job_summary['m_range']:
        m_range = [0.0, 0.0]
        # 1 Mbps -> 1 Gbps
        LOW_TX = 1.0 * 1000 * 1000
        MAX_TX = 1.0 * 1000 * 1000 * 1000

        # for 10g go to 10g
        if trex_config['trex_machine_type'] == "10G":
            MAX_TX *= 10

        # dual injection can potentially reach X2 speed
        if trex_config['trex_is_dual'] == True:
            MAX_TX *= 2

    else:
        m_range = job_summary['m_range']
        LOW_TX = m_range[0] * 1000 * 1000
        MAX_TX = m_range[1] * 1000 * 1000


    logger.log("\nSystem Settings - Min: {0:,} Mbps / Max: {1:,} Mbps".format(LOW_TX / (1000 * 1000), MAX_TX / (1000 * 1000)))
    logger.log("\nTrying to get system minimum M and maximum M...")

    # calibration run: M=1 for 30 seconds to learn the expected bps per M
    res_dict = trex_run(job_summary, 1, 30)

    # figure out low / high M
    m_range[0] = (LOW_TX / res_dict['expected-bps']) * 1
    m_range[1] = (MAX_TX / res_dict['expected-bps']) * 1


    # return both the m_range and the base m unit for future calculation
    results = {}
    results['m_range'] = m_range
    results['base_m_unit'] = res_dict['expected-bps'] /(1000 * 1000)

    return (results)
740
741# calculate points between m_range[0] and m_range[1]
def calculate_plot_points (job_summary, m_range, plot_count):
    """Run TRex at *plot_count* evenly spaced M values across *m_range*.

    Each point is a 180 second run; the resulting per-run dicts are
    collected and returned for graph generation.
    """

    cond_type = job_summary['cond_type']
    delta_m = (m_range[1] - m_range[0]) / plot_count

    m_current = m_range[0]
    m_end = m_range[1]

    logger.log("\nStarting Plot Graph Task ...\n")
    logger.log("Plotting Range Is From: {0:.2f} [Mbps] To: {1:.2f} [Mbps] Over {2} Points".format(m_to_mbps(job_summary, m_range[0]),
                                                                                                  m_to_mbps(job_summary, m_range[1]),
                                                                                                  plot_count))
    logger.log("Delta Between Points is {0:.2f} [Mbps]".format(m_to_mbps(job_summary, delta_m)))
    plot_points = []

    duration = 180

    # NOTE: 'iter' shadows the builtin; kept as-is for byte-identical code
    iter = 1

    trex = job_summary['trex']
    while (iter <= plot_count):
        logger.log("\nPlotting Point [{0}/{1}]:\n".format(iter, plot_count))
        logger.log("Estimated BW   ~= {0:,.2f} [Mbps]\n".format(m_to_mbps(job_summary, m_current)))
        logger.log("M               = {0:.6f}".format(m_current))
        logger.log("Duration        = {0} seconds\n".format(duration))

        res_dict = trex_run(job_summary, m_current, duration)
        print_trex_results(res_dict, cond_type)

        plot_points.append(dict(res_dict))

        m_current += delta_m
        iter = iter + 1

        # last point - make sure its the maximum point
        if (iter == plot_count):
            m_current = m_range[1]

        #print "waiting for system to stabilize ..."
        #time.sleep(30);

    return plot_points
784
785
def cond_type_to_str (cond_type):
    """Human readable name of the success-condition type."""
    if cond_type == 'latency':
        return "Max Latency"
    return "Pkt Drop"
788
789# success condition (latency or drop)
def check_condition (cond_type, res_dict):
    """Return True when the run satisfies the configured success condition."""
    if cond_type == 'latency':
        # pass while the worst-case latency stays under the threshold
        return res_dict['maximum-latency'] < res_dict['latency_condition']

    # drop condition is a bit more complex - it should create high latency in addition to 0.2% drop
    elif cond_type == 'drop':
        high_latency = res_dict['maximum-latency'] > (res_dict['latency_condition'] + 2000)
        heavy_drop = res_dict['total-pkt-drop'] > (0.002 * res_dict['m_total_pkt'])
        return not (high_latency and heavy_drop)

    # unknown condition type is a programming error
    assert(0)
805
def print_trex_results (res_dict, cond_type):
    """Log the formatted results of a single run."""
    logger.log("\nRun Results:\n")
    logger.log(run_results_to_str(res_dict, cond_type))
810
811
812######################## describe a find job ########################
813class FindJob:
814    # init a job object with min / max
815    def __init__ (self, min, max, job_summary):
816        self.min = float(min)
817        self.max = float(max)
818        self.job_summary = job_summary
819        self.cond_type = job_summary['cond_type']
820        self.success_points = []
821        self.iter_num = 1
822        self.found = False
823        self.iter_duration = 300
824
825    def _distance (self):
826        return ( (self.max - self.min) / min(self.max, self.min) )
827
828    def time_to_end (self):
829        time_in_sec = (self.iters_to_end() * self.iter_duration)
830        return timedelta(seconds = time_in_sec)
831
832    def iters_to_end (self):
833        # find 2% point
834        ma = self.max
835        mi = self.min
836        iter = 0
837
838        while True:
839            dist = (ma - mi) / min(ma , mi)
840            if dist < 0.02:
841                break
842            if random.choice(["up", "down"]) == "down":
843                ma = (ma + mi) / 2
844            else:
845                mi = (ma + mi) / 2
846
847            iter += 1
848
849        return (iter)
850
851    def _cur (self):
852        return ( (self.min + self.max) / 2 )
853
854    def _add_success_point (self, res_dict):
855        self.success_points.append(res_dict.copy())
856
857    def _is_found (self):
858        return (self.found)
859
860    def _next_iter_duration (self):
861        return (self.iter_duration)
862
863    # execute iteration
864    def _execute (self):
865        # reset the found var before running
866        self.found = False
867
868        # run and print results
869        res_dict = trex_run(self.job_summary, self._cur(), self.iter_duration)
870
871        self.iter_num += 1
872        cur = self._cur()
873
874        if (self._distance() < 0.02):
875            if (check_condition(self.cond_type, res_dict)):
876                # distance < 2% and success - we are done
877                self.found = True
878            else:
879                # lower to 90% of current and retry
880                self.min = cur * 0.9
881                self.max = cur
882        else:
883            # success
884            if (check_condition(self.cond_type, res_dict)):
885                self.min = cur
886            else:
887                self.max = cur
888
889        if (check_condition(self.cond_type, res_dict)):
890            self._add_success_point(res_dict)
891
892        return res_dict
893
    # find the maximum M value before the failure condition triggers
895    def find_max_m (self):
896
897        res_dict = {}
898        while not self._is_found():
899
900            logger.log("\n-> Starting Find Iteration #{0}\n".format(self.iter_num))
901            logger.log("Estimated BW         ~=  {0:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self._cur())))
902            logger.log("M                     =  {0:.6f}".format(self._cur()))
903            logger.log("Duration              =  {0} seconds".format(self._next_iter_duration()))
904            logger.log("Current BW Range      =  {0:,.2f} [Mbps] / {1:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self.min), m_to_mbps(self.job_summary, self.max)))
905            logger.log("Est. Iterations Left  =  {0} Iterations".format(self.iters_to_end()))
906            logger.log("Est. Time Left        =  {0}\n".format(self.time_to_end()))
907
908            res_dict = self._execute()
909
910            print_trex_results(res_dict, self.cond_type)
911
912        find_results = res_dict.copy()
913        find_results['max_m'] = self._cur()
914        return (find_results)
915
916######################## describe a plot job ########################
class PlotJob:
    """Holds the successful runs collected by a FindJob, sorted by
    measured TX rate, ready to be rendered as a graph."""

    def __init__(self, findjob):
        self.job_summary = findjob.job_summary

        # snapshot the success points so the FindJob's list is untouched
        self.plot_points = sorted(findjob.success_points, key = lambda point: point['tx'])

    def plot (self, duration = 300):
        # the points were already measured during the find phase;
        # 'duration' is kept only for interface compatibility
        return self.plot_points
948
949
def generate_job_id ():
    """Return a random 32-bit job identifier as a decimal string.

    random.getrandbits() already returns an integer, so the extra
    int() conversion the original code performed was redundant.
    """
    return str(random.getrandbits(32))
952
def print_header ():
    """Log the tool's startup banner."""
    banner = "--== TRex Performance Tool v1.0 (2014) ==--"
    logger.log(banner)
955
956# print startup summary
def log_startup_summary (job_summary):
    """Log a human-readable summary of the setup and job parameters.

    All value columns are padded to the same offset; the original
    Setup section mixed two different widths and printed misaligned.
    """
    trex = job_summary['trex']
    trex_config = job_summary['trex_params']

    logger.log("\nWork Request Details:\n")
    logger.log("Setup Details:\n")
    logger.log("TRex Config File:    {0}".format(job_summary['config_file']))
    logger.log("Machine Name:        {0}".format(trex_config['trex_name']))
    logger.log("TRex Type:           {0}".format(trex_config['trex_machine_type']))
    logger.log("TRex Dual Int. Tx:   {0}".format(trex_config['trex_is_dual']))
    logger.log("Router Interface:    {0}".format(trex_config['router_interface']))

    logger.log("\nJob Details:\n")
    logger.log("Job Name:            {0}".format(job_summary['job_name']))
    logger.log("YAML file:           {0}".format(job_summary['yaml']))
    logger.log("Job Type:            {0}".format(job_summary['job_type_str']))
    logger.log("Condition Type:      {0}".format(job_summary['cond_name']))
    logger.log("Job Log:             {0}".format(job_summary['log_filename']))
    logger.log("Email Report:        {0}".format(job_summary['email']))
977
978#   logger.log("\nTrex Command Used:\n{0}".format(trex.build_cmd(1, 10)))
979
def load_trex_config_params (filename, yaml_file):
    """Read the TRex/router pair INI file into a flat config dict.

    yaml_file is stored as-is under 'trex_yaml_file'. Any parsing
    problem is re-raised as TrexRunException naming the file.
    """
    parser = ConfigParser.ConfigParser()
    config = {}

    try:
        parser.read(filename)

        # mandatory trex machine settings
        config['trex_name'] = parser.get("trex", "machine_name")
        config['trex_port'] = parser.get("trex", "machine_port")
        # NOTE: the 'hisory' typo in the key is kept - callers read this exact key
        config['trex_hisory_size'] = parser.getint("trex", "history_size")

        config['trex_latency_condition'] = parser.getint("trex", "latency_condition")
        config['trex_yaml_file'] = yaml_file

        # support legacy data
        config['trex_latency'] = parser.getint("trex", "latency")
        config['trex_limit_ports'] = parser.getint("trex", "limit_ports")
        config['trex_cores'] = parser.getint("trex", "cores")
        config['trex_machine_type'] = parser.get("trex", "machine_type")
        config['trex_is_dual'] = parser.getboolean("trex", "is_dual")

        # optional Trex parameters default to None when absent
        for config_key, option in (('trex_config_file', 'config_file'),
                                   ('trex_misc_params', 'misc_params')):
            if parser.has_option("trex", option):
                config[config_key] = parser.get("trex", option)
            else:
                config[config_key] = None

        # router section - port falls back to the simple telnet port
        if parser.has_option("router", "port"):
            config['router_port'] = parser.get("router", "port")
        else:
            config['router_port'] = 23

        config['router_interface'] = parser.get("router", "interface")
        config['router_password'] = parser.get("router", "password")
        config['router_type'] = parser.get("router", "type")

    except Exception as inst:
        raise TrexRunException("\nBad configuration file: '{0}'\n\n{1}".format(filename, inst))

    return config
1029
def prepare_for_run (job_summary):
    """Set up everything the job needs before launching.

    Populates the job id, report directory, log/graph filenames and
    default email; creates the module-level logger; builds the TRex
    client and its worker thread; registers remote cleanup and the
    SIGINT/SIGUSR1 handlers.

    Fixes vs. the original:
    - a non-EACCES OSError from makedirs was silently swallowed
      (leaving no job dir and deferring the failure) - now re-raised
    - leftover debug print of the job dir removed
    """
    global logger

    # generate unique id
    job_summary['job_id'] = generate_job_id()
    job_summary['job_dir'] = "trex_job_{0}".format(job_summary['job_id'])

    job_summary['start_time'] = datetime.datetime.now()

    if not job_summary['email']:
        job_summary['user'] = getpass.getuser()
        job_summary['email'] = "{0}@cisco.com".format(job_summary['user'])

    # create dir for reports
    try:
        job_summary['job_dir'] = os.path.abspath( os.path.join(os.getcwd(), 'logs', job_summary['job_dir']) )
        os.makedirs( job_summary['job_dir'] )

    except OSError as err:
        if err.errno == errno.EACCES:
            # fall back. try creating the dir name at /tmp path
            job_summary['job_dir'] = os.path.join("/tmp/", "trex_job_{0}".format(job_summary['job_id']) )
            os.makedirs(job_summary['job_dir'])
        else:
            # anything other than a permission problem is a real error
            raise

    job_summary['log_filename'] = os.path.join(job_summary['job_dir'], "trex_log_{0}.txt".format(job_summary['job_id']))
    job_summary['graph_filename'] = os.path.join(job_summary['job_dir'], "trex_graph_{0}.html".format(job_summary['job_id']))

    # init logger
    logger = MyLogger(job_summary['log_filename'])

    # mark those as not populated yet
    job_summary['find_results'] = None
    job_summary['plot_results'] = None

    # create trex client instance
    trex_params = load_trex_config_params(job_summary['config_file'],job_summary['yaml'])
    trex = CTRexClient(trex_host = trex_params['trex_name'],
        max_history_size = trex_params['trex_hisory_size'],
        trex_daemon_port = trex_params['trex_port'])

    job_summary['trex'] = trex
    job_summary['trex_params'] = trex_params

    # create trex task thread
    job_summary['trex_thread'] = CTRexWithRouter(trex, trex_params);

    # in case of an error we need to call the remote cleanup
    cleanup_list.append(trex.stop_trex)

    # signal handler
    signal.signal(signal.SIGINT, int_signal_handler)
    signal.signal(signal.SIGUSR1, error_signal_handler)
1083
1084
def after_run (job_summary):
    """Record the total run time and emit the final report
    (console summary + email)."""
    elapsed = datetime.datetime.now() - job_summary['start_time']
    job_summary['total_run_time'] = elapsed

    reporter = JobReporter(job_summary)
    reporter.print_summary()
    reporter.send_email_report()
1091
def launch (job_summary):
    """Top-level job flow: prepare, search for max M,
    optionally plot, then report."""
    prepare_for_run(job_summary)
    print_header()
    log_startup_summary(job_summary)

    # find the correct M range if not provided
    range_results = find_m_range(job_summary)
    job_summary['base_m_unit'] = range_results['base_m_unit']

    # a user-supplied range wins over the auto-detected one
    m_range = job_summary['m_range'] if job_summary['m_range'] else range_results['m_range']

    logger.log("\nJob Bandwidth Working Range:\n")
    logger.log("Min M = {0:.6f} / {1:,.2f} [Mbps] \nMax M = {2:.6f} / {3:,.2f} [Mbps]".format(m_range[0], m_to_mbps(job_summary, m_range[0]), m_range[1], m_to_mbps(job_summary, m_range[1])))

    # run the bisection search
    findjob = FindJob(m_range[0], m_range[1], job_summary)
    job_summary['find_results'] = findjob.find_max_m()

    # plot points to graph when the job asked for it
    if job_summary['job_type'] == "all":
        plotjob = PlotJob(findjob)
        job_summary['plot_results'] = plotjob.plot()

    after_run(job_summary)
1123
1124
1125# populate the fields for run
def populate_fields (job_summary, args):
    """Copy the parsed CLI arguments into the job summary dict,
    filling in defaults and display strings."""
    job_summary['config_file'] = args.config_file
    job_summary['job_type'] = args.job
    job_summary['cond_type'] = args.cond_type
    job_summary['yaml'] = args.yaml

    # optional arguments fall back to defaults
    job_summary['job_name'] = args.n if args.n else "Nameless"

    # did the user provide an email ?
    job_summary['email'] = args.e if args.e else None

    # did the user provide a range ?
    job_summary['m_range'] = args.m if args.m else None

    # some pretty shows
    if args.cond_type == 'drop':
        job_summary['cond_name'] = 'Drop Pkt'
    else:
        job_summary['cond_name'] = 'High Latency'

    job_type_names = {"find": "Find Max BW",
                      "plot": "Plot Graph"}
    job_summary['job_type_str'] = job_type_names.get(args.job, "Find Max BW & Plot Graph")

    # plotting requires a glibc recent enough for the graph tool
    if args.job != "find":
        verify_glibc_version()
1161
1162
1163
1164# verify file exists for argparse
def is_valid_file (parser, err_msg, filename):
    """argparse type-checker: ensure *filename* exists on disk.

    Returns the filename unchanged when it exists; otherwise aborts
    argument parsing via parser.error() (which raises SystemExit).
    """
    if os.path.exists(filename):
        return filename
    parser.error("{0}: '{1}'".format(err_msg, filename))
1170
def entry ():
    """CLI entry point: parse arguments, run the requested job,
    and signal completion via the module-level g_stop flag.

    Fix: the original assigned `g_stop = True` without a `global`
    declaration, creating a useless function-local - the module-level
    flag other code watches was never set.
    """
    global g_stop

    job_summary = {}

    parser = argparse.ArgumentParser()

    parser.add_argument("-n", help="Job Name",
                        type = str)

    parser.add_argument("-m", help="M Range [default: auto calcuation]",
                        nargs = 2,
                        type = float)

    parser.add_argument("-e", help="E-Mail for report [default: whoami@cisco.com]",
                        type = str)

    parser.add_argument("-c", "--cfg", dest = "config_file", required = True,
                        help = "Configuration File For Trex/Router Pair",
                        type = lambda x: is_valid_file(parser, "config file does not exists",x))

    parser.add_argument("job", help = "Job type",
                        type = str,
                        choices = ['find', 'plot', 'all'])

    parser.add_argument("cond_type", help="type of failure condition",
                        type = str,
                        choices = ['latency','drop'])

    parser.add_argument("-f", "--yaml", dest = "yaml", required = True,
                        help="YAML file to use", type = str)

    args = parser.parse_args()

    # restore the terminal on exit; report any failure through the handler
    with TermMng():
        try:
            populate_fields(job_summary, args)
            launch(job_summary)

        except Exception as e:
            ErrorHandler(e, traceback.format_exc())

    logger.log("\nReport bugs to imarom@cisco.com\n")
    g_stop = True
1214
def dummy_test ():
    """Developer smoke test: run the reporting path against canned data.

    Prepares a real job environment (prepare_for_run does real I/O:
    creates the job dir, logger, TRex client) and then feeds hard-coded
    find/plot results into after_run to exercise the report + email flow
    without running actual traffic.
    """
    job_summary = {}
    find_results = {}

    # minimal fields needed by prepare_for_run
    job_summary['config_file'] = 'config/trex01-1g.cfg'
    job_summary['yaml'] = 'dummy.yaml'
    job_summary['email']        = 'imarom@cisco.com'
    job_summary['job_name'] = 'test'
    job_summary['job_type_str'] = 'test'

    prepare_for_run(job_summary)

    time.sleep(2)
    # fields the reporter reads; job_id deliberately overrides the generated one
    job_summary['yaml'] = 'dummy.yaml'
    job_summary['job_type']  = 'find'
    job_summary['cond_name'] = 'Drop'
    job_summary['cond_type'] = 'drop'
    job_summary['job_id']= 94817231


    # canned "find" result mimicking a single successful run
    find_results['tx'] = 210.23
    find_results['m'] = 1.292812
    find_results['total-pps'] = 1000
    find_results['cpu_util'] = 74.0
    find_results['maximum-latency'] = 4892
    find_results['average-latency'] = 201
    find_results['total-pkt-drop'] = 0


    findjob = FindJob(1,1,job_summary)
    plotjob = PlotJob(findjob)
    job_summary['plot_results'] = plotjob.plot()

    job_summary['find_results'] = find_results
    # canned plot curve (overrides the empty plot above): increasing load
    # points, with the last one showing saturation (latency/drops spike)
    job_summary['plot_results'] = [{'cpu_util': 2.0,'norm_cpu': 1.0,  'total-pps': 1000, 'expected-bps': 999980.0, 'average-latency': 85.0, 'tx': 0.00207*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 221.0},
                                   {'cpu_util': 8.0,'norm_cpu': 1.0,  'total-pps': 1000,'expected-bps': 48500000.0, 'average-latency': 87.0, 'tx': 0.05005*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 279.0},
                                   {'cpu_util': 14.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 95990000.0, 'average-latency': 92.0, 'tx': 0.09806*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 273.0},
                                   {'cpu_util': 20.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 143490000.0, 'average-latency': 95.0, 'tx': 0.14613*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 271.0},
                                   {'cpu_util': 25.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 190980000.0, 'average-latency': 97.0, 'tx': 0.1933*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 302.0},
                                   {'cpu_util': 31.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 238480000.0, 'average-latency': 98.0, 'tx': 0.24213*1000, 'total-pkt-drop': 1.0, 'maximum-latency': 292.0},
                                   {'cpu_util': 37.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 285970000.0, 'average-latency': 99.0, 'tx': 0.29011*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 344.0},
                                   {'cpu_util': 43.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 333470000.0, 'average-latency': 100.0, 'tx': 0.3382*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 351.0},
                                   {'cpu_util': 48.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 380970000.0, 'average-latency': 100.0, 'tx': 0.38595*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 342.0},
                                   {'cpu_util': 54.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 428460000.0, 'average-latency': 19852.0, 'tx': 0.43438*1000, 'total-pkt-drop': 1826229.0, 'maximum-latency': 25344.0}]



    after_run(job_summary)
1263
# script entry point
if __name__ == "__main__":
    entry()
1266
1267