perf test: Simplify metric value validation test final report

The original test report was hard to read and included information
that was not really useful. Simplify the report to improve its
readability.

Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Samantha Alt <samantha.alt@intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240130180907.639729-1-weilin.wang@intel.com
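
With this change the final report is simply the printed list of TestError
objects collected during validation, instead of the nested per-category JSON.
A minimal sketch of what the new output looks like, using a trimmed copy of
the TestError class from this patch; the metric name, workload and value are
invented for the example:

# Illustration only: trimmed TestError showing the shape of the new report.
class TestError:
    def __init__(self, metric, wl, value, low, up=float('nan'), description=str()):
        self.metric = metric
        self.workloads = [wl]
        self.collectedValue = value
        self.valueLowBound = low
        self.valueUpBound = up
        self.description = description

    def __repr__(self) -> str:
        if len(self.collectedValue) == 0:
            return "\nNo Metric Value Error: \tMetric {0} returns with no value \n\
                    \tworkload(s): {1}".format(self.metric, self.workloads)
        return "\nWrong Metric Value Error: \tThe collected value of metric {0}\n\
                \tis {1} in workload(s): {2}\n\
                \tbut expected value range is [{3}, {4}]".format(
                    self.metric, self.collectedValue, self.workloads,
                    self.valueLowBound, self.valueUpBound)

# Invented example data: one metric outside its expected range.
errlist = [TestError(["tma_frontend_bound"], "true", [112.5], 0.0, 100.0)]
print("Test validation finished. Final report: ")
print(errlist)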

tools/perf/tests/shell/lib/perf_metric_validation.py

@@ -1,4 +1,4 @@
#SPDX-License-Identifier: GPL-2.0
# SPDX-License-Identifier: GPL-2.0
import re
import csv
import json
@@ -6,36 +6,61 @@ import argparse
from pathlib import Path
import subprocess
class TestError:
def __init__(self, metric: list[str], wl: str, value: list[float], low: float, up=float('nan'), description=str()):
self.metric: list = metric # multiple metrics in relationship type tests
self.workloads = [wl] # multiple workloads possible
self.collectedValue: list = value
self.valueLowBound = low
self.valueUpBound = up
self.description = description
def __repr__(self) -> str:
if len(self.metric) > 1:
return "\nMetric Relationship Error: \tThe collected value of metric {0}\n\
\tis {1} in workload(s): {2} \n\
\tbut expected value range is [{3}, {4}]\n\
\tRelationship rule description: \'{5}\'".format(self.metric, self.collectedValue, self.workloads,
self.valueLowBound, self.valueUpBound, self.description)
elif len(self.collectedValue) == 0:
return "\nNo Metric Value Error: \tMetric {0} returns with no value \n\
\tworkload(s): {1}".format(self.metric, self.workloads)
else:
return "\nWrong Metric Value Error: \tThe collected value of metric {0}\n\
\tis {1} in workload(s): {2}\n\
\tbut expected value range is [{3}, {4}]"\
.format(self.metric, self.collectedValue, self.workloads,
self.valueLowBound, self.valueUpBound)
class Validator:
def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
self.rulefname = rulefname
self.reportfname = reportfname
self.rules = None
self.collectlist:str = metrics
self.collectlist: str = metrics
self.metrics = self.__set_metrics(metrics)
self.skiplist = set()
self.tolerance = t
self.workloads = [x for x in workload.split(",") if x]
self.wlidx = 0 # idx of current workloads
self.allresults = dict() # metric results of all workload
self.allignoremetrics = dict() # metrics with no results or negative results
self.allfailtests = dict()
self.wlidx = 0 # idx of current workloads
self.allresults = dict() # metric results of all workload
self.alltotalcnt = dict()
self.allpassedcnt = dict()
self.allerrlist = dict()
self.results = dict() # metric results of current workload
self.results = dict() # metric results of current workload
# vars for test pass/failure statistics
self.ignoremetrics= set() # metrics with no results or negative results, neg result counts as a failed test
self.failtests = dict()
# metrics with no results or negative results, neg result counts failed tests
self.ignoremetrics = set()
self.totalcnt = 0
self.passedcnt = 0
# vars for errors
self.errlist = list()
# vars for Rule Generator
self.pctgmetrics = set() # Percentage rule
self.pctgmetrics = set() # Percentage rule
# vars for debug
self.datafname = datafname
@@ -69,10 +94,10 @@ class Validator:
ensure_ascii=True,
indent=4)
def get_results(self, idx:int = 0):
def get_results(self, idx: int = 0):
return self.results[idx]
def get_bounds(self, lb, ub, error, alias={}, ridx:int = 0) -> list:
def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
"""
Get bounds and tolerance from lb, ub, and error.
If missing lb, use 0.0; missing ub, use float('inf'); missing error, use self.tolerance.
@@ -85,7 +110,7 @@ class Validator:
tolerance, denormalized based on upper bound value
"""
# init ubv and lbv to invalid values
def get_bound_value (bound, initval, ridx):
def get_bound_value(bound, initval, ridx):
val = initval
if isinstance(bound, int) or isinstance(bound, float):
val = bound
@@ -113,10 +138,10 @@ class Validator:
return lbv, ubv, denormerr
def get_value(self, name:str, ridx:int = 0) -> list:
def get_value(self, name: str, ridx: int = 0) -> list:
"""
Get value of the metric from self.results.
If result of this metric is not provided, the metric name will be added into self.ignoremetrics and self.errlist.
If result of this metric is not provided, the metric name will be added into self.ignoremetrics.
All future test(s) on this metric will fail.
@param name: name of the metric
@@ -142,7 +167,7 @@ class Validator:
Check if metric values are non-negative.
One metric is counted as one test.
Failure: when metric value is negative or not provided.
Metrics with negative value will be added into the self.failtests['PositiveValueTest'] and self.ignoremetrics.
Metrics with negative value will be added into self.ignoremetrics.
"""
negmetric = dict()
pcnt = 0
@@ -155,25 +180,27 @@ class Validator:
else:
pcnt += 1
tcnt += 1
# The first round collect_perf() run these metrics with simple workload
# "true". We give metrics a second chance with a longer workload if less
# than 20 metrics failed positive test.
if len(rerun) > 0 and len(rerun) < 20:
second_results = dict()
self.second_test(rerun, second_results)
for name, val in second_results.items():
if name not in negmetric: continue
if name not in negmetric:
continue
if val >= 0:
del negmetric[name]
pcnt += 1
self.failtests['PositiveValueTest']['Total Tests'] = tcnt
self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
if len(negmetric.keys()):
self.ignoremetrics.update(negmetric.keys())
negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})
self.errlist.extend(
[TestError([m], self.workloads[self.wlidx], negmetric[m], 0) for m in negmetric.keys()])
return
def evaluate_formula(self, formula:str, alias:dict, ridx:int = 0):
def evaluate_formula(self, formula: str, alias: dict, ridx: int = 0):
"""
Evaluate the value of formula.
@@ -187,10 +214,11 @@ class Validator:
sign = "+"
f = str()
#TODO: support parenthesis?
# TODO: support parenthesis?
for i in range(len(formula)):
if i+1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
s = alias[formula[b:i]] if i+1 < len(formula) else alias[formula[b:]]
s = alias[formula[b:i]] if i + \
1 < len(formula) else alias[formula[b:]]
v = self.get_value(s, ridx)
if not v:
errs.append(s)
@@ -228,49 +256,49 @@ class Validator:
alias = dict()
for m in rule['Metrics']:
alias[m['Alias']] = m['Name']
lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
val, f = self.evaluate_formula(rule['Formula'], alias, ridx=rule['RuleIndex'])
lbv, ubv, t = self.get_bounds(
rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
val, f = self.evaluate_formula(
rule['Formula'], alias, ridx=rule['RuleIndex'])
lb = rule['RangeLower']
ub = rule['RangeUpper']
if isinstance(lb, str):
if lb in alias:
lb = alias[lb]
if isinstance(ub, str):
if ub in alias:
ub = alias[ub]
if val == -1:
self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Description':f})
self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [],
lb, ub, rule['Description']))
elif not self.check_bound(val, lbv, ubv, t):
lb = rule['RangeLower']
ub = rule['RangeUpper']
if isinstance(lb, str):
if lb in alias:
lb = alias[lb]
if isinstance(ub, str):
if ub in alias:
ub = alias[ub]
self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Formula':f,
'RangeLower': lb, 'LowerBoundValue': self.get_value(lb),
'RangeUpper': ub, 'UpperBoundValue':self.get_value(ub),
'ErrorThreshold': t, 'CollectedValue': val})
self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [val],
lb, ub, rule['Description']))
else:
self.passedcnt += 1
self.failtests['RelationshipTest']['Passed Tests'] += 1
self.totalcnt += 1
self.failtests['RelationshipTest']['Total Tests'] += 1
return
# Single Metric Test
def single_test(self, rule:dict):
def single_test(self, rule: dict):
"""
Validate if the metrics are in the required value range.
e.g. lower_bound <= metrics_value <= upper_bound
One metric is counted as one test in this type of test.
One rule may include one or more metrics.
Failure: when the metric value not provided or the value is outside the bounds.
This test updates self.total_cnt and records failed tests in self.failtest['SingleMetricTest'].
This test updates self.total_cnt.
@param rule: dict with metrics to validate and the value range requirement
"""
lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
lbv, ubv, t = self.get_bounds(
rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
metrics = rule['Metrics']
passcnt = 0
totalcnt = 0
faillist = list()
failures = dict()
rerun = list()
for m in metrics:
@@ -286,25 +314,20 @@ class Validator:
second_results = dict()
self.second_test(rerun, second_results)
for name, val in second_results.items():
if name not in failures: continue
if name not in failures:
continue
if self.check_bound(val, lbv, ubv, t):
passcnt += 1
del failures[name]
else:
failures[name] = val
failures[name] = [val]
self.results[0][name] = val
self.totalcnt += totalcnt
self.passedcnt += passcnt
self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
if len(failures.keys()) != 0:
faillist = [{'MetricName':name, 'CollectedValue':val} for name, val in failures.items()]
self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex':rule['RuleIndex'],
'RangeLower': rule['RangeLower'],
'RangeUpper': rule['RangeUpper'],
'ErrorThreshold':rule['ErrorThreshold'],
'Failure':faillist})
self.errlist.extend([TestError([name], self.workloads[self.wlidx], val,
rule['RangeLower'], rule['RangeUpper']) for name, val in failures.items()])
return
@@ -312,19 +335,11 @@ class Validator:
"""
Create final report and write into a JSON file.
"""
alldata = list()
for i in range(0, len(self.workloads)):
reportstas = {"Total Rule Count": self.alltotalcnt[i], "Passed Rule Count": self.allpassedcnt[i]}
data = {"Metric Validation Statistics": reportstas, "Tests in Category": self.allfailtests[i],
"Errors":self.allerrlist[i]}
alldata.append({"Workload": self.workloads[i], "Report": data})
json_str = json.dumps(alldata, indent=4)
print("Test validation finished. Final report: ")
print(json_str)
print(self.errlist)
if self.debug:
allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]} for i in range(0, len(self.workloads))]
allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]}
for i in range(0, len(self.workloads))]
self.json_dump(allres, self.datafname)
def check_rule(self, testtype, metric_list):
@@ -342,13 +357,13 @@ class Validator:
return True
# Start of Collector and Converter
def convert(self, data: list, metricvalues:dict):
def convert(self, data: list, metricvalues: dict):
"""
Convert collected metric data from the -j output to dict of {metric_name:value}.
"""
for json_string in data:
try:
result =json.loads(json_string)
result = json.loads(json_string)
if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
else result["metric-unit"]
@@ -365,9 +380,10 @@ class Validator:
print(" ".join(command))
cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
data = [x+'}' for x in cmd.stderr.split('}\n') if x]
if data[0][0] != '{':
data[0] = data[0][data[0].find('{'):]
return data
def collect_perf(self, workload: str):
"""
Collect metric data with "perf stat -M" on given workload with -a and -j.
@@ -385,14 +401,18 @@ class Validator:
if rule["TestType"] == "RelationshipTest":
metrics = [m["Name"] for m in rule["Metrics"]]
if not any(m not in collectlist[0] for m in metrics):
collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]
collectlist[rule["RuleIndex"]] = [
",".join(list(set(metrics)))]
for idx, metrics in collectlist.items():
if idx == 0: wl = "true"
else: wl = workload
if idx == 0:
wl = "true"
else:
wl = workload
for metric in metrics:
data = self._run_perf(metric, wl)
if idx not in self.results: self.results[idx] = dict()
if idx not in self.results:
self.results[idx] = dict()
self.convert(data, self.results[idx])
return
@@ -412,7 +432,8 @@ class Validator:
2) create metric name list
"""
command = ['perf', 'list', '-j', '--details', 'metrics']
cmd = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
cmd = subprocess.run(command, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, encoding='utf-8')
try:
data = json.loads(cmd.stdout)
for m in data:
@@ -453,12 +474,12 @@ class Validator:
rules = data['RelationshipRules']
self.skiplist = set([name.lower() for name in data['SkipList']])
self.rules = self.remove_unsupported_rules(rules)
pctgrule = {'RuleIndex':0,
'TestType':'SingleMetricTest',
'RangeLower':'0',
pctgrule = {'RuleIndex': 0,
'TestType': 'SingleMetricTest',
'RangeLower': '0',
'RangeUpper': '100',
'ErrorThreshold': self.tolerance,
'Description':'Metrics in percent unit have value within [0, 100]',
'Description': 'Metrics in percent unit have value within [0, 100]',
'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
self.rules.append(pctgrule)
@@ -469,8 +490,9 @@ class Validator:
idx += 1
if self.debug:
#TODO: need to test and generate file name correctly
data = {'RelationshipRules':self.rules, 'SupportedMetrics': [{"MetricName": name} for name in self.metrics]}
# TODO: need to test and generate file name correctly
data = {'RelationshipRules': self.rules, 'SupportedMetrics': [
{"MetricName": name} for name in self.metrics]}
self.json_dump(data, self.fullrulefname)
return
@@ -482,20 +504,17 @@ class Validator:
@param key: key to the dictionaries (index of self.workloads).
'''
self.allresults[key] = self.results
self.allignoremetrics[key] = self.ignoremetrics
self.allfailtests[key] = self.failtests
self.alltotalcnt[key] = self.totalcnt
self.allpassedcnt[key] = self.passedcnt
self.allerrlist[key] = self.errlist
#Initialize data structures before data validation of each workload
# Initialize data structures before data validation of each workload
def _init_data(self):
testtypes = ['PositiveValueTest', 'RelationshipTest', 'SingleMetricTest']
testtypes = ['PositiveValueTest',
'RelationshipTest', 'SingleMetricTest']
self.results = dict()
self.ignoremetrics= set()
self.ignoremetrics = set()
self.errlist = list()
self.failtests = {k:{'Total Tests':0, 'Passed Tests':0, 'Failed Tests':[]} for k in testtypes}
self.totalcnt = 0
self.passedcnt = 0
@@ -525,32 +544,33 @@ class Validator:
testtype = r['TestType']
if not self.check_rule(testtype, r['Metrics']):
continue
if testtype == 'RelationshipTest':
if testtype == 'RelationshipTest':
self.relationship_test(r)
elif testtype == 'SingleMetricTest':
self.single_test(r)
else:
print("Unsupported Test Type: ", testtype)
self.errlist.append("Unsupported Test Type from rule: " + r['RuleIndex'])
self._storewldata(i)
print("Workload: ", self.workloads[i])
print("Total metrics collected: ", self.failtests['PositiveValueTest']['Total Tests'])
print("Non-negative metric count: ", self.failtests['PositiveValueTest']['Passed Tests'])
print("Total Test Count: ", self.totalcnt)
print("Passed Test Count: ", self.passedcnt)
self._storewldata(i)
self.create_report()
return sum(self.alltotalcnt.values()) != sum(self.allpassedcnt.values())
return len(self.errlist) > 0
# End of Class Validator
def main() -> None:
parser = argparse.ArgumentParser(description="Launch metric value validation")
parser = argparse.ArgumentParser(
description="Launch metric value validation")
parser.add_argument("-rule", help="Base validation rule file", required=True)
parser.add_argument("-output_dir", help="Path for validator output file, report file", required=True)
parser.add_argument("-debug", help="Debug run, save intermediate data to files", action="store_true", default=False)
parser.add_argument("-wl", help="Workload to run while data collection", default="true")
parser.add_argument(
"-rule", help="Base validation rule file", required=True)
parser.add_argument(
"-output_dir", help="Path for validator output file, report file", required=True)
parser.add_argument("-debug", help="Debug run, save intermediate data to files",
action="store_true", default=False)
parser.add_argument(
"-wl", help="Workload to run while data collection", default="true")
parser.add_argument("-m", help="Metric list to validate", default="")
args = parser.parse_args()
outpath = Path(args.output_dir)
@@ -559,8 +579,8 @@ def main() -> None:
datafile = Path.joinpath(outpath, 'perf_data.json')
validator = Validator(args.rule, reportf, debug=args.debug,
datafname=datafile, fullrulefname=fullrule, workload=args.wl,
metrics=args.m)
datafname=datafile, fullrulefname=fullrule, workload=args.wl,
metrics=args.m)
ret = validator.test()
return ret
@@ -569,6 +589,3 @@ def main() -> None:
if __name__ == "__main__":
import sys
sys.exit(main())
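
For context on the exit-status handling in the shell wrapper below, the
validator can also be driven directly, mirroring main() above. This is only a
sketch: the import assumes the library directory is on sys.path, and the rule
file name, output paths and workload string are placeholders.

# Sketch of a driver mirroring main(); all names below are placeholders.
from pathlib import Path
from perf_metric_validation import Validator   # assumes lib dir is on sys.path

outpath = Path("/tmp/metric_validation")
reportf = Path.joinpath(outpath, "report.json")       # placeholder name
fullrule = Path.joinpath(outpath, "full_rule.json")   # placeholder name
datafile = Path.joinpath(outpath, "perf_data.json")

validator = Validator("metric_validation_rules.json",  # placeholder rule file
                      reportf, debug=False,
                      datafname=datafile, fullrulefname=fullrule,
                      workload="true", metrics="")

# test() now reports failure when the printed error list is non-empty,
# instead of comparing total and passed rule counts.
ret = validator.test()
print("validation failed:", ret)

The shell wrapper below simply forwards this non-zero status to its caller.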

tools/perf/tests/shell/stat_metrics_values.sh

@@ -19,6 +19,8 @@ echo "Output will be stored in: $tmpdir"
$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
ret=$?
rm -rf $tmpdir
if [ $ret -ne 0 ]; then
echo "Metric validation return with erros. Please check metrics reported with errors."
fi
exit $ret