qemu/tests/avocado/tesseract_utils.py
Thomas Huth 645198d58b tests/avocado: Allow newer versions of tesseract in the nextcube test
Current Linux distros ship version 5 of the tesseract OCR software,
so the nextcube screen test is ignored there. Let's make the check
more flexible to allow newer versions, too, and remove the old v3
test since most Linux distros don't ship this version anymore.

Message-ID: <20231101204323.35533-1-huth@tuxfamily.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2023-11-07 19:26:50 +01:00

47 lines
1.4 KiB
Python

# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.
import re
import logging
from avocado.utils import process
from avocado.utils.path import find_command, CmdNotFoundError
def tesseract_available(expected_version):
try:
find_command('tesseract')
except CmdNotFoundError:
return False
res = process.run('tesseract --version')
try:
version = res.stdout_text.split()[1]
except IndexError:
version = res.stderr_text.split()[1]
return int(version.split('.')[0]) >= expected_version
match = re.match(r'tesseract\s(\d)', res)
if match is None:
return False
# now this is guaranteed to be a digit
return int(match.groups()[0]) >= expected_version
def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
console_logger = logging.getLogger('tesseract')
console_logger.debug(image_path)
if tesseract_version == 4:
tesseract_args += ' --oem 1'
proc = process.run("tesseract {} {} stdout".format(tesseract_args,
image_path))
lines = []
for line in proc.stdout_text.split('\n'):
sline = line.strip()
if len(sline):
console_logger.debug(sline)
lines += [sline]
return lines