diff --git a/changelogs/fragments/78214-wait-for-compare-bytes.yml b/changelogs/fragments/78214-wait-for-compare-bytes.yml new file mode 100644 index 00000000000..252d7ca4a99 --- /dev/null +++ b/changelogs/fragments/78214-wait-for-compare-bytes.yml @@ -0,0 +1,3 @@ +bugfixes: +- wait_for - Read file and perform comparisons using bytes to avoid decode errors + (https://github.com/ansible/ansible/issues/78214) diff --git a/lib/ansible/modules/wait_for.py b/lib/ansible/modules/wait_for.py index 2244f357bc1..ada2e80b7bc 100644 --- a/lib/ansible/modules/wait_for.py +++ b/lib/ansible/modules/wait_for.py @@ -224,9 +224,11 @@ match_groupdict: ''' import binascii +import contextlib import datetime import errno import math +import mmap import os import re import select @@ -236,7 +238,7 @@ import traceback from ansible.module_utils.basic import AnsibleModule, missing_required_lib from ansible.module_utils.common.sys_info import get_platform_subclass -from ansible.module_utils._text import to_native +from ansible.module_utils._text import to_bytes HAS_PSUTIL = False @@ -496,14 +498,22 @@ def main(): delay = module.params['delay'] port = module.params['port'] state = module.params['state'] + path = module.params['path'] + b_path = to_bytes(path, errors='surrogate_or_strict', nonstring='passthru') + search_regex = module.params['search_regex'] + b_search_regex = to_bytes(search_regex, errors='surrogate_or_strict', nonstring='passthru') + msg = module.params['msg'] if search_regex is not None: - compiled_search_re = re.compile(search_regex, re.MULTILINE) + try: + b_compiled_search_re = re.compile(b_search_regex, re.MULTILINE) + except re.error as e: + module.fail_json(msg="Invalid regular expression: %s" % e) else: - compiled_search_re = None + b_compiled_search_re = None match_groupdict = {} match_groups = () @@ -536,7 +546,7 @@ def main(): while datetime.datetime.utcnow() < end: if path: try: - if not os.access(path, os.F_OK): + if not os.access(b_path, os.F_OK): break except IOError: break @@ -562,7 +572,7 @@ def main(): while datetime.datetime.utcnow() < end: if path: try: - os.stat(path) + os.stat(b_path) except OSError as e: # If anything except file not present, throw an error if e.errno != 2: @@ -571,22 +581,20 @@ def main(): # file doesn't exist yet, so continue else: # File exists. Are there additional things to check? - if not compiled_search_re: + if not b_compiled_search_re: # nope, succeed! break try: - f = open(path) - try: - search = re.search(compiled_search_re, f.read()) - if search: - if search.groupdict(): - match_groupdict = search.groupdict() - if search.groups(): - match_groups = search.groups() - - break - finally: - f.close() + with open(b_path, 'rb') as f: + with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as mm: + search = b_compiled_search_re.search(mm) + if search: + if search.groupdict(): + match_groupdict = search.groupdict() + if search.groups(): + match_groups = search.groups() + + break except IOError: pass elif port: @@ -598,8 +606,8 @@ def main(): pass else: # Connected -- are there additional conditions? - if compiled_search_re: - data = '' + if b_compiled_search_re: + b_data = b'' matched = False while datetime.datetime.utcnow() < end: max_timeout = math.ceil(_timedelta_total_seconds(end - datetime.datetime.utcnow())) @@ -612,8 +620,8 @@ def main(): if not response: # Server shutdown break - data += to_native(response, errors='surrogate_or_strict') - if re.search(compiled_search_re, data): + b_data += response + if b_compiled_search_re.search(b_data): matched = True break diff --git a/test/integration/targets/wait_for/files/write_utf16.py b/test/integration/targets/wait_for/files/write_utf16.py new file mode 100644 index 00000000000..6079ed33b02 --- /dev/null +++ b/test/integration/targets/wait_for/files/write_utf16.py @@ -0,0 +1,20 @@ +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import sys + +# utf16 encoded bytes +# to ensure wait_for doesn't have any encoding errors +data = ( + b'\xff\xfep\x00r\x00e\x00m\x00i\x00\xe8\x00r\x00e\x00 \x00i\x00s\x00 ' + b'\x00f\x00i\x00r\x00s\x00t\x00\n\x00p\x00r\x00e\x00m\x00i\x00e\x00' + b'\x00\x03r\x00e\x00 \x00i\x00s\x00 \x00s\x00l\x00i\x00g\x00h\x00t\x00' + b'l\x00y\x00 \x00d\x00i\x00f\x00f\x00e\x00r\x00e\x00n\x00t\x00\n\x00\x1a' + b'\x048\x04@\x048\x04;\x04;\x048\x04F\x040\x04 \x00i\x00s\x00 \x00C\x00y' + b'\x00r\x00i\x00l\x00l\x00i\x00c\x00\n\x00\x01\xd8\x00\xdc \x00a\x00m' + b'\x00 \x00D\x00e\x00s\x00e\x00r\x00e\x00t\x00\n\x00\n' + b'completed\n' +) + +with open(sys.argv[1], 'wb') as f: + f.write(data) diff --git a/test/integration/targets/wait_for/tasks/main.yml b/test/integration/targets/wait_for/tasks/main.yml index c524f990bce..f71ddbda6b0 100644 --- a/test/integration/targets/wait_for/tasks/main.yml +++ b/test/integration/targets/wait_for/tasks/main.yml @@ -104,6 +104,16 @@ - waitfor['match_groupdict']['foo'] == 'data' - waitfor['match_groups'] == ['data', '123'] +- name: write non-ascii file + script: write_utf16.py "{{remote_tmp_dir}}/utf16.txt" + args: + executable: '{{ ansible_facts.python.executable }}' + +- name: test non-ascii file + wait_for: + path: "{{remote_tmp_dir}}/utf16.txt" + search_regex: completed + - name: test wait for port timeout wait_for: port: 12121