Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On Jenkins the test_AWS_figure_of_merit_publisher.py unit test occasionally fails for master branch #452

Open
vitodb opened this issue Feb 28, 2023 · 0 comments

Comments

@vitodb
Copy link
Contributor

vitodb commented Feb 28, 2023

On Jenkins, the unit test test_AWS_figure_of_merit_publisher.py for the master branch is occasionally failing.
It looks like it is not able to find the fixture file AWS_figure_of_merit_pub.csv that is provided with the test itself:
E FileNotFoundError: [Errno 2] No such file or directory: 'AWS_figure_of_merit_pub.csv'

The full error message we get for this unit test is collapsed here
=================================== FAILURES ===================================
________________________________ test_transform ________________________________
[gw3] linux -- Python 3.9.7 /usr/bin/python3

publisher_instance = <decisionengine_modules.AWS.publishers.AWS_figure_of_merit.AWSFOMPublisher object at 0x7f5ec68f53d0>

    def test_transform(publisher_instance):
        data_block = create_datablock()
        publisher_instance.publish(data_block)
        try:
            # pandas 1.2 uses a new method for floats, use the "legacy" one for now
            # https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.2.0.html#change-in-default-floating-precision-for-read-csv-and-read-table
>           opd = pd.read_csv(OUTPUT_FILE, float_precision="legacy")

data_block = {'AWS_Figure_Of_Merit':     AccountName AvailabilityZone  ... MaxLimit AWS_Figure_Of_Merit
0  HEPCloud_CMS       us-ea...0770
4  HEPCloud_CMS       us-east-1a  ...       20            0.238776

[5 rows x 10 columns], 'channel_name': 'test'}
publisher_instance = <decisionengine_modules.AWS.publishers.AWS_figure_of_merit.AWSFOMPublisher object at 0x7f5ec68f53d0>

src/decisionengine_modules/AWS/tests/test_AWS_figure_of_merit_publisher.py:65: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/lib64/python3.9/site-packages/pandas/util/_decorators.py:311: in wrapper
    return func(*args, **kwargs)
        allow_args = ['filepath_or_buffer']
        args       = ('AWS_figure_of_merit_pub.csv',)
        arguments  = " except for the argument 'filepath_or_buffer'"
        func       = <function read_csv at 0x7f5e793e31f0>
        kwargs     = {'float_precision': 'legacy'}
        msg        = 'In a future version of pandas all arguments of read_csv{arguments} will be keyword-only.'
        num_allow_args = 1
        stacklevel = 3
/usr/lib64/python3.9/site-packages/pandas/io/parsers/readers.py:680: in read_csv
    return _read(filepath_or_buffer, kwds)
        cache_dates = True
        chunksize  = None
        comment    = None
        compression = 'infer'
        converters = None
        date_parser = None
        dayfirst   = False
        decimal    = '.'
        delim_whitespace = False
        delimiter  = None
        dialect    = None
        doublequote = True
        dtype      = None
        encoding   = None
        encoding_errors = 'strict'
        engine     = None
        error_bad_lines = None
        escapechar = None
        false_values = None
        filepath_or_buffer = 'AWS_figure_of_merit_pub.csv'
        float_precision = 'legacy'
        header     = 'infer'
        index_col  = None
        infer_datetime_format = False
        iterator   = False
        keep_date_col = False
        keep_default_na = True
        kwds       = {'cache_dates': True, 'chunksize': None, 'comment': None, 'compression': 'infer', ...}
        kwds_defaults = {'delimiter': ',', 'engine': 'c', 'engine_specified': False, 'names': None, ...}
        lineterminator = None
        low_memory = True
        mangle_dupe_cols = True
        memory_map = False
        na_filter  = True
        na_values  = None
        names      = <no_default>
        nrows      = None
        on_bad_lines = None
        parse_dates = None
        prefix     = <no_default>
        quotechar  = '"'
        quoting    = 0
        sep        = <no_default>
        skip_blank_lines = True
        skipfooter = 0
        skipinitialspace = False
        skiprows   = None
        squeeze    = None
        storage_options = None
        thousands  = None
        true_values = None
        usecols    = None
        verbose    = False
        warn_bad_lines = None
/usr/lib64/python3.9/site-packages/pandas/io/parsers/readers.py:575: in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
        chunksize  = None
        filepath_or_buffer = 'AWS_figure_of_merit_pub.csv'
        iterator   = False
        kwds       = {'cache_dates': True, 'chunksize': None, 'comment': None, 'compression': 'infer', ...}
        nrows      = None
/usr/lib64/python3.9/site-packages/pandas/io/parsers/readers.py:934: in __init__
    self._engine = self._make_engine(f, self.engine)
        dialect    = None
        engine     = 'c'
        engine_specified = True
        f          = 'AWS_figure_of_merit_pub.csv'
        kwds       = {'cache_dates': True, 'chunksize': None, 'comment': None, 'compression': 'infer', ...}
        options    = {'cache_dates': True, 'comment': None, 'compression': 'infer', 'converters': None, ...}
        self       = <pandas.io.parsers.readers.TextFileReader object at 0x7f5ec68f5d90>
/usr/lib64/python3.9/site-packages/pandas/io/parsers/readers.py:1218: in _make_engine
    self.handles = get_handle(  # type: ignore[call-overload]
        engine     = 'c'
        f          = 'AWS_figure_of_merit_pub.csv'
        is_text    = True
        mapping    = {'c': <class 'pandas.io.parsers.c_parser_wrapper.CParserWrapper'>, 'pyarrow': <class 'pandas.io.parsers.arrow_parser_w...io.parsers.python_parser.PythonParser'>, 'python-fwf': <class 'pandas.io.parsers.python_parser.FixedWidthFieldParser'>}
        mode       = 'r'
        self       = <pandas.io.parsers.readers.TextFileReader object at 0x7f5ec68f5d90>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

path_or_buf = 'AWS_figure_of_merit_pub.csv', mode = 'r'

    @doc(compression_options=_shared_docs["compression_options"] % "path_or_buf")
    def get_handle(
        path_or_buf: FilePath | BaseBuffer,
        mode: str,
        *,
        encoding: str | None = None,
        compression: CompressionOptions = None,
        memory_map: bool = False,
        is_text: bool = True,
        errors: str | None = None,
        storage_options: StorageOptions = None,
    ) -> IOHandles[str] | IOHandles[bytes]:
        """
        Get file handle for given path/buffer and mode.
    
        Parameters
        ----------
        path_or_buf : str or file handle
            File path or object.
        mode : str
            Mode to open path_or_buf with.
        encoding : str or None
            Encoding to use.
        {compression_options}
    
            .. versionchanged:: 1.0.0
               May now be a dict with key 'method' as compression mode
               and other keys as compression options if compression
               mode is 'zip'.
    
            .. versionchanged:: 1.1.0
               Passing compression options as keys in dict is now
               supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'.
    
            .. versionchanged:: 1.4.0 Zstandard support.
    
        memory_map : bool, default False
            See parsers._parser_params for more information.
        is_text : bool, default True
            Whether the type of the content passed to the file/buffer is string or
            bytes. This is not the same as `"b" not in mode`. If a string content is
            passed to a binary file/buffer, a wrapper is inserted.
        errors : str, default 'strict'
            Specifies how encoding and decoding errors are to be handled.
            See the errors argument for :func:`open` for a full list
            of options.
        storage_options: StorageOptions = None
            Passed to _get_filepath_or_buffer
    
        .. versionchanged:: 1.2.0
    
        Returns the dataclass IOHandles
        """
        # Windows does not default to utf-8. Set to utf-8 for a consistent behavior
        encoding = encoding or "utf-8"
    
        # read_csv does not know whether the buffer is opened in binary/text mode
        if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
            mode += "b"
    
        # validate encoding and errors
        codecs.lookup(encoding)
        if isinstance(errors, str):
            codecs.lookup_error(errors)
    
        # open URLs
        ioargs = _get_filepath_or_buffer(
            path_or_buf,
            encoding=encoding,
            compression=compression,
            mode=mode,
            storage_options=storage_options,
        )
    
        handle = ioargs.filepath_or_buffer
        handles: list[BaseBuffer]
    
        # memory mapping needs to be the first step
        handle, memory_map, handles = _maybe_memory_map(
            handle,
            memory_map,
            ioargs.encoding,
            ioargs.mode,
            errors,
            ioargs.compression["method"] not in _compression_to_extension,
        )
    
        is_path = isinstance(handle, str)
        compression_args = dict(ioargs.compression)
        compression = compression_args.pop("method")
    
        # Only for write methods
        if "r" not in mode and is_path:
            check_parent_directory(str(handle))
    
        if compression:
            if compression != "zstd":
                # compression libraries do not like an explicit text-mode
                ioargs.mode = ioargs.mode.replace("t", "")
            elif compression == "zstd" and "b" not in ioargs.mode:
                # python-zstandard defaults to text mode, but we always expect
                # compression libraries to use binary mode.
                ioargs.mode += "b"
    
            # GZ Compression
            if compression == "gzip":
                if is_path:
                    assert isinstance(handle, str)
                    # error: Incompatible types in assignment (expression has type
                    # "GzipFile", variable has type "Union[str, BaseBuffer]")
                    handle = gzip.GzipFile(  # type: ignore[assignment]
                        filename=handle,
                        mode=ioargs.mode,
                        **compression_args,
                    )
                else:
                    handle = gzip.GzipFile(
                        # No overload variant of "GzipFile" matches argument types
                        # "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
                        fileobj=handle,  # type: ignore[call-overload]
                        mode=ioargs.mode,
                        **compression_args,
                    )
    
            # BZ Compression
            elif compression == "bz2":
                # No overload variant of "BZ2File" matches argument types
                # "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
                handle = bz2.BZ2File(  # type: ignore[call-overload]
                    handle,
                    mode=ioargs.mode,
                    **compression_args,
                )
    
            # ZIP Compression
            elif compression == "zip":
                # error: Argument 1 to "_BytesZipFile" has incompatible type "Union[str,
                # BaseBuffer]"; expected "Union[Union[str, PathLike[str]],
                # ReadBuffer[bytes], WriteBuffer[bytes]]"
                handle = _BytesZipFile(
                    handle, ioargs.mode, **compression_args  # type: ignore[arg-type]
                )
                if handle.mode == "r":
                    handles.append(handle)
                    zip_names = handle.namelist()
                    if len(zip_names) == 1:
                        handle = handle.open(zip_names.pop())
                    elif len(zip_names) == 0:
                        raise ValueError(f"Zero files found in ZIP file {path_or_buf}")
                    else:
                        raise ValueError(
                            "Multiple files found in ZIP file. "
                            f"Only one file per ZIP: {zip_names}"
                        )
    
            # XZ Compression
            elif compression == "xz":
                handle = get_lzma_file()(handle, ioargs.mode)
    
            # Zstd Compression
            elif compression == "zstd":
                zstd = import_optional_dependency("zstandard")
                if "r" in ioargs.mode:
                    open_args = {"dctx": zstd.ZstdDecompressor(**compression_args)}
                else:
                    open_args = {"cctx": zstd.ZstdCompressor(**compression_args)}
                handle = zstd.open(
                    handle,
                    mode=ioargs.mode,
                    **open_args,
                )
    
            # Unrecognized Compression
            else:
                msg = f"Unrecognized compression type: {compression}"
                raise ValueError(msg)
    
            assert not isinstance(handle, str)
            handles.append(handle)
    
        elif isinstance(handle, str):
            # Check whether the filename is to be opened in binary mode.
            # Binary mode does not support 'encoding' and 'newline'.
            if ioargs.encoding and "b" not in ioargs.mode:
                # Encoding
>               handle = open(
                    handle,
                    ioargs.mode,
                    encoding=ioargs.encoding,
                    errors=errors,
                    newline="",
                )
E               FileNotFoundError: [Errno 2] No such file or directory: 'AWS_figure_of_merit_pub.csv'

compression = None
compression_args = {}
encoding   = 'utf-8'
errors     = 'strict'
handle     = 'AWS_figure_of_merit_pub.csv'
handles    = []
ioargs     = IOArgs(filepath_or_buffer='AWS_figure_of_merit_pub.csv', encoding='utf-8', mode='r', compression={'method': None}, should_close=False)
is_path    = True
is_text    = True
memory_map = False
mode       = 'r'
path_or_buf = 'AWS_figure_of_merit_pub.csv'
storage_options = None

/usr/lib64/python3.9/site-packages/pandas/io/common.py:786: FileNotFoundError
------------------------------ Captured log call -------------------------------
DEBUG    channel:AWS_generic_publisher.py:53 {"channel": "test", "class_name": "AWSFOMPublisher", "event": "in AWSGenericPublisher publish", "level": "debug", "logger": "channel", "timestamp": "2023-02-28 09:08:39"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant