"""Export dataframe to ASCII format while preserving attrs"""
import pandas as pd
from typing import Callable, Hashable, Sequence, List, Tuple, cast, Union, Optional
from pandas.io.common import get_handle
from pandas._typing import (
CompressionOptions,
BaseBuffer,
FilePath,
IndexLabel,
StorageOptions,
OpenFileErrors,
)
from io import IOBase
from os import PathLike
from .header import HeaderInfo
def to_csv(
    self: pd.DataFrame,
    filepath_or_buffer: Union[FilePath, BaseBuffer],
    *,
    sep: str = ",",
    commentchar: str = "#",
    na_rep: str = "",
    float_format: Union[str, Callable, None] = None,
    columns: Union[Sequence[Hashable], None] = None,
    header: Union[bool, List[str]] = True,
    index: bool = True,
    index_label: Optional[IndexLabel] = None,
    mode: str = "w",
    encoding: Optional[str] = None,
    compression: CompressionOptions = "infer",
    quoting: Optional[int] = None,
    quotechar: str = '"',
    lineterminator: Optional[str] = None,
    chunksize: Optional[int] = None,
    date_format: Optional[str] = None,
    doublequote: bool = True,
    escapechar: Optional[str] = None,
    decimal: str = ".",
    errors: OpenFileErrors = "strict",
    storage_options: Optional[StorageOptions] = None,
) -> Optional[str]:
    r"""
    Write object to a comma-separated values (csv) file while preserving attrs.

    Falls back to `pd.DataFrame.to_csv` if there is no attrs content.

    Parameters
    ----------
    filepath_or_buffer : str, path object, file-like object, or None, default None
        String, path object (implementing os.PathLike[str]), or file-like
        object implementing a write() function. If None, the result is
        returned as a string. If a non-binary file object is passed, it should
        be opened with `newline=''`, disabling universal newlines. If a binary
        file object is passed, `mode` might need to contain a `'b'`.
    sep : str, default ','
        String of length 1. Field delimiter for the output file.
    commentchar : str, default '#'
        Character starting a comment line for the output file.
    na_rep : str, default ''
        Missing data representation.
    float_format : str, Callable, default None
        Format string for floating point numbers. If a Callable is given, it
        takes precedence over other numeric formatting parameters, like decimal.
    columns : sequence, optional
        Columns to write.
    header : bool or list of str, default True
        Write out the column names. If a list of strings is given it is
        assumed to be aliases for the column names.
    index : bool, default True
        Write row names (index).
    index_label : str or sequence, or False, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the object uses MultiIndex. If
        False do not print fields for index names. Use index_label=False
        for easier importing in R.
    mode : {'w', 'x', 'a'}, default 'w'
        Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control
        the file opening. Typical values include:

        - 'w', truncate the file first.
        - 'x', exclusive creation, failing if the file already exists.
        - 'a', append to the end of file if it exists.
    encoding : str, optional
        A string representing the encoding to use in the output file,
        defaults to 'utf-8'. `encoding` is not supported if
        `filepath_or_buffer` is a non-binary file object.
    compression : str or dict, default 'infer'
        For on-the-fly compression of the output data. If 'infer' and
        `filepath_or_buffer` is path-like, then detect compression from the
        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
        Set to ``None`` for no compression.
        Can also be a dict with key ``'method'`` set to one of
        {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``}
        and other key-value pairs forwarded to ``zipfile.ZipFile``,
        ``gzip.GzipFile``, ``bz2.BZ2File``, ``zstandard.ZstdCompressor``,
        ``lzma.LZMAFile`` or ``tarfile.TarFile``, respectively. For example,
        ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``
        gives faster compression and a reproducible gzip archive.
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
        then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
        will treat them as non-numeric.
    quotechar : str, default '\"'
        String of length 1. Character used to quote fields.
    lineterminator : str, optional
        The newline character or character sequence to use in the output
        file. Defaults to `os.linesep`, which depends on the OS in which
        this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.).
    chunksize : int or None
        Rows to write at a time.
    date_format : str, default None
        Format string for datetime objects.
    doublequote : bool, default True
        Control quoting of `quotechar` inside a field.
    escapechar : str, default None
        String of length 1. Character used to escape `sep` and `quotechar`
        when appropriate.
    decimal : str, default '.'
        Character recognized as decimal separator. E.g. use ',' for
        European data.
    errors : str, default 'strict'
        Specifies how encoding and decoding errors are to be handled.
        See the errors argument for :func:`open` for a full list of options.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection,
        e.g. host, port, username, password, etc. For HTTP(S) URLs the
        key-value pairs are forwarded to ``urllib.request.Request`` as header
        options. For other URLs (e.g. starting with "s3://", and "gcs://")
        the key-value pairs are forwarded to ``fsspec.open``. Please see
        ``fsspec`` and ``urllib`` for more details.

    Returns
    -------
    None or str
        If filepath_or_buffer is None, returns the resulting csv format as a
        string. Otherwise returns None.
    """
    # Keyword arguments forwarded verbatim to pd.DataFrame.to_csv; `header`
    # may be overridden below when we emit our own header block.
    kwargs = {
        "sep": sep,
        "na_rep": na_rep,
        "float_format": float_format,
        "columns": columns,
        "header": header,
        "index": index,
        "index_label": index_label,
        "mode": mode,
        "encoding": encoding,
        "compression": compression,
        "quoting": quoting,
        "quotechar": quotechar,
        "lineterminator": lineterminator,
        "chunksize": chunksize,
        "date_format": date_format,
        "doublequote": doublequote,
        "escapechar": escapechar,
        "decimal": decimal,
        "errors": errors,
        "storage_options": storage_options,
    }
    if not self.attrs:
        # Nothing to preserve: defer entirely to pandas.
        return pd.DataFrame.to_csv(self, filepath_or_buffer, **kwargs)  # pyright: ignore[reportCallIssue, reportArgumentType]
    # Open the destination ourselves so the attrs header and the data rows
    # land in the same handle (get_handle deals with paths, buffers,
    # compression and remote storage uniformly).
    with get_handle(
        filepath_or_buffer,
        mode,
        encoding=encoding,
        errors=errors,
        compression=compression,
        storage_options=storage_options,
    ) as handles:
        if header:
            # Emit the attrs-preserving header first; column names are part
            # of that block, so suppress pandas' own header line below.
            handles.handle.write(
                ascii_generate_header(
                    self,
                    comments=commentchar,
                    delimiter=sep,
                    commented_header=(sep != ","),
                )
            )
            kwargs["header"] = False
        return pd.DataFrame.to_csv(self, handles.handle, **kwargs)
def to_ascii(
    self: pd.DataFrame,
    filepath_or_buffer: Union[FilePath, BaseBuffer],
    *,
    sep: str = " ",
    commentchar: str = "#",
    **kwargs,
) -> Optional[str]:
    r"""
    Write object to an ASCII values file while preserving attrs.

    Equivalent to `to_csv` with the default `sep` set to a space.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        Destination of the write; forwarded to `to_csv`.
    sep : str, default ' '
        String of length 1. Field delimiter for the output file.
    commentchar : str, default '#'
        Character starting a comment line for the output file.
    **kwargs
        Additional keyword arguments forwarded to `to_csv`.

    Returns
    -------
    None or str
        Whatever `to_csv` returns: the csv text as a string if
        `filepath_or_buffer` is None, otherwise None.

    See also
    --------
    to_csv : Write object to a CSV file while preserving attrs.
    """
    # Thin wrapper: only the default delimiter differs from to_csv.
    return to_csv(self, filepath_or_buffer, sep=sep, commentchar=commentchar, **kwargs)
def from_csv(
    filepath_or_buffer: Union[str, IOBase, PathLike],
    *,
    commented_header: bool = False,
    **kwargs,
):
    r"""
    Read a CSV file into a DataFrame while preserving header information.

    Equivalent to `pd.read_csv` with preserved header information.
    Also supports optionally iterating or breaking of the file into chunks.
    Additional help can be found in the online docs for
    `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable.
    commented_header : bool, default False
        Whether the column definition header line starts with a comment
        character.
    comment : str, default '#'
        Character to treat as a comment character.
    delimiter : str, default ','
        Character or regex pattern to treat as the delimiter. If
        ``delimiter=None``, the C engine cannot automatically detect the
        separator, but the Python parsing engine can, meaning the latter will
        be used and automatically detect the separator from only the first
        valid row of the file by Python's builtin sniffer tool,
        ``csv.Sniffer``. In addition, separators longer than 1 character and
        different from ``'\s+'`` will be interpreted as regular expressions
        and will also force the use of the Python parsing engine. Note that
        regex delimiters are prone to ignoring quoted data. Regex example:
        ``'\r\t'``.
    **kwargs
        Additional keyword arguments forwarded to ``pd.read_csv``.

    Returns
    -------
    DataFrame : pd.DataFrame
        The parsed data as a pd.DataFrame.
    header : HeaderInfo
        The header information extracted from the file.

    See also
    --------
    pd.read_csv : Read a CSV file into a DataFrame.
    """
    # Defaults mirror to_csv's output conventions (',' fields, '#' comments).
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("comment", "#")
    # Parse the attrs header first: it tells us how many lines to skip and
    # which column names the file declares.
    nlines, hdr, names = ascii_read_header(
        filepath_or_buffer,
        commented_header=commented_header,
        delimiter=kwargs["delimiter"],
        commentchar=kwargs["comment"],
    )
    # setdefault keeps any caller-provided overrides intact.
    kwargs.setdefault("names", names)
    kwargs.setdefault("skiprows", nlines)
    kwargs.setdefault("header", None)
    # NOTE(review): for stream inputs, ascii_read_header has already consumed
    # from the handle; this assumes it rewinds (or that skiprows accounts for
    # it) -- confirm for file-like `filepath_or_buffer`.
    df = pd.read_csv(filepath_or_buffer, **kwargs)  # type: ignore[arg-type]
    return df, hdr
def from_ascii(
    filepath_or_buffer: Union[str, IOBase, PathLike],
    *,
    commented_header: bool = False,
    **kwargs,
):
    """Read an ASCII file into a DataFrame.

    Equivalent to `from_csv` with the default delimiter set to " ".

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.csv``.
    commented_header : bool, default False
        Whether the header is commented or not.
    **kwargs : dict
        Additional keyword arguments passed to ``pd.read_csv``.

    Returns
    -------
    DataFrame : pd.DataFrame
        The parsed data as a pd.DataFrame.
    header : HeaderInfo
        The header information extracted from the file.

    See also
    --------
    from_csv : Read a CSV file into a DataFrame.
    """
    # Thin wrapper: only the default delimiter differs from from_csv.
    kwargs.setdefault("delimiter", " ")
    return from_csv(filepath_or_buffer, commented_header=commented_header, **kwargs)