Source code for mvn.aux_pandas.aux_pandas

#!/usr/bin/env python3

import numpy as np
import pandas as pd
from contextlib import ExitStack
from IPython.core.display import display

from sklearn.datasets import load_wine


# Example dataset: the scikit-learn wine data, exposed as a single DataFrame
# holding every feature column followed by a trailing "target" column.
wine_ds = load_wine()
wine_df = pd.DataFrame(
    # column_stack turns the 1-D target vector into one extra column
    # appended to the right of the 2-D feature matrix.
    data=np.column_stack((wine_ds["data"], wine_ds["target"])),
    columns=[*wine_ds["feature_names"], "target"],
)


def fix_column_names(df0, lowercase=False):
    """Normalize the column labels of a dataframe in place.

    Every string label is stripped of surrounding whitespace, then each
    space, hyphen and dot inside it is replaced by an underscore.
    Labels that are not strings (for example the default integer labels)
    are left untouched instead of raising.

    Parameters
    ----------
    df0 : pandas.DataFrame
        Dataframe whose ``columns`` attribute is rewritten in place.
    lowercase : bool, default False
        When true, string labels are additionally lower-cased.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, to allow call chaining.

    Examples
    --------
    >>> import pandas as pd
    >>> df0 = pd.DataFrame({" Col A ": [1], "b-c.d": [2]})
    >>> list(fix_column_names(df0, lowercase=True).columns)
    ['col_a', 'b_c_d']
    """
    # One translation table replaces three separate passes over the labels.
    separators = str.maketrans({" ": "_", "-": "_", ".": "_"})

    def _clean(label):
        # Only text labels can be normalized; anything else passes through.
        if not isinstance(label, str):
            return label
        cleaned = label.strip().translate(separators)
        return cleaned.lower() if lowercase else cleaned

    df0.columns = df0.columns.map(_clean)
    return df0
def _context_pandas(
    max_columns=222,
    max_colwidth=44,
    width=2222,
    max_rows=44,
    min_rows=33,
):
    """Build the pandas display-option contexts used for dataframe output.

    Each keyword maps to the ``display.<keyword>`` pandas option.  The
    contexts are returned un-entered, one per option, so that the caller
    can activate them together (for example through an ``ExitStack``).

    Returns
    -------
    list
        Five ``pd.option_context`` objects, in the order: max_columns,
        max_colwidth, width, max_rows, min_rows.
    """
    display_options = {
        "display.max_columns": max_columns,
        "display.max_colwidth": max_colwidth,
        "display.width": width,
        "display.max_rows": max_rows,
        "display.min_rows": min_rows,
    }
    return [
        pd.option_context(option, value)
        for option, value in display_options.items()
    ]
def disp_df(df0, **opt):
    """Show a dataframe with the custom pandas display options applied.

    The options are active only while the dataframe is being displayed;
    they are restored when the function returns.

    Parameters
    ----------
    df0 : object
        The object handed to ``display`` (normally a dataframe).
    **opt
        Keyword overrides forwarded unchanged to ``_context_pandas``.

    Examples
    --------
    >>> from mvn import disp_df
    >>> from mvn.aux_pandas import wine_df
    >>> disp_df(wine_df)
    """
    with ExitStack() as stack:
        # Activate every display option; the stack undoes them on exit.
        for manager in _context_pandas(**opt):
            stack.enter_context(manager)
        display(df0)
def repr_df(df0, **opt):
    """Return the text form of a dataframe under the custom display options.

    The options are active only while the text is being produced; they
    are restored before the function returns.

    Parameters
    ----------
    df0 : object
        The object converted with ``str`` (normally a dataframe).
    **opt
        Keyword overrides forwarded unchanged to ``_context_pandas``.

    Returns
    -------
    str
        ``str(df0)`` evaluated while the display options are in effect.

    Examples
    --------
    >>> from mvn.aux_pandas import repr_df, wine_df
    >>> print(repr_df(wine_df))
    """
    with ExitStack() as stack:
        # Activate every display option; the stack undoes them on exit.
        for manager in _context_pandas(**opt):
            stack.enter_context(manager)
        rendered = str(df0)
    return rendered
def split_dataframe(dframe, max_rows):
    """Split a pandas dataframe into consecutive chunks of at most max_rows.

    Rows are selected by position (``.iloc``), so the result does not
    depend on the kind of index the dataframe carries.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Dataframe to split.
    max_rows : int or float
        Maximum number of rows per chunk.  The value is truncated with
        ``int`` (so ``5e0`` is accepted) and must be at least 1 afterwards.

    Returns
    -------
    dict
        Maps ``"chunk_0001"``, ``"chunk_0002"``, ... to the successive
        row slices.  An empty dataframe yields an empty dict.

    Raises
    ------
    ValueError
        If ``max_rows`` truncates to a value smaller than 1.

    Examples
    --------
    >>> import pathlib
    >>> import pandas as pd
    >>> from mvn.aux_pandas import split_dataframe
    >>> df0 = pd.DataFrame({'A': range(1, 21), 'B': range(21, 41)})
    >>> max_rows = 5e0  # 25e4
    >>> chunks_dict = split_dataframe(df0, max_rows)
    >>>
    >>> dir0 = pathlib.Path("../../data/i0000-data-chunks/")
    >>> dir0.mkdir(mode=0o700, parents=True, exist_ok=True)
    >>>
    >>> for key, chunk in chunks_dict.items():
    ...     print(f"{key}: {chunk.shape}")
    ...     chunk.to_pickle((dir0 / key).with_suffix(".pkl"))
    """
    max_rows = int(max_rows)
    if max_rows < 1:
        # A non-positive size would otherwise divide by zero or silently
        # produce no chunks at all.
        raise ValueError(f"max_rows must be at least 1, got {max_rows}")

    # Chunk start offsets 0, max_rows, 2*max_rows, ...; the last slice is
    # simply shorter when the length is not a multiple of max_rows.
    starts = range(0, len(dframe), max_rows)
    return {
        f"chunk_{number:04d}": dframe.iloc[start : start + max_rows]  # noqa: E203
        for number, start in enumerate(starts, start=1)
    }