PK=T8N;H&pipomatic_pipomatic_obfuscate_files.py"Obfuscate files package" __version__ = "0.1" #!/usr/bin/env python # coding: utf-8 # To do: # # Replace each unique value with the same random text # In[1]: from pathlib import Path import numpy as np import pandas as pd from pandas.api.types import ( is_datetime64_any_dtype, is_numeric_dtype, is_integer_dtype, is_object_dtype, is_string_dtype, ) # In[2]: def delete_files(folder): file_generator = folder.glob("**/*") file_list = list(file_generator) for file in file_list: file.unlink() return list(file_list) def move_files(samples_folder, file_to_copy, destination_folder): (destination_folder / file_to_copy).write_bytes( (samples_folder / file_to_copy).read_bytes() ) return file_to_copy def convert_to_numeric_and_date(df, dayfirst=True): for column in df.columns: if is_object_dtype(df[column]) or is_string_dtype(df[column]): try: df[column] = pd.to_numeric(df[column], downcast="integer") except: try: df[column] = df[column].str.replace("$", "") df[column] = df[column].str.replace(",", "") df[column] = pd.to_numeric(df[column]) except: try: df[column] = pd.to_datetime(df[column], dayfirst=dayfirst) except: pass return df def random_dates(start, end, seed=1, replace=True, number_of_rows=100): dates = pd.date_range(start, end).to_series() return dates.sample(number_of_rows, replace=replace, random_state=seed).index def dataframe_obfuscator(df, number_of_rows=100): for column in df.columns: if is_datetime64_any_dtype(df[column]): df[column] = random_dates(min(df[column]), max(df[column]), seed=1) elif is_integer_dtype(df[column]): df[column] = df[column].fillna(0) if min(df[column]) < max(df[column]): df[column] = np.random.randint( min(df[column]), max(df[column]), size=(number_of_rows) ) else: df[column] = min(df[column]) elif is_numeric_dtype(df[column]): df[column] = df[column].fillna(0) df[column] = np.random.uniform( min(df[column]), max(df[column]), size=(number_of_rows) ) else: df[column] = "random text" return df def obfuscate_csv(data_file, dayfirst=True, number_of_rows=100): df = pd.read_csv(data_file, nrows=number_of_rows) df = convert_to_numeric_and_date(df) df = dataframe_obfuscator(df) df.to_csv(data_file, header=True, index=False) return df def obfuscate_excel(data_file, dayfirst=True, number_of_rows=100): df = pd.read_excel(data_file, nrows=number_of_rows) display(df.head()) df = convert_to_numeric_and_date(df) df = dataframe_obfuscator(df) df.to_excel(data_file, header=True, index=False) return df # In[5]: get_ipython().system( 'jupyter nbconvert --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags="[\'build\']" --TemplateExporter.exclude_output=True --to python "obfuscate_files.ipynb"' ) first_line = """'Obfuscate files package' __version__ = '0.1' """ script_file = Path.cwd() / "obfuscate_files.py" script = script_file.read_text() script_file.write_text(first_line + script) username = script_file.parent.parent.name system_name = script_file.parent.name standardised_script_name = f"pipomatic_{username}_{system_name}.py" script_file.replace(script_file.parent / standardised_script_name) standardised_script_name # In[ ]: PK!Hd BUc7pipomatic_pipomatic_obfuscate_files-0.1.dist-info/WHEEL HM K-*ϳR03rOK-J,/RH,rzd&Y)r$[)T&UD"PK!HU8.:pipomatic_pipomatic_obfuscate_files-0.1.dist-info/METADATAur0D{}@ & ` P2t,KInwnn(ň7 ¾0-'FG09)lth#Q\ks84l)IA@swT4c q,현{Ȗ;fCWZSo`! 1&gXPK=T8N;H&pipomatic_pipomatic_obfuscate_files.pyPK!Hd BUc7`pipomatic_pipomatic_obfuscate_files-0.1.dist-info/WHEELPK!HU8.: pipomatic_pipomatic_obfuscate_files-0.1.dist-info/METADATAPK!H_z8 pipomatic_pipomatic_obfuscate_files-0.1.dist-info/RECORDPKN