|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "id": "84b491a8-4ce7-44f3-aed9-feba2ddd1b3b", |
| 7 | + "metadata": {}, |
| 8 | + "outputs": [ |
| 9 | + { |
| 10 | + "ename": "FileNotFoundError", |
| 11 | + "evalue": "[Errno 2] No such file or directory: 'hr_terminations.csv'", |
| 12 | + "output_type": "error", |
| 13 | + "traceback": [ |
| 14 | + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
| 15 | + "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", |
| 16 | + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpd\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;66;03m# Load the datasets\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m df_hr = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhr_terminations.csv\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 5\u001b[39m df_app = pd.read_csv(\u001b[33m'\u001b[39m\u001b[33mapp_abc_users.csv\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 7\u001b[39m \u001b[38;5;66;03m# Convert date columns to actual datetime objects immediately\u001b[39;00m\n", |
| 17 | + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.1/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", |
| 18 | + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.1/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", |
| 19 | + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.1/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", |
| 20 | + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.1/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n", |
| 21 | + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.1/libexec/lib/python3.14/site-packages/pandas/io/common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n", |
| 22 | + "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'hr_terminations.csv'" |
| 23 | + ] |
| 24 | + } |
| 25 | + ], |
| 26 | + "source": [ |
| 27 | + "import pandas as pd\n", |
| 28 | + "\n", |
| 29 | + "# Load the datasets\n", |
| 30 | + "df_hr = pd.read_csv('hr_terminations.csv')\n", |
| 31 | + "df_app = pd.read_csv('app_abc_users.csv')\n", |
| 32 | + "\n", |
| 33 | + "# Convert date columns to actual datetime objects immediately\n", |
| 34 | + "df_hr['Term_Date'] = pd.to_datetime(df_hr['Term_Date'])\n", |
| 35 | + "df_app['Last_Login'] = pd.to_datetime(df_app['Last_Login'])\n", |
| 36 | + "\n", |
| 37 | + "print(f\"HR Records: {len(df_hr)}\")\n", |
| 38 | + "print(f\"App Records: {len(df_app)}\")\n", |
| 39 | + "df_hr.head()" |
| 40 | + ] |
| 41 | + }, |
| 42 | + { |
| 43 | + "cell_type": "code", |
| 44 | + "execution_count": null, |
| 45 | + "id": "f67f851a-ee33-405a-abf4-9393f1837047", |
| 46 | + "metadata": {}, |
| 47 | + "outputs": [], |
| 48 | + "source": [ |
| 49 | + "# Strip whitespace from IDs and Names to prevent 'false negatives'\n", |
| 50 | + "df_hr['Employee_ID'] = df_hr['Employee_ID'].str.strip()\n", |
| 51 | + "df_app['User_ID'] = df_app['User_ID'].str.strip()\n", |
| 52 | + "\n", |
| 53 | + "# Standardizing names for easier visual review later\n", |
| 54 | + "df_hr['Name'] = df_hr['Name'].str.strip().str.title()\n", |
| 55 | + "df_app['Full_Name'] = df_app['Full_Name'].str.strip().str.title()\n", |
| 56 | + "\n", |
| 57 | + "print(\"Data cleaning complete.\")" |
| 58 | + ] |
| 59 | + }, |
| 60 | + { |
| 61 | + "cell_type": "code", |
| 62 | + "execution_count": null, |
| 63 | + "id": "6a32518c-fe0a-4fb9-89fe-7796d5267957", |
| 64 | + "metadata": {}, |
| 65 | + "outputs": [], |
| 66 | + "source": [ |
| 67 | + "# We join on the ID. \n", |
| 68 | + "# We use 'left' because we only care about people on the termination list.\n", |
| 69 | + "audit_merge = pd.merge(\n", |
| 70 | + " df_hr, \n", |
| 71 | + " df_app, \n", |
| 72 | + " left_on='Employee_ID', \n", |
| 73 | + " right_on='User_ID', \n", |
| 74 | + " how='left'\n", |
| 75 | + ")\n", |
| 76 | + "\n", |
| 77 | + "# Display the merged table\n", |
| 78 | + "audit_merge" |
| 79 | + ] |
| 80 | + }, |
| 81 | + { |
| 82 | + "cell_type": "code", |
| 83 | + "execution_count": null, |
| 84 | + "id": "b46cc115-8fee-476c-9f8c-6980d49a000f", |
| 85 | + "metadata": {}, |
| 86 | + "outputs": [], |
| 87 | + "source": [ |
| 88 | + "# 1. Identify Terminated but still 'Active' in Application\n", |
| 89 | + "active_leavers = audit_merge[audit_merge['Account_Status'] == 'Active'].copy()\n", |
| 90 | + "\n", |
| 91 | + "# 2. Identify Logins occurring AFTER termination date\n", |
| 92 | + "# This is a critical security finding indicating potential account misuse\n", |
| 93 | + "post_term_logins = audit_merge[audit_merge['Last_Login'] > audit_merge['Term_Date']].copy()\n", |
| 94 | + "\n", |
| 95 | + "print(f\"Finding 1: {len(active_leavers)} users still marked as 'Active'\")\n", |
| 96 | + "print(f\"Finding 2: {len(post_term_logins)} users logged in after termination\")" |
| 97 | + ] |
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "code", |
| 101 | + "execution_count": null, |
| 102 | + "id": "9644d4b3-9682-4ebe-acfc-1474218c58b0", |
| 103 | + "metadata": {}, |
| 104 | + "outputs": [], |
| 105 | + "source": [ |
| 106 | + "# Create a summary report\n", |
| 107 | + "with pd.ExcelWriter('Termination_Audit_Report.xlsx') as writer:\n", |
| 108 | + " active_leavers.to_excel(writer, sheet_name='Active_Leavers', index=False)\n", |
| 109 | + " post_term_logins.to_excel(writer, sheet_name='Post_Term_Logins', index=False)\n", |
| 110 | + " audit_merge.to_excel(writer, sheet_name='Full_Traceability_Matrix', index=False)\n", |
| 111 | + "\n", |
| 112 | + "print(\"Audit Report Exported: Termination_Audit_Report.xlsx\")" |
| 113 | + ] |
| 114 | + } |
| 115 | + ], |
| 116 | + "metadata": { |
| 117 | + "kernelspec": { |
| 118 | + "display_name": "Python 3 (ipykernel)", |
| 119 | + "language": "python", |
| 120 | + "name": "python3" |
| 121 | + }, |
| 122 | + "language_info": { |
| 123 | + "codemirror_mode": { |
| 124 | + "name": "ipython", |
| 125 | + "version": 3 |
| 126 | + }, |
| 127 | + "file_extension": ".py", |
| 128 | + "mimetype": "text/x-python", |
| 129 | + "name": "python", |
| 130 | + "nbconvert_exporter": "python", |
| 131 | + "pygments_lexer": "ipython3", |
| 132 | + "version": "3.14.2" |
| 133 | + } |
| 134 | + }, |
| 135 | + "nbformat": 4, |
| 136 | + "nbformat_minor": 5 |
| 137 | +} |
0 commit comments