# Learn practical skills, build real-world projects, and advance your career
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate every file beneath the Kaggle input directory and print its full path.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
# Output:
# /kaggle/input/employee/sample_submission.csv
# /kaggle/input/employee/Test.csv
# /kaggle/input/employee/Train.csv
import torch
import torchvision
import torch.nn as nn
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
# Load the train and test CSV files of the employee data set, then preview
# the first 20 rows of each frame (train first, test second).
dataframe_raw = pd.read_csv("../input/employee/Train.csv")
dataframe_test = pd.read_csv("../input/employee/Test.csv")
for preview_frame in (dataframe_raw, dataframe_test):
    print(preview_frame.head(20))
Employee_ID Gender Age Education_Level Relationship_Status Hometown \ 0 EID_23371 F 42.0 4 Married Franklin 1 EID_18000 M 24.0 3 Single Springfield 2 EID_3891 F 58.0 3 Married Clinton 3 EID_17492 F 26.0 3 Single Lebanon 4 EID_22534 F 31.0 1 Married Springfield 5 EID_2278 M 54.0 3 Married Lebanon 6 EID_18588 F 21.0 4 Married Springfield 7 EID_1235 F NaN 3 Married Springfield 8 EID_10197 M 40.0 4 Single Springfield 9 EID_21262 M 45.0 3 Married Lebanon 10 EID_9153 F 65.0 5 Single Lebanon 11 EID_19087 F 19.0 5 Single Springfield 12 EID_4825 M 63.0 3 Married Lebanon 13 EID_17644 F 23.0 4 Single Washington 14 EID_20902 F 19.0 3 Married Washington 15 EID_20121 F NaN 3 Married Springfield 16 EID_5504 M 65.0 4 Single Franklin 17 EID_8049 F 40.0 4 Married Springfield 18 EID_14638 M 33.0 3 Single Springfield 19 EID_12947 M 32.0 3 Single Lebanon Unit Decision_skill_possess Time_of_service \ 0 IT Conceptual 4.0 1 Logistics Analytical 5.0 2 Quality Conceptual 27.0 3 Human Resource Management Behavioral 4.0 4 Logistics Conceptual 5.0 5 Purchasing Conceptual 19.0 6 Purchasing Directive 2.0 7 Sales Directive 34.0 8 Production Analytical 13.0 9 IT Directive 21.0 10 Purchasing Behavioral 31.0 11 Operarions Analytical 0.0 12 Sales Conceptual 27.0 13 Logistics Conceptual 4.0 14 Human Resource Management Directive 0.0 15 Logistics Analytical 0.0 16 IT Behavioral 35.0 17 Accounting and Finance Directive 12.0 18 Accounting and Finance Analytical 9.0 19 IT Conceptual 7.0 Time_since_promotion ... Compensation_and_Benefits Work_Life_balance \ 0 4 ... type2 3.0 1 4 ... type2 4.0 2 3 ... type2 1.0 3 3 ... type2 1.0 4 4 ... type3 3.0 5 1 ... type2 1.0 6 1 ... type2 2.0 7 4 ... type3 2.0 8 1 ... type0 4.0 9 4 ... type3 4.0 10 3 ... type3 3.0 11 0 ... type4 1.0 12 1 ... type2 1.0 13 2 ... type2 3.0 14 0 ... type2 3.0 15 0 ... type2 2.0 16 1 ... type2 2.0 17 1 ... type3 2.0 18 1 ... type3 1.0 19 4 ... 
type2 1.0 VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 Attrition_rate 0 4 0.7516 1.8688 2.0 4 5 3 0.1841 1 3 -0.9612 -0.4537 2.0 3 5 3 0.0670 2 4 -0.9612 -0.4537 3.0 3 8 3 0.0851 3 3 -1.8176 -0.4537 NaN 3 7 3 0.0668 4 1 0.7516 -0.4537 2.0 2 8 2 0.1827 5 3 -1.8176 1.8688 2.0 2 8 3 0.7613 6 3 -0.9612 0.7075 2.0 3 7 3 0.2819 7 3 -0.1048 -0.4537 2.0 3 9 3 0.1169 8 1 NaN 1.8688 2.0 5 6 3 0.1968 9 3 0.7516 -0.4537 2.0 4 8 3 0.2870 10 3 0.7516 -0.4537 3.0 4 6 3 0.2735 11 3 -1.8176 -0.4537 2.0 3 6 3 0.0800 12 3 -0.1048 -0.4537 2.0 5 6 3 0.2109 13 4 -0.1048 -1.6150 2.0 4 8 3 0.1696 14 3 -0.9612 1.8688 2.0 4 6 3 0.0318 15 3 0.7516 -1.6150 1.0 3 8 3 0.1331 16 3 -0.1048 0.7075 1.0 3 5 3 0.0671 17 3 -0.9612 -0.4537 2.0 5 8 4 0.1468 18 3 -1.8176 0.7075 2.0 3 8 3 0.0560 19 3 NaN 0.7075 1.0 1 5 4 0.6410 [20 rows x 24 columns] Employee_ID Gender Age Education_Level Relationship_Status Hometown \ 0 EID_22713 F 32.0 5 Single Springfield 1 EID_9658 M 65.0 2 Single Lebanon 2 EID_22203 M 52.0 3 Married Springfield 3 EID_7652 M 50.0 5 Single Washington 4 EID_6516 F 44.0 3 Married Franklin 5 EID_20283 F 22.0 4 Married Franklin 6 EID_21014 M 42.0 3 Married Washington 7 EID_7693 F 41.0 2 Married Springfield 8 EID_13232 M 31.0 1 Single Springfield 9 EID_6515 M 48.0 2 Single Springfield 10 EID_13639 F 31.0 4 Single Springfield 11 EID_14669 M 29.0 4 Single Washington 12 EID_16537 F 28.0 4 Single Lebanon 13 EID_5782 F 65.0 3 Married Franklin 14 EID_20157 M 54.0 3 Single Washington 15 EID_1855 F 64.0 5 Single Lebanon 16 EID_20748 M 42.0 4 Married Lebanon 17 EID_23179 F NaN 2 Married Lebanon 18 EID_12838 M 38.0 3 Married Washington 19 EID_21656 F 32.0 1 Married Franklin Unit Decision_skill_possess Time_of_service \ 0 R&D Conceptual 7.0 1 IT Directive 41.0 2 Sales Directive 21.0 3 Marketing Analytical 11.0 4 R&D Conceptual 12.0 5 IT Behavioral 3.0 6 Purchasing Analytical 6.0 7 Sales Conceptual 4.0 8 IT Analytical 7.0 9 R&D Conceptual 16.0 10 Operarions Behavioral 4.0 11 IT Directive 7.0 12 Human Resource 
Management Directive NaN 13 Logistics Conceptual 27.0 14 IT Analytical 18.0 15 Purchasing Analytical 42.0 16 Sales Behavioral 16.0 17 R&D Behavioral 3.0 18 Logistics Directive 12.0 19 Human Resource Management Behavioral 4.0 Time_since_promotion ... Pay_Scale Compensation_and_Benefits \ 0 4 ... 4.0 type2 1 2 ... 1.0 type2 2 3 ... 8.0 type3 3 4 ... 2.0 type0 4 4 ... 2.0 type2 5 1 ... 6.0 type2 6 4 ... 4.0 type2 7 4 ... 8.0 type2 8 3 ... 8.0 type2 9 1 ... 4.0 type0 10 2 ... 6.0 type3 11 2 ... 9.0 type2 12 4 ... 10.0 type2 13 4 ... 6.0 type3 14 4 ... 7.0 type2 15 3 ... 8.0 type3 16 4 ... 3.0 type2 17 3 ... 4.0 type2 18 1 ... 8.0 type3 19 1 ... 5.0 type2 Work_Life_balance VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 0 1.0 3 -0.9612 -0.4537 2.0 1 8 4 1 1.0 4 -0.9612 0.7075 1.0 2 8 2 2 1.0 4 -0.1048 0.7075 2.0 1 9 3 3 4.0 3 -0.1048 0.7075 2.0 2 8 3 4 4.0 4 1.6081 0.7075 2.0 2 7 4 5 1.0 3 NaN -0.4537 2.0 3 8 3 6 1.0 3 0.7516 1.8688 2.0 3 7 3 7 1.0 3 -0.1048 1.8688 2.0 2 7 5 8 3.0 3 1.6081 0.7075 2.0 2 7 4 9 3.0 3 -0.1048 -0.4537 2.0 4 7 4 10 3.0 3 1.6081 0.7075 1.0 3 8 5 11 4.0 4 0.7516 -0.4537 1.0 3 5 4 12 3.0 3 0.7516 -0.4537 2.0 2 5 2 13 1.0 3 -1.8176 -0.4537 1.0 2 8 3 14 5.0 2 0.7516 -0.4537 2.0 2 7 4 15 1.0 3 NaN -0.4537 3.0 3 9 5 16 4.0 1 -1.8176 -0.4537 NaN 4 8 2 17 4.0 4 -1.8176 0.7075 2.0 4 7 4 18 1.0 3 -1.8176 -0.4537 NaN 2 7 3 19 1.0 3 0.7516 -1.6150 NaN 3 8 2 [20 rows x 23 columns]
# Print a concise summary of the training frame: index range, column names,
# non-null counts per column, dtypes and approximate memory usage.
dataframe_raw.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 7000 entries, 0 to 6999
# Data columns (total 24 columns):
#  #   Column                     Non-Null Count  Dtype
# ---  ------                     --------------  -----
#  0   Employee_ID                7000 non-null   object
#  1   Gender                     7000 non-null   object
#  2   Age                        6588 non-null   float64
#  3   Education_Level            7000 non-null   int64
#  4   Relationship_Status        7000 non-null   object
#  5   Hometown                   7000 non-null   object
#  6   Unit                       7000 non-null   object
#  7   Decision_skill_possess     7000 non-null   object
#  8   Time_of_service            6856 non-null   float64
#  9   Time_since_promotion       7000 non-null   int64
#  10  growth_rate                7000 non-null   int64
#  11  Travel_Rate                7000 non-null   int64
#  12  Post_Level                 7000 non-null   int64
#  13  Pay_Scale                  6991 non-null   float64
#  14  Compensation_and_Benefits  7000 non-null   object
#  15  Work_Life_balance          6989 non-null   float64
#  16  VAR1                       7000 non-null   int64
#  17  VAR2                       6423 non-null   float64
#  18  VAR3                       7000 non-null   float64
#  19  VAR4                       6344 non-null   float64
#  20  VAR5                       7000 non-null   int64
#  21  VAR6                       7000 non-null   int64
#  22  VAR7                       7000 non-null   int64
#  23  Attrition_rate             7000 non-null   float64
# dtypes: float64(8), int64(9), object(7)
# memory usage: 1.3+ MB
# Compute descriptive statistics (count, mean, std, min, quartiles, max) for
# the training frame's numeric columns.
# NOTE(review): the returned frame is only displayed when this is the last
# expression of a notebook cell; in a plain script the result would be
# discarded — confirm the intended run mode.
dataframe_raw.describe()