Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, so the available dataset files are visible up front.
for dirname, _, filenames in os.walk('/kaggle/input'):
    # The middle element (sub-directory names) is not needed here.
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/employee/sample_submission.csv
/kaggle/input/employee/Test.csv
/kaggle/input/employee/Train.csv
import torch
import torchvision
import torch.nn as nn
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
# Read the training and test CSV files of the employee dataset into
# DataFrames, in that order.
dataframe_raw, dataframe_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/employee/Train.csv",
        "../input/employee/Test.csv",
    )
)

# Preview the first 20 rows of each frame.
print(dataframe_raw.head(n=20))
print(dataframe_test.head(n=20))
Employee_ID Gender Age Education_Level Relationship_Status Hometown \
0 EID_23371 F 42.0 4 Married Franklin
1 EID_18000 M 24.0 3 Single Springfield
2 EID_3891 F 58.0 3 Married Clinton
3 EID_17492 F 26.0 3 Single Lebanon
4 EID_22534 F 31.0 1 Married Springfield
5 EID_2278 M 54.0 3 Married Lebanon
6 EID_18588 F 21.0 4 Married Springfield
7 EID_1235 F NaN 3 Married Springfield
8 EID_10197 M 40.0 4 Single Springfield
9 EID_21262 M 45.0 3 Married Lebanon
10 EID_9153 F 65.0 5 Single Lebanon
11 EID_19087 F 19.0 5 Single Springfield
12 EID_4825 M 63.0 3 Married Lebanon
13 EID_17644 F 23.0 4 Single Washington
14 EID_20902 F 19.0 3 Married Washington
15 EID_20121 F NaN 3 Married Springfield
16 EID_5504 M 65.0 4 Single Franklin
17 EID_8049 F 40.0 4 Married Springfield
18 EID_14638 M 33.0 3 Single Springfield
19 EID_12947 M 32.0 3 Single Lebanon
Unit Decision_skill_possess Time_of_service \
0 IT Conceptual 4.0
1 Logistics Analytical 5.0
2 Quality Conceptual 27.0
3 Human Resource Management Behavioral 4.0
4 Logistics Conceptual 5.0
5 Purchasing Conceptual 19.0
6 Purchasing Directive 2.0
7 Sales Directive 34.0
8 Production Analytical 13.0
9 IT Directive 21.0
10 Purchasing Behavioral 31.0
11 Operarions Analytical 0.0
12 Sales Conceptual 27.0
13 Logistics Conceptual 4.0
14 Human Resource Management Directive 0.0
15 Logistics Analytical 0.0
16 IT Behavioral 35.0
17 Accounting and Finance Directive 12.0
18 Accounting and Finance Analytical 9.0
19 IT Conceptual 7.0
Time_since_promotion ... Compensation_and_Benefits Work_Life_balance \
0 4 ... type2 3.0
1 4 ... type2 4.0
2 3 ... type2 1.0
3 3 ... type2 1.0
4 4 ... type3 3.0
5 1 ... type2 1.0
6 1 ... type2 2.0
7 4 ... type3 2.0
8 1 ... type0 4.0
9 4 ... type3 4.0
10 3 ... type3 3.0
11 0 ... type4 1.0
12 1 ... type2 1.0
13 2 ... type2 3.0
14 0 ... type2 3.0
15 0 ... type2 2.0
16 1 ... type2 2.0
17 1 ... type3 2.0
18 1 ... type3 1.0
19 4 ... type2 1.0
VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 Attrition_rate
0 4 0.7516 1.8688 2.0 4 5 3 0.1841
1 3 -0.9612 -0.4537 2.0 3 5 3 0.0670
2 4 -0.9612 -0.4537 3.0 3 8 3 0.0851
3 3 -1.8176 -0.4537 NaN 3 7 3 0.0668
4 1 0.7516 -0.4537 2.0 2 8 2 0.1827
5 3 -1.8176 1.8688 2.0 2 8 3 0.7613
6 3 -0.9612 0.7075 2.0 3 7 3 0.2819
7 3 -0.1048 -0.4537 2.0 3 9 3 0.1169
8 1 NaN 1.8688 2.0 5 6 3 0.1968
9 3 0.7516 -0.4537 2.0 4 8 3 0.2870
10 3 0.7516 -0.4537 3.0 4 6 3 0.2735
11 3 -1.8176 -0.4537 2.0 3 6 3 0.0800
12 3 -0.1048 -0.4537 2.0 5 6 3 0.2109
13 4 -0.1048 -1.6150 2.0 4 8 3 0.1696
14 3 -0.9612 1.8688 2.0 4 6 3 0.0318
15 3 0.7516 -1.6150 1.0 3 8 3 0.1331
16 3 -0.1048 0.7075 1.0 3 5 3 0.0671
17 3 -0.9612 -0.4537 2.0 5 8 4 0.1468
18 3 -1.8176 0.7075 2.0 3 8 3 0.0560
19 3 NaN 0.7075 1.0 1 5 4 0.6410
[20 rows x 24 columns]
Employee_ID Gender Age Education_Level Relationship_Status Hometown \
0 EID_22713 F 32.0 5 Single Springfield
1 EID_9658 M 65.0 2 Single Lebanon
2 EID_22203 M 52.0 3 Married Springfield
3 EID_7652 M 50.0 5 Single Washington
4 EID_6516 F 44.0 3 Married Franklin
5 EID_20283 F 22.0 4 Married Franklin
6 EID_21014 M 42.0 3 Married Washington
7 EID_7693 F 41.0 2 Married Springfield
8 EID_13232 M 31.0 1 Single Springfield
9 EID_6515 M 48.0 2 Single Springfield
10 EID_13639 F 31.0 4 Single Springfield
11 EID_14669 M 29.0 4 Single Washington
12 EID_16537 F 28.0 4 Single Lebanon
13 EID_5782 F 65.0 3 Married Franklin
14 EID_20157 M 54.0 3 Single Washington
15 EID_1855 F 64.0 5 Single Lebanon
16 EID_20748 M 42.0 4 Married Lebanon
17 EID_23179 F NaN 2 Married Lebanon
18 EID_12838 M 38.0 3 Married Washington
19 EID_21656 F 32.0 1 Married Franklin
Unit Decision_skill_possess Time_of_service \
0 R&D Conceptual 7.0
1 IT Directive 41.0
2 Sales Directive 21.0
3 Marketing Analytical 11.0
4 R&D Conceptual 12.0
5 IT Behavioral 3.0
6 Purchasing Analytical 6.0
7 Sales Conceptual 4.0
8 IT Analytical 7.0
9 R&D Conceptual 16.0
10 Operarions Behavioral 4.0
11 IT Directive 7.0
12 Human Resource Management Directive NaN
13 Logistics Conceptual 27.0
14 IT Analytical 18.0
15 Purchasing Analytical 42.0
16 Sales Behavioral 16.0
17 R&D Behavioral 3.0
18 Logistics Directive 12.0
19 Human Resource Management Behavioral 4.0
Time_since_promotion ... Pay_Scale Compensation_and_Benefits \
0 4 ... 4.0 type2
1 2 ... 1.0 type2
2 3 ... 8.0 type3
3 4 ... 2.0 type0
4 4 ... 2.0 type2
5 1 ... 6.0 type2
6 4 ... 4.0 type2
7 4 ... 8.0 type2
8 3 ... 8.0 type2
9 1 ... 4.0 type0
10 2 ... 6.0 type3
11 2 ... 9.0 type2
12 4 ... 10.0 type2
13 4 ... 6.0 type3
14 4 ... 7.0 type2
15 3 ... 8.0 type3
16 4 ... 3.0 type2
17 3 ... 4.0 type2
18 1 ... 8.0 type3
19 1 ... 5.0 type2
Work_Life_balance VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7
0 1.0 3 -0.9612 -0.4537 2.0 1 8 4
1 1.0 4 -0.9612 0.7075 1.0 2 8 2
2 1.0 4 -0.1048 0.7075 2.0 1 9 3
3 4.0 3 -0.1048 0.7075 2.0 2 8 3
4 4.0 4 1.6081 0.7075 2.0 2 7 4
5 1.0 3 NaN -0.4537 2.0 3 8 3
6 1.0 3 0.7516 1.8688 2.0 3 7 3
7 1.0 3 -0.1048 1.8688 2.0 2 7 5
8 3.0 3 1.6081 0.7075 2.0 2 7 4
9 3.0 3 -0.1048 -0.4537 2.0 4 7 4
10 3.0 3 1.6081 0.7075 1.0 3 8 5
11 4.0 4 0.7516 -0.4537 1.0 3 5 4
12 3.0 3 0.7516 -0.4537 2.0 2 5 2
13 1.0 3 -1.8176 -0.4537 1.0 2 8 3
14 5.0 2 0.7516 -0.4537 2.0 2 7 4
15 1.0 3 NaN -0.4537 3.0 3 9 5
16 4.0 1 -1.8176 -0.4537 NaN 4 8 2
17 4.0 4 -1.8176 0.7075 2.0 4 7 4
18 1.0 3 -1.8176 -0.4537 NaN 2 7 3
19 1.0 3 0.7516 -1.6150 NaN 3 8 2
[20 rows x 23 columns]
# Print a structural summary of the training frame: index range, each
# column's non-null count and dtype, and the memory usage.
dataframe_raw.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7000 entries, 0 to 6999
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Employee_ID 7000 non-null object
1 Gender 7000 non-null object
2 Age 6588 non-null float64
3 Education_Level 7000 non-null int64
4 Relationship_Status 7000 non-null object
5 Hometown 7000 non-null object
6 Unit 7000 non-null object
7 Decision_skill_possess 7000 non-null object
8 Time_of_service 6856 non-null float64
9 Time_since_promotion 7000 non-null int64
10 growth_rate 7000 non-null int64
11 Travel_Rate 7000 non-null int64
12 Post_Level 7000 non-null int64
13 Pay_Scale 6991 non-null float64
14 Compensation_and_Benefits 7000 non-null object
15 Work_Life_balance 6989 non-null float64
16 VAR1 7000 non-null int64
17 VAR2 6423 non-null float64
18 VAR3 7000 non-null float64
19 VAR4 6344 non-null float64
20 VAR5 7000 non-null int64
21 VAR6 7000 non-null int64
22 VAR7 7000 non-null int64
23 Attrition_rate 7000 non-null float64
dtypes: float64(8), int64(9), object(7)
memory usage: 1.3+ MB
# Compute descriptive statistics for the training frame. The result is not
# assigned; as the last expression of a notebook cell it is rendered as the
# cell's output (in a plain script this line would display nothing).
dataframe_raw.describe()