We will be using a subset of the LendingClub dataset obtained from Kaggle: https://www.kaggle.com/wordsforthewise/lending-club
LendingClub is a US peer-to-peer lending company, headquartered in San Francisco, California. It was the first peer-to-peer lender to register its offerings as securities with the Securities and Exchange Commission (SEC), and to offer loan trading on a secondary market. LendingClub is the world's largest peer-to-peer lending platform.
Given historical data on loans, along with information on whether or not the borrower defaulted (charge-off), can we build a model that can predict whether or not a borrower will pay back their loan? That way, when we get a new potential customer in the future, we can assess whether or not they are likely to pay back the loan. Keep classification metrics in mind when evaluating the performance of your model!
The "loan_status" column contains our label.
There are many LendingClub data sets on Kaggle. Here is the information on this particular data set:
 | LoanStatNew | Description |
---|---|---|
0 | loan_amnt | The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value. |
1 | term | The number of payments on the loan. Values are in months and can be either 36 or 60. |
2 | int_rate | Interest Rate on the loan |
3 | installment | The monthly payment owed by the borrower if the loan originates. |
4 | grade | LC assigned loan grade |
5 | sub_grade | LC assigned loan subgrade |
6 | emp_title | The job title supplied by the Borrower when applying for the loan.* |
7 | emp_length | Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years. |
8 | home_ownership | The home ownership status provided by the borrower during registration or obtained from the credit report. Our values are: RENT, OWN, MORTGAGE, OTHER |
9 | annual_inc | The self-reported annual income provided by the borrower during registration. |
10 | verification_status | Indicates if income was verified by LC, not verified, or if the income source was verified |
11 | issue_d | The month which the loan was funded |
12 | loan_status | Current status of the loan |
13 | purpose | A category provided by the borrower for the loan request. |
14 | title | The loan title provided by the borrower |
15 | zip_code | The first 3 numbers of the zip code provided by the borrower in the loan application. |
16 | addr_state | The state provided by the borrower in the loan application |
17 | dti | A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage and the requested LC loan, divided by the borrower’s self-reported monthly income. |
18 | earliest_cr_line | The month the borrower's earliest reported credit line was opened |
19 | open_acc | The number of open credit lines in the borrower's credit file. |
20 | pub_rec | Number of derogatory public records |
21 | revol_bal | Total credit revolving balance |
22 | revol_util | Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit. |
23 | total_acc | The total number of credit lines currently in the borrower's credit file |
24 | initial_list_status | The initial listing status of the loan. Possible values are – W, F |
25 | application_type | Indicates whether the loan is an individual application or a joint application with two co-borrowers |
26 | mort_acc | Number of mortgage accounts. |
27 | pub_rec_bankruptcies | Number of public record bankruptcies |
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
data_info = pd.read_csv('../DATA/lending_club_info.csv',index_col='LoanStatNew')
print(data_info.loc['revol_util']['Description'])
Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit.
def feat_info(col_name):
    # Print the description of a column name from the data_info lookup table
    print(data_info.loc[col_name]['Description'])
feat_info('mort_acc')
Number of mortgage accounts.
df = pd.read_csv('../DATA/lending_club_loan_two.csv')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396030 entries, 0 to 396029
Data columns (total 27 columns):
 #   Column                Non-Null Count   Dtype
---  ------                --------------   -----
 0   loan_amnt             396030 non-null  float64
 1   term                  396030 non-null  object
 2   int_rate              396030 non-null  float64
 3   installment           396030 non-null  float64
 4   grade                 396030 non-null  object
 5   sub_grade             396030 non-null  object
 6   emp_title             373103 non-null  object
 7   emp_length            377729 non-null  object
 8   home_ownership        396030 non-null  object
 9   annual_inc            396030 non-null  float64
 10  verification_status   396030 non-null  object
 11  issue_d               396030 non-null  object
 12  loan_status           396030 non-null  object
 13  purpose               396030 non-null  object
 14  title                 394275 non-null  object
 15  dti                   396030 non-null  float64
 16  earliest_cr_line      396030 non-null  object
 17  open_acc              396030 non-null  float64
 18  pub_rec               396030 non-null  float64
 19  revol_bal             396030 non-null  float64
 20  revol_util            395754 non-null  float64
 21  total_acc             396030 non-null  float64
 22  initial_list_status   396030 non-null  object
 23  application_type      396030 non-null  object
 24  mort_acc              358235 non-null  float64
 25  pub_rec_bankruptcies  395495 non-null  float64
 26  address               396030 non-null  object
dtypes: float64(12), object(15)
memory usage: 81.6+ MB
Section Goal: Get an understanding of which variables are important, view summary statistics, and visualize the data.
Countplot of loan_status
sns.set_palette('dark')
sns.set_style('whitegrid')
sns.countplot(data=df, x='loan_status')
<AxesSubplot:xlabel='loan_status', ylabel='count'>
Histogram of the loan_amnt column.
plt.figure(figsize=(12,4))
sns.histplot(data=df,x='loan_amnt',bins=40)
<AxesSubplot:xlabel='loan_amnt', ylabel='Count'>
Explore correlation between the continuous feature variables.
df.corr()
 | loan_amnt | int_rate | installment | annual_inc | dti | open_acc | pub_rec | revol_bal | revol_util | total_acc | mort_acc | pub_rec_bankruptcies |
---|---|---|---|---|---|---|---|---|---|---|---|---|
loan_amnt | 1.000000 | 0.168921 | 0.953929 | 0.336887 | 0.016636 | 0.198556 | -0.077779 | 0.328320 | 0.099911 | 0.223886 | 0.222315 | -0.106539 |
int_rate | 0.168921 | 1.000000 | 0.162758 | -0.056771 | 0.079038 | 0.011649 | 0.060986 | -0.011280 | 0.293659 | -0.036404 | -0.082583 | 0.057450 |
installment | 0.953929 | 0.162758 | 1.000000 | 0.330381 | 0.015786 | 0.188973 | -0.067892 | 0.316455 | 0.123915 | 0.202430 | 0.193694 | -0.098628 |
annual_inc | 0.336887 | -0.056771 | 0.330381 | 1.000000 | -0.081685 | 0.136150 | -0.013720 | 0.299773 | 0.027871 | 0.193023 | 0.236320 | -0.050162 |
dti | 0.016636 | 0.079038 | 0.015786 | -0.081685 | 1.000000 | 0.136181 | -0.017639 | 0.063571 | 0.088375 | 0.102128 | -0.025439 | -0.014558 |
open_acc | 0.198556 | 0.011649 | 0.188973 | 0.136150 | 0.136181 | 1.000000 | -0.018392 | 0.221192 | -0.131420 | 0.680728 | 0.109205 | -0.027732 |
pub_rec | -0.077779 | 0.060986 | -0.067892 | -0.013720 | -0.017639 | -0.018392 | 1.000000 | -0.101664 | -0.075910 | 0.019723 | 0.011552 | 0.699408 |
revol_bal | 0.328320 | -0.011280 | 0.316455 | 0.299773 | 0.063571 | 0.221192 | -0.101664 | 1.000000 | 0.226346 | 0.191616 | 0.194925 | -0.124532 |
revol_util | 0.099911 | 0.293659 | 0.123915 | 0.027871 | 0.088375 | -0.131420 | -0.075910 | 0.226346 | 1.000000 | -0.104273 | 0.007514 | -0.086751 |
total_acc | 0.223886 | -0.036404 | 0.202430 | 0.193023 | 0.102128 | 0.680728 | 0.019723 | 0.191616 | -0.104273 | 1.000000 | 0.381072 | 0.042035 |
mort_acc | 0.222315 | -0.082583 | 0.193694 | 0.236320 | -0.025439 | 0.109205 | 0.011552 | 0.194925 | 0.007514 | 0.381072 | 1.000000 | 0.027239 |
pub_rec_bankruptcies | -0.106539 | 0.057450 | -0.098628 | -0.050162 | -0.014558 | -0.027732 | 0.699408 | -0.124532 | -0.086751 | 0.042035 | 0.027239 | 1.000000 |
plt.figure(figsize=(12,7))
sns.heatmap(df.corr(), annot=True)
<AxesSubplot:>
Notice the almost perfect correlation between loan_amnt and the "installment" feature. Explore this relationship further.
feat_info('installment')
The monthly payment owed by the borrower if the loan originates.
feat_info('loan_amnt')
The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value.
plt.figure(figsize=(8,5))
sns.scatterplot(x='installment', y='loan_amnt', data=df)
<AxesSubplot:xlabel='installment', ylabel='loan_amnt'>
Explore loan_status and the Loan Amount.
sns.boxplot(x='loan_status', y='loan_amnt', data=df)
<AxesSubplot:xlabel='loan_status', ylabel='loan_amnt'>
df.groupby('loan_status')['loan_amnt'].describe().transpose()
loan_status | Charged Off | Fully Paid |
---|---|---|
count | 77673.000000 | 318357.000000 |
mean | 15126.300967 | 13866.878771 |
std | 8505.090557 | 8302.319699 |
min | 1000.000000 | 500.000000 |
25% | 8525.000000 | 7500.000000 |
50% | 14000.000000 | 12000.000000 |
75% | 20000.000000 | 19225.000000 |
max | 40000.000000 | 40000.000000 |
Explore the Grade and SubGrade columns that LendingClub attributes to the loans.
sorted(df['grade'].unique())
['A', 'B', 'C', 'D', 'E', 'F', 'G']
sorted(df['sub_grade'].unique())
['A1', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4', 'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4', 'F5', 'G1', 'G2', 'G3', 'G4', 'G5']
Countplot per grade.
sns.countplot(x='grade', data=df, hue='loan_status')
<AxesSubplot:xlabel='grade', ylabel='count'>
Countplot per subgrade.
plt.figure(figsize=(12,4))
sub_order = sorted(df['sub_grade'].unique())
sns.countplot(x='sub_grade', data=df, order = sub_order)
<AxesSubplot:xlabel='sub_grade', ylabel='count'>
plt.figure(figsize=(12,4))
sub_order = sorted(df['sub_grade'].unique())
sns.countplot(x='sub_grade', data=df, order = sub_order, hue='loan_status')
<AxesSubplot:xlabel='sub_grade', ylabel='count'>
Isolate the F and G subgrades, since they are not paid back as often.
plt.figure(figsize=(12,4))
f_g=df[(df['grade']=='G')|(df['grade']=='F')]
sub_order = sorted(f_g['sub_grade'].unique())
sns.countplot(x='sub_grade', data=f_g, order = sub_order, hue='loan_status')
<AxesSubplot:xlabel='sub_grade', ylabel='count'>
Map loan_status to 1 (Fully Paid) and 0 (Charged Off).
df['loan_status'].unique()
array(['Fully Paid', 'Charged Off'], dtype=object)
df['loan_repaid']=df['loan_status'].map({'Fully Paid':1,'Charged Off':0})
df[['loan_repaid', 'loan_status']]
 | loan_repaid | loan_status |
---|---|---|
0 | 1 | Fully Paid |
1 | 1 | Fully Paid |
2 | 1 | Fully Paid |
3 | 1 | Fully Paid |
4 | 0 | Charged Off |
... | ... | ... |
396025 | 1 | Fully Paid |
396026 | 1 | Fully Paid |
396027 | 1 | Fully Paid |
396028 | 1 | Fully Paid |
396029 | 1 | Fully Paid |
396030 rows × 2 columns
Show the correlation of the numeric features with the new loan_repaid column.
plt.figure(figsize=(12,4))
df.corr()['loan_repaid'].drop('loan_repaid').sort_values().plot(kind='bar')
<AxesSubplot:>
Section Goals: Remove or fill any missing data. Remove unnecessary or repetitive features. Convert categorical string features to dummy variables.
df.head()
 | loan_amnt | term | int_rate | installment | grade | sub_grade | emp_title | emp_length | home_ownership | annual_inc | ... | pub_rec | revol_bal | revol_util | total_acc | initial_list_status | application_type | mort_acc | pub_rec_bankruptcies | address | loan_repaid |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10000.0 | 36 months | 11.44 | 329.48 | B | B4 | Marketing | 10+ years | RENT | 117000.0 | ... | 0.0 | 36369.0 | 41.8 | 25.0 | w | INDIVIDUAL | 0.0 | 0.0 | 0174 Michelle Gateway\nMendozaberg, OK 22690 | 1 |
1 | 8000.0 | 36 months | 11.99 | 265.68 | B | B5 | Credit analyst | 4 years | MORTGAGE | 65000.0 | ... | 0.0 | 20131.0 | 53.3 | 27.0 | f | INDIVIDUAL | 3.0 | 0.0 | 1076 Carney Fort Apt. 347\nLoganmouth, SD 05113 | 1 |
2 | 15600.0 | 36 months | 10.49 | 506.97 | B | B3 | Statistician | < 1 year | RENT | 43057.0 | ... | 0.0 | 11987.0 | 92.2 | 26.0 | f | INDIVIDUAL | 0.0 | 0.0 | 87025 Mark Dale Apt. 269\nNew Sabrina, WV 05113 | 1 |
3 | 7200.0 | 36 months | 6.49 | 220.65 | A | A2 | Client Advocate | 6 years | RENT | 54000.0 | ... | 0.0 | 5472.0 | 21.5 | 13.0 | f | INDIVIDUAL | 0.0 | 0.0 | 823 Reid Ford\nDelacruzside, MA 00813 | 1 |
4 | 24375.0 | 60 months | 17.27 | 609.33 | C | C5 | Destiny Management Inc. | 9 years | MORTGAGE | 55000.0 | ... | 0.0 | 24584.0 | 69.8 | 43.0 | f | INDIVIDUAL | 1.0 | 0.0 | 679 Luna Roads\nGreggshire, VA 11650 | 0 |
5 rows × 28 columns
See if we should keep, discard, or fill in the missing data.
Length of the dataframe
len(df)
396030
Display the total count of missing values per column.
df.isnull().sum()
loan_amnt 0 term 0 int_rate 0 installment 0 grade 0 sub_grade 0 emp_title 22927 emp_length 18301 home_ownership 0 annual_inc 0 verification_status 0 issue_d 0 loan_status 0 purpose 0 title 1755 dti 0 earliest_cr_line 0 open_acc 0 pub_rec 0 revol_bal 0 revol_util 276 total_acc 0 initial_list_status 0 application_type 0 mort_acc 37795 pub_rec_bankruptcies 535 address 0 loan_repaid 0 dtype: int64
Percentage of missing data
df.isnull().sum()/len(df)* 100
loan_amnt 0.000000 term 0.000000 int_rate 0.000000 installment 0.000000 grade 0.000000 sub_grade 0.000000 emp_title 5.789208 emp_length 4.621115 home_ownership 0.000000 annual_inc 0.000000 verification_status 0.000000 issue_d 0.000000 loan_status 0.000000 purpose 0.000000 title 0.443148 dti 0.000000 earliest_cr_line 0.000000 open_acc 0.000000 pub_rec 0.000000 revol_bal 0.000000 revol_util 0.069692 total_acc 0.000000 initial_list_status 0.000000 application_type 0.000000 mort_acc 9.543469 pub_rec_bankruptcies 0.135091 address 0.000000 loan_repaid 0.000000 dtype: float64
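A compact way to focus only on the affected columns is to filter out the zeros and sort the percentages (a quick sketch using the same df):
missing_pct = 100 * df.isnull().sum() / len(df)
missing_pct[missing_pct > 0].sort_values(ascending=False)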
Check if we can drop emp_title and emp_length
feat_info('emp_title')
print('\n')
feat_info('emp_length')
The job title supplied by the Borrower when applying for the loan.*

Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years.
Unique employment job titles
df['emp_title'].nunique()
173105
df['emp_title'].value_counts()
Teacher 4389 Manager 4250 Registered Nurse 1856 RN 1846 Supervisor 1830 ... Relationships Banker 1 fremont unified school district 1 Cook lll 1 Hayward Labrotories 1 IMT RESIDENTIAL 1 Name: emp_title, Length: 173105, dtype: int64
There are too many unique job titles to convert into dummy variables. Drop the column.
df=df.drop('emp_title', axis=1)
Countplot of the emp_length.
sorted(df['emp_length'].dropna().unique())
['1 year', '10+ years', '2 years', '3 years', '4 years', '5 years', '6 years', '7 years', '8 years', '9 years', '< 1 year']
order_l=['< 1 year',
'1 year',
'2 years',
'3 years',
'4 years',
'5 years',
'6 years',
'7 years',
'8 years',
'9 years',
'10+ years']
plt.figure(figsize=(10,4))
sns.countplot(data=df, x= 'emp_length', order=order_l)
<AxesSubplot:xlabel='emp_length', ylabel='count'>
Separate the countplot by loan_status.
plt.figure(figsize=(10,4))
sns.countplot(data=df, x= 'emp_length', order=order_l, hue='loan_status')
<AxesSubplot:xlabel='emp_length', ylabel='count'>
Ratio of charged-off to fully paid loans per employment length category.
emp_co=df[df['loan_status']=='Charged Off'].groupby('emp_length').count()['loan_status']
emp_fp = df[df['loan_status']=='Fully Paid'].groupby('emp_length').count()['loan_status']
emp_ratio=emp_co/emp_fp
emp_ratio
emp_length 1 year 0.248649 10+ years 0.225770 2 years 0.239560 3 years 0.242593 4 years 0.238213 5 years 0.237911 6 years 0.233341 7 years 0.241887 8 years 0.249625 9 years 0.250735 < 1 year 0.260830 Name: loan_status, dtype: float64
emp_ratio.plot(kind='bar')
<AxesSubplot:xlabel='emp_length'>
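Note that emp_ratio above is the count of charged-off loans divided by the count of fully paid loans, not a true charge-off rate. A sketch of the actual charge-off percentage per employment length (charged off divided by all loans in each category), reusing emp_co from above:
# Total loans per employment length category
emp_total = df.groupby('emp_length').count()['loan_status']
emp_co_pct = 100 * emp_co / emp_total
emp_co_pct.plot(kind='bar')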
Charge-off rates are extremely similar across all employment lengths, so emp_length adds little predictive information. Drop the column.
df=df.drop('emp_length', axis=1)
Revisit missing data.
df.isnull().sum()
loan_amnt 0 term 0 int_rate 0 installment 0 grade 0 sub_grade 0 home_ownership 0 annual_inc 0 verification_status 0 issue_d 0 loan_status 0 purpose 0 title 1755 dti 0 earliest_cr_line 0 open_acc 0 pub_rec 0 revol_bal 0 revol_util 276 total_acc 0 initial_list_status 0 application_type 0 mort_acc 37795 pub_rec_bankruptcies 535 address 0 loan_repaid 0 dtype: int64
Check the title and purpose columns.
df['purpose'].head()
0 vacation 1 debt_consolidation 2 credit_card 3 credit_card 4 credit_card Name: purpose, dtype: object
df['title'].head()
0 Vacation 1 Debt consolidation 2 Credit card refinancing 3 Credit card refinancing 4 Credit Card Refinance Name: title, dtype: object
They contain essentially the same information, so the title column is redundant. Drop it.
df=df.drop('title', axis=1)
Handle mort_acc
feat_info('mort_acc')
Number of mortgage accounts.
value_counts of mort_acc.
df['mort_acc'].value_counts()
0.0 139777 1.0 60416 2.0 49948 3.0 38049 4.0 27887 5.0 18194 6.0 11069 7.0 6052 8.0 3121 9.0 1656 10.0 865 11.0 479 12.0 264 13.0 146 14.0 107 15.0 61 16.0 37 17.0 22 18.0 18 19.0 15 20.0 13 24.0 10 22.0 7 21.0 4 25.0 4 27.0 3 23.0 2 31.0 2 26.0 2 32.0 2 28.0 1 34.0 1 30.0 1 Name: mort_acc, dtype: int64
Fill in mort_acc with a group-based mean imputation. First, check which other feature correlates most strongly with mort_acc.
df.corr()['mort_acc'].sort_values()
int_rate -0.082583 dti -0.025439 revol_util 0.007514 pub_rec 0.011552 pub_rec_bankruptcies 0.027239 loan_repaid 0.073111 open_acc 0.109205 installment 0.193694 revol_bal 0.194925 loan_amnt 0.222315 annual_inc 0.236320 total_acc 0.381072 mort_acc 1.000000 Name: mort_acc, dtype: float64
total_acc correlates most strongly with mort_acc, so calculate the mean mort_acc per total_acc value and use it to fill in the missing entries.
df.groupby('total_acc').mean()['mort_acc']
total_acc 2.0 0.000000 3.0 0.052023 4.0 0.066743 5.0 0.103289 6.0 0.151293 ... 124.0 1.000000 129.0 1.000000 135.0 3.000000 150.0 2.000000 151.0 0.000000 Name: mort_acc, Length: 118, dtype: float64
total_acc_avg = df.groupby('total_acc').mean()['mort_acc']
total_acc_avg[2.0]
0.0
def fill(total_acc, mort_acc):
    # If mort_acc is missing, return the average mort_acc for that row's total_acc group;
    # otherwise keep the original value.
    if np.isnan(mort_acc):
        return total_acc_avg[total_acc]
    else:
        return mort_acc
df['mort_acc'] = df.apply(lambda x: fill(x['total_acc'], x['mort_acc']), axis=1)
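A vectorized alternative to the row-wise apply above, which should give the same result and is typically much faster on roughly 400k rows (a sketch intended to replace the apply, not run after it):
# Fill missing mort_acc values with the mean mort_acc of rows sharing the same total_acc
df['mort_acc'] = df['mort_acc'].fillna(df.groupby('total_acc')['mort_acc'].transform('mean'))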
df.isnull().sum()
loan_amnt 0 term 0 int_rate 0 installment 0 grade 0 sub_grade 0 home_ownership 0 annual_inc 0 verification_status 0 issue_d 0 loan_status 0 purpose 0 dti 0 earliest_cr_line 0 open_acc 0 pub_rec 0 revol_bal 0 revol_util 276 total_acc 0 initial_list_status 0 application_type 0 mort_acc 0 pub_rec_bankruptcies 535 address 0 loan_repaid 0 dtype: int64
revol_util and pub_rec_bankruptcies still have missing data points, but they account for less than 0.5% of the total data, so drop the rows with missing values.
df=df.dropna()
df.isnull().sum()
# Perfect
loan_amnt 0 term 0 int_rate 0 installment 0 grade 0 sub_grade 0 home_ownership 0 annual_inc 0 verification_status 0 issue_d 0 loan_status 0 purpose 0 dti 0 earliest_cr_line 0 open_acc 0 pub_rec 0 revol_bal 0 revol_util 0 total_acc 0 initial_list_status 0 application_type 0 mort_acc 0 pub_rec_bankruptcies 0 address 0 loan_repaid 0 dtype: int64
List all currently non-numeric columns.
df.select_dtypes(['object']).columns
# We need to go through all of them
Index(['term', 'grade', 'sub_grade', 'home_ownership', 'verification_status', 'issue_d', 'loan_status', 'purpose', 'earliest_cr_line', 'initial_list_status', 'application_type', 'address'], dtype='object')
term feature
df['term'].value_counts()
36 months 301247 60 months 93972 Name: term, dtype: int64
# Convert term into a numeric 36 or 60
df['term']=df['term'].apply(lambda x: int(x[:3]))
grade feature
# grade is already captured by sub_grade, so drop it
df = df.drop('grade',axis=1)
Convert the subgrade into dummy variables and concatenate
subgrade_dummy=pd.get_dummies(df['sub_grade'], drop_first=True)
df=df.drop('sub_grade', axis=1)
df=pd.concat([df,subgrade_dummy],axis=1)
df.columns
Index(['loan_amnt', 'term', 'int_rate', 'installment', 'home_ownership', 'annual_inc', 'verification_status', 'issue_d', 'loan_status', 'purpose', 'dti', 'earliest_cr_line', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'initial_list_status', 'application_type', 'mort_acc', 'pub_rec_bankruptcies', 'address', 'loan_repaid', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4', 'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4', 'F5', 'G1', 'G2', 'G3', 'G4', 'G5'], dtype='object')
df.select_dtypes(['object']).columns
Index(['home_ownership', 'verification_status', 'issue_d', 'loan_status', 'purpose', 'earliest_cr_line', 'initial_list_status', 'application_type', 'address'], dtype='object')
Convert verification_status, application_type, initial_list_status, purpose into dummy variables and concatenate
dummies=pd.get_dummies(df[['verification_status', 'application_type','initial_list_status','purpose' ]],drop_first=True)
df = df.drop(['verification_status', 'application_type','initial_list_status','purpose'],axis=1)
df = pd.concat([df,dummies], axis=1)
df.columns
Index(['loan_amnt', 'term', 'int_rate', 'installment', 'home_ownership', 'annual_inc', 'issue_d', 'loan_status', 'dti', 'earliest_cr_line', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'mort_acc', 'pub_rec_bankruptcies', 'address', 'loan_repaid', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4', 'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4', 'F5', 'G1', 'G2', 'G3', 'G4', 'G5', 'verification_status_Source Verified', 'verification_status_Verified', 'application_type_INDIVIDUAL', 'application_type_JOINT', 'initial_list_status_w', 'purpose_credit_card', 'purpose_debt_consolidation', 'purpose_educational', 'purpose_home_improvement', 'purpose_house', 'purpose_major_purchase', 'purpose_medical', 'purpose_moving', 'purpose_other', 'purpose_renewable_energy', 'purpose_small_business', 'purpose_vacation', 'purpose_wedding'], dtype='object')
df['home_ownership'].value_counts()
MORTGAGE 198022 RENT 159395 OWN 37660 OTHER 110 NONE 29 ANY 3 Name: home_ownership, dtype: int64
Replace NONE and ANY with OTHER. Convert into dummy variables and concatenate
df['home_ownership']=df['home_ownership'].replace(['NONE', 'ANY'], 'OTHER')
dummies = pd.get_dummies(df['home_ownership'],drop_first=True)
df=df.drop('home_ownership', axis=1)
df=pd.concat([df, dummies], axis=1)
Address
# Feature engineer a zip_code column from the last five characters of the address
df['zip_code']=df['address'].apply(lambda address: address[-5:])
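Before creating dummies, it is worth checking how many distinct zip codes this extraction produces, since a high-cardinality column would add a large number of dummy features (a quick sketch):
df['zip_code'].nunique()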
Convert the zip_code column into dummy variables and concatenate.
dummies = pd.get_dummies(df['zip_code'],drop_first=True)
df = df.drop(['zip_code','address'], axis=1)
df = pd.concat([df, dummies], axis=1)
issue_d
# In deployment we wouldn't know beforehand whether or not a loan would be issued,
# so we wouldn't have an issue date. Drop the column.
df=df.drop('issue_d', axis=1)
earliest_cr_line
# This is a timestamp feature. Feature engineer the year as a numeric column.
df['earliest_cr_year']=df['earliest_cr_line'].apply(lambda year: int(year[-4:]))
df=df.drop('earliest_cr_line', axis=1)
df.select_dtypes(['object']).columns
Index(['loan_status'], dtype='object')
Drop the loan_status column, since we already have it encoded numerically as loan_repaid.
df=df.drop('loan_status', axis=1)
Import train_test_split from sklearn.
from sklearn.model_selection import train_test_split
Set X and y variables to the .values of the features and label.
X = df.drop('loan_repaid',axis=1).values
y = df['loan_repaid'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=101)
Normalize the data with a MinMaxScaler, fitting it on the training set only to avoid data leakage from the test set.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape)
(316175, 78)
Import libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping
Build a sequential model to train on the data, including dropout layers to help prevent overfitting.
model = Sequential()
# Choose whatever number of layers/neurons you want.
model.add(Dense(78, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(39, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(19, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
Fit the model with validation data for later plotting, using the early stopping callback.
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
model.fit(x=X_train, y=y_train, epochs=25, batch_size=256, validation_data=(X_test, y_test), callbacks=[early_stop])
Epoch 1/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2955 - val_loss: 0.2659 Epoch 2/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2652 - val_loss: 0.2629 Epoch 3/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2629 - val_loss: 0.2623 Epoch 4/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2616 - val_loss: 0.2621 Epoch 5/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2609 - val_loss: 0.2623 Epoch 6/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2604 - val_loss: 0.2619 Epoch 7/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2599 - val_loss: 0.2617 Epoch 8/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2595 - val_loss: 0.2615 Epoch 9/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2595 - val_loss: 0.2620 Epoch 10/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2591 - val_loss: 0.2614 Epoch 11/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2589 - val_loss: 0.2613 Epoch 12/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2584 - val_loss: 0.2616 Epoch 13/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2586 - val_loss: 0.2610 Epoch 14/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2582 - val_loss: 0.2614 Epoch 15/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2582 - val_loss: 0.2617 Epoch 16/25 1236/1236 [==============================] - 3s 2ms/step - loss: 0.2580 - val_loss: 0.2624 Epoch 16: early stopping
<keras.callbacks.History at 0x1501b1885e0>
Plot out the validation loss versus the training loss.
losses = pd.DataFrame(model.history.history)
losses[['loss','val_loss']].plot()
<AxesSubplot:>
Create predictions from the X_test.
from sklearn.metrics import classification_report,confusion_matrix
predictions = (model.predict(X_test) > 0.5).astype("int32")
2471/2471 [==============================] - 1s 578us/step
Classification report and confusion matrix.
print(classification_report(y_test,predictions))
              precision    recall  f1-score   support

           0       1.00      0.43      0.60     15658
           1       0.88      1.00      0.93     63386

    accuracy                           0.89     79044
   macro avg       0.94      0.72      0.77     79044
weighted avg       0.90      0.89      0.87     79044
confusion_matrix(y_test,predictions)
array([[ 6745, 8913], [ 26, 63360]], dtype=int64)
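Given the class imbalance visible in the support column, it helps to compare the model's accuracy against a naive baseline that always predicts "Fully Paid" (a sketch using the test split from above):
from sklearn.metrics import accuracy_score

model_acc = accuracy_score(y_test, predictions)
baseline_acc = y_test.mean()  # accuracy of always predicting loan_repaid = 1
print(f'Model accuracy: {model_acc:.3f}  |  Always-repaid baseline: {baseline_acc:.3f}')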
Deploy model on a random customer
import random
random.seed(101)
random_ind = random.randint(0,len(df))
new_customer = df.drop('loan_repaid',axis=1).iloc[random_ind]
new_customer
loan_amnt 25000.00 term 60.00 int_rate 18.24 installment 638.11 annual_inc 61665.00 ... 48052 0.00 70466 0.00 86630 0.00 93700 0.00 earliest_cr_year 1996.00 Name: 305323, Length: 78, dtype: float64
(model.predict(new_customer.values.reshape(1,78)) > 0.5).astype("int32")
1/1 [==============================] - 0s 10ms/step
array([[1]])
df.iloc[random_ind]['loan_repaid']
1.0
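Note that new_customer was passed to the model as raw, unscaled values, even though the model was trained on MinMax-scaled features. A sketch of the same prediction with the fitted scaler applied first (assuming scaler and model are still in scope):
# Scale the single customer with the MinMaxScaler fitted on the training data before predicting
scaled_customer = scaler.transform(new_customer.values.reshape(1, 78))
(model.predict(scaled_customer) > 0.5).astype("int32")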