# Klasifikasi Teks (Text Classification)

from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline

# Text-classification pipeline: n-gram counts -> TF-IDF weights -> calibrated
# SGD-trained linear classifier.
# NOTE(review): assumes X_train is an iterable of raw text documents and
# y_train the matching labels; both are defined outside this section.
model = Pipeline([
    # Count n-grams of length 1 to 10, keeping only terms that appear in at
    # least 5 documents and in at most 70% of the documents.
    ('vect', CountVectorizer(ngram_range=(1, 10), min_df=5, max_df=.70)),
    # Re-weight the counts with TF-IDF; each document row is L1-normalised.
    ('tfidf', TfidfTransformer(norm='l1')),
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so either keyword fails on some versions.
    ('clf', CalibratedClassifierCV(
        SGDClassifier(
            penalty='elasticnet',
            alpha=0.001,
            max_iter=500,
            l1_ratio=.1,
            random_state=45,  # fixed seed for reproducible fits
        ),
        method='isotonic',
    )),
])
model.fit(X_train, y_train)


# Predict on both splits so train and test scores can be compared side by side.
test_y_pred = model.predict(X_test)
train_y_pred = model.predict(X_train)

# scikit-learn metrics take (y_true, y_pred). Accuracy happens to be symmetric,
# but keeping the documented order matches the classification_report calls
# below and stays correct if an asymmetric metric is swapped in later.
print(f'Train accuracy {accuracy_score(y_train, train_y_pred)}')
print(f'Test accuracy {accuracy_score(y_test, test_y_pred)}')

# Per-class precision / recall / F1 for each split.
print('\nTrain Report\n')
print(classification_report(y_train, train_y_pred))

print('Test Report\n')
print(classification_report(y_test, test_y_pred))
# Lazy Leopard