@divisha wrote:
I am trying to predict the class of a test input using the stacking method, but I get the following error at the predict step. Any help would be appreciated.
y_new = lr.predict(review) Traceback (most recent call last):
File “”, line 1, in y_new = lr.predict(review)
File “C:\Users\DIVISHA\Anaconda2\lib\site-packages\sklearn\linear_model\base.py”, line 324, in predict scores = self.decision_function(X)
File “C:\Users\DIVISHA\Anaconda2\lib\site-packages\sklearn\linear_model\base.py”, line 305, in decision_function % (X.shape[1], n_features))
ValueError: X has 913 features per sample; expecting 3
Here's my code:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.classifier import StackingClassifier
from sklearn import cross_validation
import numpy as np
import pandas as pd

# Load the labelled reviews; the first column holds the class label.
dataset = pd.read_csv('dataset_incsv.csv', delimiter=',')
y = dataset.iloc[:, 0].values

import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build these once: constructing the stemmer and the stop-word set for every
# row / every word is loop-invariant work.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))

corpus = []  # one cleaned, stemmed string per review
# Iterate over every row instead of a hard-coded row count so that corpus
# always stays the same length as y.
for comment in dataset['Comments']:
    # Keep letters only; everything else becomes a space.
    review = re.sub('[^a-zA-Z]', ' ', comment)
    # Lower-case before stop-word filtering, exactly as the prediction-time
    # preprocessing does; otherwise capitalised stop words ("The", "And")
    # survive in the training text only.
    review = review.lower()
    review = review.split()
    # Stemming collapses inflected forms to reduce sparsity.
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    corpus.append(review)

X = corpus
max_words = 910
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features: learn the vocabulary from the cleaned corpus and
# encode the corpus as a document-term count matrix in one step. The fitted
# vectorizer `cv` is reused later to encode new text with the same vocabulary.
cv=CountVectorizer()
X=cv.fit_transform(corpus)
# Optional term-frequency scaling of the counts (currently disabled).
#from sklearn.feature_extraction.text import TfidfTransformer
#tf_transformer = TfidfTransformer(use_idf=False).fit(X)
#X = tf_transformer.transform(X).toarray()
def CalculateAccuracy(y_test, pred_label):
    """Return the percentage of predictions that equal the true labels.

    Parameters
    ----------
    y_test : array-like
        True labels.
    pred_label : array-like
        Predicted labels; must have the same shape as ``y_test``.

    Returns
    -------
    float
        Accuracy in percent, between 0.0 and 100.0.

    Raises
    ------
    ValueError
        If the two inputs differ in shape or are empty.
    """
    y_true = np.asarray(y_test)
    y_pred = np.asarray(pred_label)
    # Refuse mismatched shapes: NumPy would otherwise broadcast them and
    # silently produce a meaningless score.
    if y_true.shape != y_pred.shape:
        raise ValueError("shape mismatch: y_test %s vs pred_label %s"
                         % (y_true.shape, y_pred.shape))
    if y_true.size == 0:
        raise ValueError("cannot compute accuracy of empty input")
    # Compare with == rather than subtraction so that plain lists and
    # non-numeric (e.g. string) labels work as well.
    n_correct = np.count_nonzero(y_true == y_pred)
    return 100.0 * n_correct / float(y_true.size)


clf1 = KNeighborsClassifier(n_neighbors=2)
# Remaining base learners and the logistic-regression meta-learner.
clf2 = RandomForestClassifier(n_estimators=2, random_state=0)
clf3 = SVC(kernel='linear', random_state=0)
lr = LogisticRegression()

# Level 0: fit every base classifier on the bag-of-words features.
clf1.fit(X, y)
clf2.fit(X, y)
clf3.fit(X, y)

# NOTE: every accuracy below is measured on the training data itself, so it
# is an optimistic estimate.
f1 = clf1.predict(X)  # 86.75
acc1 = CalculateAccuracy(y, f1)
print("accuracy from KNN: "+str(acc1) )

f2 = clf2.predict(X)  # 85.32
acc2 = CalculateAccuracy(y, f2)
print("accuracy from Random Forest: "+str(acc2) )

# NOTE(review): the posted code was truncated at this point; the SVM step is
# reconstructed by analogy with the two steps above (f3 is required by the
# stacking step below) - confirm against the original script.
f3 = clf3.predict(X)
acc3 = CalculateAccuracy(y, f3)
print("accuracy from SVM: "+str(acc3) )

# Level 1: the meta-learner is trained on the base predictions, one column per
# base classifier, so lr expects exactly 3 features per sample.
f = [f1,f2,f3]
f = np.transpose(f)
lr.fit(f, y)
final = lr.predict(f)
acc4 = CalculateAccuracy(y, final)
print("accuracy from Stacking: "+str(acc4) )

# Classify a new, unseen review with the stacked model.
f = 'very bad teacher, horrible teaching skills!'
# Same cleaning as for the training corpus: letters only, lower-case,
# stop words removed, stemmed.
review = re.sub('[^a-zA-Z]', ' ', f)
review = review.lower()
review = review.split()
english_stopwords = set(stopwords.words('english'))
review = [ps.stem(word) for word in review if word not in english_stopwords]
review = ' '.join(review)
# Encode with the vectorizer fitted on the training corpus (transform only,
# never re-fit) so the columns line up with what the base models were
# trained on.
review = cv.transform([review])
review = review.toarray()
# lr was fitted on the predictions of the three base classifiers (3 features
# per sample), not on the bag-of-words vector. Passing the count vector
# straight to lr.predict is what raised
# "X has 913 features per sample; expecting 3". Run the review through the
# base classifiers first and stack their outputs the same way as in training.
meta_features = np.transpose([clf.predict(review) for clf in (clf1, clf2, clf3)])
y_new = lr.predict(meta_features)
print(y_new)
Posts: 1
Participants: 1