I am new to programming in Python and need to write an algorithm for news classification (sports, culture, entertainment, world news) using multinomial Naive Bayes. I wrote `train_model` and `read_data`, but I can't seem to figure out what I need in between.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB

# Integer class ids used as classifier targets, keyed by lower-cased class name.
_LABEL_IDS = {
    "sports": 0,
    "culture": 1,
    "world news": 2,
    "entertainment": 3,
}


def read_data(f_name):
    """Read articles and their class ids from a ``field: value`` text file.

    Each relevant line has the form ``article: <text>`` or
    ``class: <name>``; field names and class names are matched
    case-insensitively.  ``title`` lines, lines without a colon, and lines
    with any other field name are skipped.

    Args:
        f_name: Path of the file to read.

    Returns:
        A ``(data, labels)`` tuple: ``data`` is the list of article texts and
        ``labels`` is the list of integer class ids (see ``_LABEL_IDS``) in
        file order.  A ``class`` line whose name is not in ``_LABEL_IDS``
        contributes no label, so ``labels`` may be shorter than ``data``
        (for example for an unlabeled test file).
    """
    data = []
    labels = []
    # `with` guarantees the file is closed even if parsing raises.
    with open(f_name) as source:
        for line in source:
            # Split on the first colon only, so colons inside the article
            # text are preserved in `value`.
            field, separator, value = line.partition(":")
            if not separator:
                continue
            field = field.strip().lower()
            value = value.strip()
            if field == "article":
                data.append(value)
            elif field == "class":
                label = _LABEL_IDS.get(value.lower())
                if label is not None:
                    labels.append(label)
    return data, labels


def train_model():
    """Train a multinomial Naive Bayes classifier and write test predictions.

    Reads the training articles and labels from ``data.txt``, converts the
    articles to bag-of-words count features, fits ``MultinomialNB`` on them,
    predicts a class id for every article in ``data_test.txt`` and writes the
    predictions to ``hw3_ans_217_2012.txt``.
    """
    vectorizer = CountVectorizer()

    train_articles, train_labels = read_data('data.txt')
    # The vectorizer learns its vocabulary from the text alone; the labels
    # are only needed by the classifier below.
    xtrain = vectorizer.fit_transform(train_articles)

    test_articles, _ = read_data('data_test.txt')
    # transform (not fit_transform) so the test features use the vocabulary
    # learned from the training articles.
    xtest = vectorizer.transform(test_articles)

    nb = MultinomialNB(alpha=1).fit(xtrain, train_labels)
    ytest = nb.predict(xtest)

    # NOTE(review): the expected answer-file layout is not shown in the
    # original; one predicted class id per line is assumed -- confirm.
    with open("hw3_ans_217_2012.txt", "w") as output:
        output.writelines("{}\n".format(label) for label in ytest)