<div dir="ltr"><div class="gmail_quote"><br><div dir="ltr"><div>Dear Sir or Madam,</div><div><br></div><span><div>I would appreciate it if you could help me with two questions about the following example code:</div><div><br></div><div><span><pre style="color:rgb(0,0,0);font-family:'Courier New';font-size:9.6pt;background-color:rgb(255,255,255)"><span style="color:rgb(0,0,128);font-weight:bold">from </span>collections <span style="color:rgb(0,0,128);font-weight:bold">import </span>Counter<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.datasets <span style="color:rgb(0,0,128);font-weight:bold">import </span>make_classification<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.model_selection <span style="color:rgb(0,0,128);font-weight:bold">import </span>train_test_split,<wbr>StratifiedKFold,learning_<wbr>curve,validation_curve,<wbr>GridSearchCV<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.preprocessing <span style="color:rgb(0,0,128);font-weight:bold">import </span>StandardScaler<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.linear_model <span style="color:rgb(0,0,128);font-weight:bold">import </span>LogisticRegression<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.pipeline <span style="color:rgb(0,0,128);font-weight:bold">import </span>Pipeline<br><span style="color:rgb(0,0,128);font-weight:bold">from </span>sklearn.metrics <span style="color:rgb(0,0,128);font-weight:bold">import </span>classification_report<br><span style="color:rgb(0,0,128);font-weight:bold">import </span>numpy <span style="color:rgb(0,0,128);font-weight:bold">as </span>np<br><span style="color:rgb(0,0,128);font-weight:bold">import </span>matplotlib.pyplot <span style="color:rgb(0,0,128);font-weight:bold">as </span>plt<br><br><span style="color:rgb(0,0,128);font-weight:bold">def </span>plot_learning_curve(train_<wbr>sizes, train_scores, test_scores, title, alpha=<span 
style="color:rgb(0,0,255)">0.1</span>):<br> train_mean = np.mean(train_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> train_std = np.std(train_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> test_mean = np.mean(test_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> test_std = np.std(test_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> plt.plot(train_sizes, train_mean, <span style="color:rgb(102,0,153)">label</span>=<span style="color:rgb(0,128,128);font-weight:bold">'train score'</span>, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'blue'</span>, <span style="color:rgb(102,0,153)">marker</span>=<span style="color:rgb(0,128,128);font-weight:bold">'o'</span>)<br> plt.fill_between(train_sizes, train_mean + train_std,<br> train_mean - train_std, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'blue'</span>, <span style="color:rgb(102,0,153)">alpha</span>=alpha)<br> plt.plot(train_sizes, test_mean, <span style="color:rgb(102,0,153)">label</span>=<span style="color:rgb(0,128,128);font-weight:bold">'test score'</span>, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'red'</span>, <span style="color:rgb(102,0,153)">marker</span>=<span style="color:rgb(0,128,128);font-weight:bold">'o'</span>)<br> plt.fill_between(train_sizes, test_mean + test_std, test_mean - test_std, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'red'</span>, <span style="color:rgb(102,0,153)">alpha</span>=alpha)<br> plt.title(title)<br> plt.xlabel(<span style="color:rgb(0,128,128);font-weight:bold">'Number of training points'</span>)<br> plt.ylabel(<span 
style="color:rgb(0,128,128);font-weight:bold">'F-measure'</span>)<br> plt.grid(<span style="color:rgb(102,0,153)">ls</span>=<span style="color:rgb(0,128,128);font-weight:bold">'--'</span>)<br> plt.legend(<span style="color:rgb(102,0,153)">loc</span>=<span style="color:rgb(0,128,128);font-weight:bold">'best'</span>)<br> plt.show()<br><br><br><span style="color:rgb(0,0,128);font-weight:bold">def </span>plot_validation_curve(param_<wbr>range, train_scores, test_scores, title, alpha=<span style="color:rgb(0,0,255)">0.1</span>):<br> train_mean = np.mean(train_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> train_std = np.std(train_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> test_mean = np.mean(test_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> test_std = np.std(test_scores, <span style="color:rgb(102,0,153)">axis</span>=<span style="color:rgb(0,0,255)">1</span>)<br> plt.plot(param_range, train_mean, <span style="color:rgb(102,0,153)">label</span>=<span style="color:rgb(0,128,128);font-weight:bold">'train score'</span>, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'blue'</span>, <span style="color:rgb(102,0,153)">marker</span>=<span style="color:rgb(0,128,128);font-weight:bold">'o'</span>)<br> plt.fill_between(param_range, train_mean + train_std,<br> train_mean - train_std, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'blue'</span>, <span style="color:rgb(102,0,153)">alpha</span>=alpha)<br> plt.plot(param_range, test_mean, <span style="color:rgb(102,0,153)">label</span>=<span style="color:rgb(0,128,128);font-weight:bold">'test score'</span>, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'red'</span>, <span 
style="color:rgb(102,0,153)">marker</span>=<span style="color:rgb(0,128,128);font-weight:bold">'o'</span>)<br> plt.fill_between(param_range, test_mean + test_std, test_mean - test_std, <span style="color:rgb(102,0,153)">color</span>=<span style="color:rgb(0,128,128);font-weight:bold">'red'</span>, <span style="color:rgb(102,0,153)">alpha</span>=alpha)<br> plt.title(title)<br> plt.grid(<span style="color:rgb(102,0,153)">ls</span>=<span style="color:rgb(0,128,128);font-weight:bold">'--'</span>)<br> plt.xlabel(<span style="color:rgb(0,128,128);font-weight:bold">'Parameter value'</span>)<br> plt.ylabel(<span style="color:rgb(0,128,128);font-weight:bold">'F-measure'</span>)<br> plt.legend(<span style="color:rgb(102,0,153)">loc</span>=<span style="color:rgb(0,128,128);font-weight:bold">'best'</span>)<br> plt.show()<br><br>X, y = make_classification(<span style="color:rgb(102,0,153)">n_classes</span>=<span style="color:rgb(0,0,255)"><wbr>2</span>, <span style="color:rgb(102,0,153)">class_sep</span>=<span style="color:rgb(0,0,255)">2</span>,<span style="color:rgb(102,0,153)">weights</span>=[<span style="color:rgb(0,0,255)">0.9</span>, <span style="color:rgb(0,0,255)">0.1</span>], <span style="color:rgb(102,0,153)">n_informative</span>=<span style="color:rgb(0,0,255)">3</span>, <span style="color:rgb(102,0,153)">n_redundant</span>=<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(102,0,153)">flip_y</span>=<span style="color:rgb(0,0,255)">0</span>, <span style="color:rgb(102,0,153)">n_features</span>=<span style="color:rgb(0,0,255)">20</span>, <span style="color:rgb(102,0,153)">n_clusters_per_class</span>=<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(102,0,153)">n_samples</span>=<span style="color:rgb(0,0,255)">1000</span>, <span style="color:rgb(102,0,153)">random_state</span>=<span style="color:rgb(0,0,255)">10</span>)<br><span style="color:rgb(0,0,128)">print</span>(<span style="color:rgb(0,128,128);font-weight:bold">'Original dataset 
shape {}'</span>.format(Counter(y)))<br><br>ln = X.shape<br>names = [<span style="color:rgb(0,128,128);font-weight:bold">"x%s" </span>% i <span style="color:rgb(0,0,128);font-weight:bold">for </span>i <span style="color:rgb(0,0,128);font-weight:bold">in </span><span style="color:rgb(0,0,128)">range</span>(<span style="color:rgb(0,0,255)">1</span>, ln[<span style="color:rgb(0,0,255)">1</span>] + <span style="color:rgb(0,0,255)">1</span>)]<br><br>X_train, X_test, y_train, y_test = train_test_split(X, y,<span style="color:rgb(102,0,153)">random_state</span>=<span style="color:rgb(0,0,255)">0</span>)<br>st=StandardScaler()<br><br>rg = LogisticRegression(<span style="color:rgb(102,0,153)">class_<wbr>weight </span>= { <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">6.5 </span>}, <span style="color:rgb(102,0,153)">random_state </span>= <span style="color:rgb(0,0,255)">42</span>, <span style="color:rgb(102,0,153)">solver </span>= <span style="color:rgb(0,128,128);font-weight:bold">'saga'</span>,<span style="color:rgb(102,0,153)">max_iter</span>=<span style="color:rgb(0,0,255)">100</span>,<span style="color:rgb(102,0,153)">n_jobs</span>=-<span style="color:rgb(0,0,255)">1</span>)<br><br>param_grid = {<span style="color:rgb(0,128,128);font-weight:bold">'clf__C'</span>: [<span style="color:rgb(0,0,255)">0.001</span>,<span style="color:rgb(0,0,255)">0.01</span>,<span style="color:rgb(0,0,255)">0.1</span>,<span style="color:rgb(0,0,255)">0.002</span>,<span style="color:rgb(0,0,255)">0.02</span>,<span style="color:rgb(0,0,255)">0.<wbr>005</span>,<span style="color:rgb(0,0,255)">0.0007</span>,<span style="color:rgb(0,0,255)">.0006</span>,<span style="color:rgb(0,0,255)">0.0005</span>],<br> <span style="color:rgb(0,128,128);font-weight:bold">'clf__class_weight'</span>:[{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span 
style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">6 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">4 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">5.5 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">4.5 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">5 </span>}]<br> }<br><br>pipeline = Pipeline(<span style="color:rgb(102,0,153)">steps</span>=[(<span style="color:rgb(0,128,128);font-weight:bold">'scaler'</span>, st),<br> (<span style="color:rgb(0,128,128);font-weight:bold">'clf'</span>, rg )])<br><br>cv=StratifiedKFold(<span style="color:rgb(102,0,153)">n_splits</span>=<span style="color:rgb(0,0,255)">5</span>,<span style="color:rgb(102,0,153)"><wbr>random_state</span>=<span style="color:rgb(0,0,255)">42</span>)<br>rg_cv = GridSearchCV(pipeline, param_grid, <span style="color:rgb(102,0,153)">cv</span>=cv, <span style="color:rgb(102,0,153)">scoring </span>= <span style="color:rgb(0,128,128);font-weight:bold">'f1'</span>)<br>rg_cv.fit(X_train, y_train)<br><span style="color:rgb(0,0,128)">print</span>(<span style="color:rgb(0,128,128);font-weight:bold">"Tuned rg best params: {}"</span>.format(rg_cv.best_params_)<wbr>)<br><br>ypred = rg_cv.predict(X_train)<br><span style="color:rgb(0,0,128)">print</span>(classification_report(y_<wbr>train, ypred))<br><span style="color:rgb(0,0,128)">print</span>(<span style="color:rgb(0,128,128);font-weight:bold">'######################'</span><wbr>)<br>ypred2 = rg_cv.predict(X_test)<br><span 
style="color:rgb(0,0,128)">print</span>(classification_report(y_<wbr>test, ypred2))<br><br>plt.figure(<span style="color:rgb(102,0,153)">figsize</span>=(<span style="color:rgb(0,0,255)">9</span>,<span style="color:rgb(0,0,255)">6</span>))<br>param_range1=[i / <span style="color:rgb(0,0,255)">10000.0 </span><span style="color:rgb(0,0,128);font-weight:bold">for </span>i <span style="color:rgb(0,0,128);font-weight:bold">in </span><span style="color:rgb(0,0,128)">range</span>(<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">11</span>)]<br>param_range2=[{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">6 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">4 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">5.5 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">4.5 </span>},{ <span style="color:rgb(0,0,255)">0</span>:<span style="color:rgb(0,0,255)">1</span>, <span style="color:rgb(0,0,255)">1</span>:<span style="color:rgb(0,0,255)">5 </span>}]<br><br><span style="color:rgb(0,0,128);font-weight:bold">if </span>__name__ == <span style="color:rgb(0,128,128);font-weight:bold">'__main__'</span>:<br> train_sizes, train_scores, test_scores = learning_curve(<br> <span style="color:rgb(102,0,153)">estimator</span>= rg_cv.best_estimator_ , <span style="color:rgb(102,0,153)">X</span>= X_train, <span style="color:rgb(102,0,153)">y </span>= y_train,<br> <span style="color:rgb(102,0,153)">train_sizes</span>=np.arange(<span style="color:rgb(0,0,255)">0.1</span>,<span 
style="color:rgb(0,0,255)">1.1</span>,<span style="color:rgb(0,0,255)"><wbr>0.1</span>), <span style="color:rgb(102,0,153)">cv</span>= cv, <span style="color:rgb(102,0,153)">scoring</span>=<span style="color:rgb(0,128,128);font-weight:bold">'f1'</span>, <span style="color:rgb(102,0,153)">n_jobs</span>= - <span style="color:rgb(0,0,255)">1</span>)<br><br> plot_learning_curve(train_<wbr>sizes, train_scores, test_scores, <span style="color:rgb(102,0,153)">title</span>=<span style="color:rgb(0,128,128);font-weight:bold">'Learning curve for Logistic Regression'</span>)<br><br> train_scores, test_scores = validation_curve(<br> <span style="color:rgb(102,0,153)">estimator</span>=rg_cv.best_<wbr>estimator_, <span style="color:rgb(102,0,153)">X</span>=X_train, <span style="color:rgb(102,0,153)">y</span>=y_train, <span style="color:rgb(102,0,153)">param_name</span>=<span style="color:rgb(0,128,128);font-weight:bold">"clf__C"</span>, <span style="color:rgb(102,0,153)">param_range</span>=param_range1,<br> <span style="color:rgb(102,0,153)">cv</span>=cv, <span style="color:rgb(102,0,153)">scoring</span>=<span style="color:rgb(0,128,128);font-weight:bold">"f1"</span>, <span style="color:rgb(102,0,153)">n_jobs</span>=-<span style="color:rgb(0,0,255)">1</span>)<br><br> plot_validation_curve(param_<wbr>range1, train_scores, test_scores, <span style="color:rgb(102,0,153)">title</span>=<span style="color:rgb(0,128,128);font-weight:bold">"Validation Curve for C"</span>, <span style="color:rgb(102,0,153)">alpha</span>=<span style="color:rgb(0,0,255)">0.1</span>)<br><br> train_scores, test_scores = validation_curve(<br> <span style="color:rgb(102,0,153)">estimator</span>=rg_cv.best_<wbr>estimator_, <span style="color:rgb(102,0,153)">X</span>=X_train, <span style="color:rgb(102,0,153)">y</span>=y_train, <span style="color:rgb(102,0,153)">param_name</span>=<span style="color:rgb(0,128,128);font-weight:bold">"clf__class_weight"</span><wbr>, <span 
style="color:rgb(102,0,153)">param_range</span>=param_range2,<br> <span style="color:rgb(102,0,153)">cv</span>=cv, <span style="color:rgb(102,0,153)">scoring</span>=<span style="color:rgb(0,128,128);font-weight:bold">"f1"</span>, <span style="color:rgb(102,0,153)">n_jobs</span>=-<span style="color:rgb(0,0,255)">1</span>)<br><br> plot_validation_curve(param_<wbr>range2, train_scores, test_scores, <span style="color:rgb(102,0,153)">title</span>=<span style="color:rgb(0,128,128);font-weight:bold">"Validation Curve for class_weight"</span>, <span style="color:rgb(102,0,153)">alpha</span>=<span style="color:rgb(0,0,255)">0.1</span>)</pre></span></div><div><br></div><div><span><p>1. Why, when the best estimator from GridSearchCV is passed to the learning_curve function, are all of the earlier print statements executed again several times (when run on Windows)?</p><p>2. How can I plot a validation curve for class_weight? I currently get the following error: <span><font color="#cb008e">TypeError: float() argument must be a string or a number, not 'dict'</font><br></span></p></span></div><div>Thanks in advance.</div><div>Best regards,<span></span></div></span></div>
</div><br></div>