import numpy as np
import pandas as pd
from IPython.display import display, HTML
ratings = pd.read_csv('BX-Book-Ratings.csv',sep=';')
ratings.head()
User-ID | ISBN | Book-Rating | |
---|---|---|---|
0 | 276725 | 034545104X | 0 |
1 | 276726 | 0155061224 | 5 |
2 | 276727 | 0446520802 | 0 |
3 | 276729 | 052165615X | 3 |
4 | 276729 | 0521795028 | 6 |
#No negative ratings
ratings[ratings['Book-Rating']<0].head()
User-ID | ISBN | Book-Rating |
---|
ratings[ratings['User-ID']==69188].head()
User-ID | ISBN | Book-Rating | |
---|---|---|---|
289554 | 69188 | 0060911239 | 0 |
289555 | 69188 | 0060929499 | 8 |
289556 | 69188 | 0060969989 | 8 |
289557 | 69188 | 014027684X | 9 |
289558 | 69188 | 014029628X | 9 |
users = pd.read_csv('BX-Users.csv',sep=';')
users.head()
User-ID | Location | Age | |
---|---|---|---|
0 | 1 | nyc, new york, usa | NaN |
1 | 2 | stockton, california, usa | 18.0 |
2 | 3 | moscow, yukon territory, russia | NaN |
3 | 4 | porto, v.n.gaia, portugal | 17.0 |
4 | 5 | farnborough, hants, united kingdom | NaN |
books = pd.read_csv('BX-Books.csv',sep='";"')
books.head()
C:\Users\Sahil Gupta\Anaconda2\envs\gl-env\lib\site-packages\ipykernel\__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'. if __name__ == '__main__':
"ISBN | Book-Title | Book-Author | Year-Of-Publication | Publisher | Image-URL-S | Image-URL-M | Image-URL-L" | |
---|---|---|---|---|---|---|---|---|
0 | "0195153448 | Classical Mythology | Mark P. O. Morford | 2002 | Oxford University Press | http://images.amazon.com/images/P/0195153448.0... | http://images.amazon.com/images/P/0195153448.0... | http://images.amazon.com/images/P/0195153448.0... |
1 | "0002005018 | Clara Callan | Richard Bruce Wright | 2001 | HarperFlamingo Canada | http://images.amazon.com/images/P/0002005018.0... | http://images.amazon.com/images/P/0002005018.0... | http://images.amazon.com/images/P/0002005018.0... |
2 | "0060973129 | Decision in Normandy | Carlo D'Este | 1991 | HarperPerennial | http://images.amazon.com/images/P/0060973129.0... | http://images.amazon.com/images/P/0060973129.0... | http://images.amazon.com/images/P/0060973129.0... |
3 | "0374157065 | Flu: The Story of the Great Influenza Pandemic... | Gina Bari Kolata | 1999 | Farrar Straus Giroux | http://images.amazon.com/images/P/0374157065.0... | http://images.amazon.com/images/P/0374157065.0... | http://images.amazon.com/images/P/0374157065.0... |
4 | "0393045218 | The Mummies of Urumchi | E. J. W. Barber | 1999 | W. W. Norton & Company | http://images.amazon.com/images/P/0393045218.0... | http://images.amazon.com/images/P/0393045218.0... | http://images.amazon.com/images/P/0393045218.0... |
books.columns = [col.replace("\"","") for col in books.columns]
books['ISBN'] = [val.replace("\"","") for val in books['ISBN']]
books['ISBN'].head()
0 0195153448 1 0002005018 2 0060973129 3 0374157065 4 0393045218 Name: ISBN, dtype: object
books['Image-URL-L'] = [val.replace("\"","") for val in books['Image-URL-L']]
books['Image-URL-L'].head()
0 http://images.amazon.com/images/P/0195153448.0... 1 http://images.amazon.com/images/P/0002005018.0... 2 http://images.amazon.com/images/P/0060973129.0... 3 http://images.amazon.com/images/P/0374157065.0... 4 http://images.amazon.com/images/P/0393045218.0... Name: Image-URL-L, dtype: object
# Create a new conda environment with Python 2.7.x
#!conda create -n gl-env python=2.7 anaconda=4.0.0
# Activate the conda environment
#!activate gl-env
#!python -m pip install --upgrade pip
#!pip install --upgrade --no-cache-dir https://get.graphlab.com/GraphLab-Create/2.1/gupta376@umn.edu/4715-4040-4813-090E-5214-4B52-7359-CB73/GraphLab-Create-License.tar.gz
import graphlab
#graphlab.get_dependencies()
from sklearn.cross_validation import cross_val_score, train_test_split
## Train the model (fit the data)
train_data, test_data = train_test_split(ratings, train_size=0.95, random_state=100)
train_data.head()
User-ID | ISBN | Book-Rating | |
---|---|---|---|
238955 | 55492 | 0373484143 | 0 |
368161 | 88499 | 076790592X | 9 |
124453 | 28591 | 0451411307 | 0 |
1006318 | 241666 | 0060198125 | 0 |
687490 | 167471 | 1861263317 | 0 |
sf = graphlab.SFrame(train_data)
sfTest = graphlab.SFrame(test_data)
This non-commercial license of GraphLab Create for academic use is assigned to gupta376@umn.edu and will expire on December 12, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\SAHILG~1\AppData\Local\Temp\graphlab_server_1483465715.log.0
popularity_model = graphlab.popularity_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating')
Recsys training: model = popularity
Preparing data set.
Data has 1092291 observations with 102300 users and 330476 items.
Data prepared in: 1.44484s
1092291 observations to process; with 330476 unique items.
popularity_recomm = popularity_model.recommend(users=range(1,10),k=5)
popularity_recomm.print_rows(num_rows=10)
+---------+------------+-------+------+ | User-ID | ISBN | score | rank | +---------+------------+-------+------+ | 1 | 0679415327 | 10.0 | 1 | | 1 | 0971938830 | 10.0 | 2 | | 1 | 0486424650 | 10.0 | 3 | | 1 | 0785270639 | 10.0 | 4 | | 1 | 3492223303 | 10.0 | 5 | | 2 | 0679415327 | 10.0 | 1 | | 2 | 0971938830 | 10.0 | 2 | | 2 | 0486424650 | 10.0 | 3 | | 2 | 0785270639 | 10.0 | 4 | | 2 | 3492223303 | 10.0 | 5 | +---------+------------+-------+------+ [45 rows x 4 columns]
aggr = pd.merge(graphlab.SFrame.to_dataframe(popularity_recomm), books[['ISBN','Book-Title']], on = ['ISBN'], how='inner')
display(aggr)
User-ID | ISBN | score | rank | Book-Title | |
---|---|---|---|---|---|
0 | 1 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
1 | 2 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
2 | 3 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
3 | 4 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
4 | 5 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
5 | 6 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
6 | 7 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
7 | 8 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
8 | 9 | 0679415327 | 10.0 | 1 | A Place of My Own: The Education of an Amateur... |
9 | 1 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
10 | 2 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
11 | 3 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
12 | 4 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
13 | 5 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
14 | 6 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
15 | 7 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
16 | 8 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
17 | 9 | 0971938830 | 10.0 | 2 | The Authentic Annals of the Early Hebrews |
18 | 1 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
19 | 2 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
20 | 3 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
21 | 4 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
22 | 5 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
23 | 6 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
24 | 7 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
25 | 8 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
26 | 9 | 0486424650 | 10.0 | 3 | The Communist Manifesto and Other Revolutionar... |
27 | 1 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
28 | 2 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
29 | 3 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
30 | 4 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
31 | 5 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
32 | 6 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
33 | 7 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
34 | 8 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
35 | 9 | 0785270639 | 10.0 | 4 | Living Simply In God's Abundance Strength And ... |
popularity_model.evaluate_rmse(sfTest, target='Book-Rating')
{'rmse_by_item': Columns: ISBN str count int rmse float Rows: 40456 Data: +------------+-------+---------------+ | ISBN | count | rmse | +------------+-------+---------------+ | 0263737330 | 1 | 2.86717550543 | | 037312189X | 1 | 0.0 | | 0345274563 | 1 | 2.92307692308 | | 0333387287 | 1 | 2.86717550543 | | 050552239X | 1 | 0.0 | | 0743410602 | 1 | 2.5652173913 | | 0590212877 | 1 | 2.33333333333 | | 043957742X | 1 | 0.0 | | 0743417844 | 1 | 0.0 | | 0312929994 | 1 | 2.625 | +------------+-------+---------------+ [40456 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_by_user': Columns: User-ID int count int rmse float Rows: 17780 Data: +---------+-------+---------------+ | User-ID | count | rmse | +---------+-------+---------------+ | 237313 | 1 | 2.4 | | 43116 | 1 | 3.43323863636 | | 115341 | 1 | 5.36170212766 | | 69188 | 1 | 5.0 | | 118012 | 1 | 3.66666666667 | | 66750 | 1 | 1.11428571429 | | 97741 | 1 | 1.0 | | 230692 | 1 | 1.5 | | 135535 | 1 | 2.875 | | 36992 | 2 | 2.63200857442 | +---------+-------+---------------+ [17780 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_overall': 4.073509434250113}
item_sim_model = graphlab.item_similarity_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating')
Recsys training: model = item_similarity
Preparing data set.
Data has 862335 observations with 89722 users and 286804 items.
Data prepared in: 1.12007s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 15.546ms | 1 |
| 58.887ms | 100 |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using sparse lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 2.55s | 0 | 20 |
| 3.57s | 4.5 | 12960 |
| 4.57s | 8.5 | 25031 |
| 5.54s | 12.5 | 36496 |
| 6.56s | 16.5 | 47869 |
| 7.71s | 18.25 | 53054 |
| 8.58s | 19.75 | 57117 |
| 9.54s | 32.25 | 93053 |
| 10.52s | 35.25 | 101152 |
| 11.67s | 41 | 118188 |
| 12.52s | 42 | 121162 |
| 13.52s | 42.75 | 123145 |
| 14.74s | 43.5 | 124866 |
| 15.53s | 43.75 | 125713 |
| 16.51s | 44.25 | 127012 |
| 17.54s | 44.25 | 127161 |
| 18.50s | 45 | 129751 |
| 19.49s | 47 | 135154 |
| 20.49s | 50.5 | 145329 |
| 23.18s | 55.25 | 158973 |
| 24.33s | 57.5 | 165171 |
| 26.36s | 63.5 | 182784 |
| 28.15s | 67.25 | 193051 |
| 28.48s | 67.75 | 194483 |
| 31.23s | 73.5 | 210957 |
| 35.84s | 78 | 223901 |
| 37.99s | 78.75 | 226323 |
| 41.20s | 79.75 | 228962 |
| 42.26s | 79.75 | 229279 |
| 52.56s | 81.25 | 233157 |
| 53.79s | 81.5 | 233851 |
| 57.07s | 81.5 | 233862 |
| 58.48s | 81.5 | 234145 |
| 1m 0s | 81.75 | 234545 |
| 1m 1s | 81.75 | 234651 |
| 1m 5s | 82.25 | 235992 |
| 1m 13s | 82.25 | 235992 |
| 1m 19s | 82.25 | 236493 |
| 1m 22s | 82.5 | 236626 |
| 1m 37s | 82.75 | 237526 |
| 1m 54s | 83.5 | 239676 |
| 1m 59s | 83.75 | 240229 |
| 2m 1s | 83.75 | 240319 |
| 2m 4s | 83.75 | 240441 |
| 3m 32s | 84.5 | 242661 |
| 3m 33s | 84.5 | 242873 |
| 3m 46s | 84.5 | 242873 |
| 3m 49s | 84.5 | 242919 |
| 4m 1s | 84.75 | 243107 |
| 5m 43s | 85.25 | 244512 |
| 5m 50s | 85.25 | 244538 |
| 5m 52s | 85.25 | 244538 |
| 5m 54s | 85.25 | 244567 |
| 5m 56s | 85.25 | 244567 |
| 6m 3s | 85.25 | 244624 |
| 6m 14s | 85.25 | 244700 |
| 6m 45s | 85.5 | 245269 |
| 6m 46s | 85.5 | 245285 |
| 6m 48s | 85.5 | 245422 |
| 6m 50s | 85.5 | 245609 |
| 6m 53s | 85.5 | 245862 |
| 6m 57s | 85.75 | 246259 |
| 7m 17s | 86.25 | 247876 |
| 7m 19s | 86.25 | 248021 |
| 7m 19s | 86.5 | 248144 |
| 7m 21s | 86.5 | 248150 |
| 7m 22s | 86.5 | 248209 |
| 7m 24s | 86.5 | 248264 |
| 7m 38s | 86.5 | 248760 |
| 7m 41s | 86.75 | 248843 |
| 7m 44s | 86.75 | 248937 |
| 7m 45s | 86.75 | 249007 |
| 7m 48s | 87 | 249705 |
| 8m 21s | 87 | 249705 |
| 8m 23s | 87 | 249765 |
| 8m 24s | 87 | 249832 |
| 8m 27s | 87 | 249835 |
| 8m 29s | 87.5 | 251033 |
| 9m 52s | 87.5 | 251033 |
| 9m 53s | 87.5 | 251068 |
| 9m 54s | 87.5 | 251296 |
| 10m 9s | 87.75 | 252312 |
| 11m 29s | 89.25 | 256049 |
| 12m 45s | 89.25 | 256051 |
| 12m 46s | 89.25 | 256196 |
| 12m 50s | 89.25 | 256214 |
| 12m 52s | 89.25 | 256242 |
| 13m 5s | 89.25 | 256555 |
| 13m 8s | 89.25 | 256613 |
| 13m 10s | 89.25 | 256634 |
| 13m 11s | 89.25 | 256660 |
| 13m 11s | 89.5 | 256881 |
| 13m 22s | 89.5 | 256890 |
| 13m 23s | 89.5 | 256923 |
| 13m 25s | 89.5 | 256924 |
| 13m 35s | 89.5 | 257070 |
| 15m 50s | 90.25 | 258949 |
| 15m 54s | 90.25 | 259018 |
| 15m 56s | 91 | 261332 |
| 17m 7s | 91 | 261333 |
| 17m 9s | 91 | 261520 |
| 17m 34s | 91.5 | 262635 |
| 18m 9s | 91.75 | 263538 |
| 18m 13s | 91.75 | 263538 |
| 18m 16s | 91.75 | 263584 |
| 18m 19s | 91.75 | 263737 |
| 18m 24s | 91.75 | 263771 |
| 18m 26s | 91.75 | 263773 |
| 18m 26s | 91.75 | 263808 |
| 18m 28s | 91.75 | 263822 |
| 18m 29s | 91.75 | 263838 |
| 18m 33s | 92 | 264204 |
| 18m 52s | 92 | 264204 |
| 18m 57s | 92 | 264285 |
| 21m 3s | 92.5 | 265847 |
| 21m 17s | 92.5 | 265875 |
| 21m 19s | 92.5 | 265875 |
| 21m 22s | 92.5 | 265911 |
| 21m 24s | 92.5 | 266001 |
| 21m 35s | 92.5 | 266001 |
| 21m 52s | 92.75 | 266275 |
| 21m 57s | 92.75 | 266605 |
| 22m 0s | 93 | 267392 |
| 22m 13s | 93 | 267396 |
| 22m 23s | 93.25 | 267873 |
| 22m 24s | 93.25 | 267916 |
| 27m 5s | 94.75 | 272438 |
| 27m 6s | 95 | 272511 |
| 27m 7s | 95 | 272597 |
| 27m 8s | 95 | 272924 |
| 27m 11s | 95 | 272924 |
| 27m 14s | 95 | 273086 |
| 27m 15s | 95 | 273174 |
| 27m 19s | 95.25 | 273894 |
| 27m 27s | 95.5 | 273940 |
| 27m 29s | 95.5 | 273940 |
| 27m 32s | 95.5 | 274041 |
| 27m 34s | 95.5 | 274091 |
| 27m 35s | 95.5 | 274103 |
| 28m 27s | 95.75 | 275265 |
| 28m 35s | 95.75 | 275329 |
| 28m 40s | 96 | 275354 |
| 28m 42s | 96 | 275357 |
| 29m 22s | 96 | 275849 |
| 29m 23s | 96 | 275863 |
| 29m 33s | 96 | 275981 |
| 31m 45s | 96.75 | 278054 |
| 32m 45s | 96.75 | 278057 |
| 32m 46s | 96.75 | 278136 |
| 32m 56s | 96.75 | 278136 |
| 33m 10s | 97 | 278257 |
| 33m 11s | 97 | 278269 |
| 33m 12s | 97 | 278285 |
| 33m 13s | 97 | 278304 |
| 33m 14s | 97 | 278304 |
| 33m 15s | 97 | 278368 |
| 33m 20s | 97 | 278368 |
| 34m 7s | 97 | 278906 |
| 34m 8s | 97.25 | 278924 |
| 34m 52s | 98 | 281627 |
| 37m 11s | 98 | 281627 |
| 37m 12s | 98 | 281711 |
| 37m 23s | 98 | 281718 |
| 37m 24s | 98.25 | 281798 |
| 37m 42s | 98.5 | 283069 |
| 38m 5s | 98.75 | 283922 |
| 38m 9s | 99 | 284027 |
| 38m 12s | 99 | 284102 |
| 38m 16s | 99 | 284166 |
| 38m 49s | 99.25 | 284771 |
| 38m 52s | 99.25 | 284798 |
| 39m 41s | 99.5 | 285532 |
| 39m 55s | 99.5 | 285717 |
| 39m 58s | 99.75 | 286122 |
| 40m 31s | 99.75 | 286164 |
| 40m 34s | 99.75 | 286165 |
| 45m 11s | 100 | 286804 |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 2712.91s
#Make Recommendations:
item_sim_recomm = item_sim_model.recommend(users=range(1,10),k=5)
item_sim_recomm.print_rows(num_rows=10)
+---------+------------+------------------+------+ | User-ID | ISBN | score | rank | +---------+------------+------------------+------+ | 1 | 044021145X | 0.0100413656235 | 1 | | 1 | 0060928336 | 0.00940719604492 | 2 | | 1 | 0440213525 | 0.00894405722618 | 3 | | 1 | 0312195516 | 0.00889273405075 | 4 | | 1 | 0440214041 | 0.00855695486069 | 5 | | 2 | 044021145X | 0.0100413656235 | 1 | | 2 | 0060928336 | 0.00940719604492 | 2 | | 2 | 0440213525 | 0.00894405722618 | 3 | | 2 | 0312195516 | 0.00889273405075 | 4 | | 2 | 0440214041 | 0.00855695486069 | 5 | +---------+------------+------------------+------+ [45 rows x 4 columns]
item_sim_recomm.show()
Canvas is updated and available in a tab in the default browser.
aggr = pd.merge(graphlab.SFrame.to_dataframe(item_sim_recomm), books[['ISBN','Book-Title']], on = ['ISBN'], how='inner')
display(aggr)
User-ID | ISBN | score | rank | Book-Title | |
---|---|---|---|---|---|
0 | 1 | 044021145X | 0.010041 | 1 | The Firm |
1 | 2 | 044021145X | 0.010041 | 1 | The Firm |
2 | 3 | 044021145X | 0.010041 | 1 | The Firm |
3 | 4 | 044021145X | 0.010041 | 1 | The Firm |
4 | 5 | 044021145X | 0.010041 | 1 | The Firm |
5 | 6 | 044021145X | 0.010041 | 1 | The Firm |
6 | 7 | 044021145X | 0.010041 | 1 | The Firm |
7 | 1 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
8 | 2 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
9 | 3 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
10 | 4 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
11 | 5 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
12 | 6 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
13 | 7 | 0060928336 | 0.009407 | 2 | Divine Secrets of the Ya-Ya Sisterhood: A Novel |
14 | 1 | 0440213525 | 0.008944 | 3 | The Client |
15 | 2 | 0440213525 | 0.008944 | 3 | The Client |
16 | 3 | 0440213525 | 0.008944 | 3 | The Client |
17 | 4 | 0440213525 | 0.008944 | 3 | The Client |
18 | 5 | 0440213525 | 0.008944 | 3 | The Client |
19 | 6 | 0440213525 | 0.008944 | 3 | The Client |
20 | 7 | 0440213525 | 0.008944 | 3 | The Client |
21 | 1 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
22 | 2 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
23 | 3 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
24 | 4 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
25 | 5 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
26 | 6 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
27 | 7 | 0312195516 | 0.008893 | 4 | The Red Tent (Bestselling Backlist) |
28 | 1 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
29 | 2 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
30 | 3 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
31 | 4 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
32 | 5 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
33 | 6 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
34 | 7 | 0440214041 | 0.008557 | 5 | The Pelican Brief |
35 | 8 | 1861470592 | 0.035714 | 1 | The Small Garden Handbook |
36 | 8 | 0671890778 | 0.035714 | 2 | Hold It! You're Exercizing Wrong : Your Prescr... |
37 | 8 | 0817012370 | 0.035714 | 3 | How to Recover from Grief |
38 | 8 | 0553252275 | 0.035714 | 4 | Reflections on Life After Life |
39 | 8 | 078686043X | 0.035714 | 5 | The Beardstown Ladies' Common-Sense Investment... |
40 | 9 | 0385312202 | 0.017857 | 1 | Leaving Cold Sassy: The Unfinished Sequel to C... |
41 | 9 | 0553278355 | 0.014286 | 2 | The Bell Jar |
42 | 9 | 0316899984 | 0.013201 | 3 | River, Cross My Heart |
43 | 9 | 0060175982 | 0.012821 | 4 | Mars and Venus Starting Over: A Practical Guid... |
44 | 9 | 0312950853 | 0.012821 | 5 | The Nanny Murder Trial (St. Martin's True Crim... |
model_performance = graphlab.compare(sfTest, [popularity_model, item_sim_model])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_model])
PROGRESS: Evaluate model M0
recommendations finished on 1000/47880 queries. users per second: 287.309
recommendations finished on 2000/47880 queries. users per second: 284.384
recommendations finished on 3000/47880 queries. users per second: 286.821
recommendations finished on 4000/47880 queries. users per second: 286.685
recommendations finished on 5000/47880 queries. users per second: 287.046
recommendations finished on 6000/47880 queries. users per second: 287.064
recommendations finished on 7000/47880 queries. users per second: 287.079
recommendations finished on 8000/47880 queries. users per second: 287.537
recommendations finished on 9000/47880 queries. users per second: 287.533
recommendations finished on 10000/47880 queries. users per second: 287.681
recommendations finished on 11000/47880 queries. users per second: 288.087
recommendations finished on 12000/47880 queries. users per second: 286.695
recommendations finished on 13000/47880 queries. users per second: 284.319
recommendations finished on 14000/47880 queries. users per second: 282.179
recommendations finished on 15000/47880 queries. users per second: 282.417
recommendations finished on 16000/47880 queries. users per second: 282.755
recommendations finished on 17000/47880 queries. users per second: 283.235
recommendations finished on 18000/47880 queries. users per second: 283.599
recommendations finished on 19000/47880 queries. users per second: 283.815
recommendations finished on 20000/47880 queries. users per second: 284.059
recommendations finished on 21000/47880 queries. users per second: 284.132
recommendations finished on 22000/47880 queries. users per second: 284.121
recommendations finished on 23000/47880 queries. users per second: 284.164
recommendations finished on 24000/47880 queries. users per second: 284.463
recommendations finished on 25000/47880 queries. users per second: 284.739
recommendations finished on 26000/47880 queries. users per second: 284.781
recommendations finished on 27000/47880 queries. users per second: 283.839
recommendations finished on 28000/47880 queries. users per second: 279.88
recommendations finished on 29000/47880 queries. users per second: 274.783
recommendations finished on 30000/47880 queries. users per second: 271.046
recommendations finished on 31000/47880 queries. users per second: 268.063
recommendations finished on 32000/47880 queries. users per second: 264.9
recommendations finished on 33000/47880 queries. users per second: 262.532
recommendations finished on 34000/47880 queries. users per second: 259.89
recommendations finished on 35000/47880 queries. users per second: 257.496
recommendations finished on 36000/47880 queries. users per second: 257.058
recommendations finished on 37000/47880 queries. users per second: 257.773
recommendations finished on 38000/47880 queries. users per second: 258.627
recommendations finished on 39000/47880 queries. users per second: 259.48
recommendations finished on 40000/47880 queries. users per second: 260.368
recommendations finished on 41000/47880 queries. users per second: 261.166
recommendations finished on 42000/47880 queries. users per second: 261.909
recommendations finished on 43000/47880 queries. users per second: 262.586
recommendations finished on 44000/47880 queries. users per second: 263.226
recommendations finished on 45000/47880 queries. users per second: 263.273
recommendations finished on 46000/47880 queries. users per second: 263.363
recommendations finished on 47000/47880 queries. users per second: 263.333
Precision and recall summary statistics by cutoff +--------+-------------------+-------------------+ | cutoff | mean_precision | mean_recall | +--------+-------------------+-------------------+ | 1 | 0.0 | 0.0 | | 2 | 0.0 | 0.0 | | 3 | 6.96184906711e-06 | 1.04427736007e-05 | | 4 | 1.04427736007e-05 | 2.08855472013e-05 | | 5 | 8.35421888053e-06 | 2.08855472013e-05 | | 6 | 6.96184906711e-06 | 2.08855472013e-05 | | 7 | 5.96729920038e-06 | 2.08855472013e-05 | | 8 | 7.8320802005e-06 | 2.09549344013e-05 | | 9 | 6.96184906711e-06 | 2.09549344013e-05 | | 10 | 6.2656641604e-06 | 2.09549344013e-05 | +--------+-------------------+-------------------+ [10 rows x 3 columns] PROGRESS: Evaluate model M1
recommendations finished on 1000/47880 queries. users per second: 168.926
recommendations finished on 2000/47880 queries. users per second: 171.453
recommendations finished on 3000/47880 queries. users per second: 182.693
recommendations finished on 4000/47880 queries. users per second: 192.49
recommendations finished on 5000/47880 queries. users per second: 197.691
recommendations finished on 6000/47880 queries. users per second: 202.161
recommendations finished on 7000/47880 queries. users per second: 205.281
recommendations finished on 8000/47880 queries. users per second: 207.419
recommendations finished on 9000/47880 queries. users per second: 209.144
recommendations finished on 10000/47880 queries. users per second: 209.605
recommendations finished on 11000/47880 queries. users per second: 208.333
recommendations finished on 12000/47880 queries. users per second: 208.403
recommendations finished on 13000/47880 queries. users per second: 208.118
recommendations finished on 14000/47880 queries. users per second: 206.763
recommendations finished on 15000/47880 queries. users per second: 207.29
recommendations finished on 16000/47880 queries. users per second: 207.835
recommendations finished on 17000/47880 queries. users per second: 208.364
recommendations finished on 18000/47880 queries. users per second: 208.216
recommendations finished on 19000/47880 queries. users per second: 208.83
recommendations finished on 20000/47880 queries. users per second: 208.696
recommendations finished on 21000/47880 queries. users per second: 208.558
recommendations finished on 22000/47880 queries. users per second: 207.861
recommendations finished on 23000/47880 queries. users per second: 207.68
recommendations finished on 24000/47880 queries. users per second: 208.381
recommendations finished on 25000/47880 queries. users per second: 209.025
recommendations finished on 26000/47880 queries. users per second: 209.627
recommendations finished on 27000/47880 queries. users per second: 210.14
recommendations finished on 28000/47880 queries. users per second: 210.684
recommendations finished on 29000/47880 queries. users per second: 211.21
recommendations finished on 30000/47880 queries. users per second: 211.735
recommendations finished on 31000/47880 queries. users per second: 212.197
recommendations finished on 32000/47880 queries. users per second: 212.657
recommendations finished on 33000/47880 queries. users per second: 212.939
recommendations finished on 34000/47880 queries. users per second: 213.327
recommendations finished on 35000/47880 queries. users per second: 213.703
recommendations finished on 36000/47880 queries. users per second: 214.008
recommendations finished on 37000/47880 queries. users per second: 214.302
recommendations finished on 38000/47880 queries. users per second: 214.576
recommendations finished on 39000/47880 queries. users per second: 214.812
recommendations finished on 40000/47880 queries. users per second: 215.05
recommendations finished on 41000/47880 queries. users per second: 215.262
recommendations finished on 42000/47880 queries. users per second: 215.451
recommendations finished on 43000/47880 queries. users per second: 215.531
recommendations finished on 44000/47880 queries. users per second: 215.055
recommendations finished on 45000/47880 queries. users per second: 215.262
recommendations finished on 46000/47880 queries. users per second: 215.422
recommendations finished on 47000/47880 queries. users per second: 215.628
Precision and recall summary statistics by cutoff +--------+------------------+------------------+ | cutoff | mean_precision | mean_recall | +--------+------------------+------------------+ | 1 | 0.00921052631579 | 0.00271518546502 | | 2 | 0.00749791144528 | 0.00435890716239 | | 3 | 0.00619604566973 | 0.00541474996357 | | 4 | 0.00541457811195 | 0.00644506824528 | | 5 | 0.00475772765246 | 0.00703571716626 | | 6 | 0.00424672793094 | 0.00767339928592 | | 7 | 0.00381907148824 | 0.00795239029457 | | 8 | 0.00350616123642 | 0.00832217537057 | | 9 | 0.00328367214332 | 0.00899210770841 | | 10 | 0.00310359231412 | 0.00965827849019 | +--------+------------------+------------------+ [10 rows x 3 columns] Model compare metric: precision_recall Canvas is updated and available in a tab in the default browser.
popularity_recomm.show()
Canvas is accessible via web browser at the URL: http://localhost:54099/index.html Opening Canvas in default web browser.
matrix_factor_model = graphlab.factorization_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating',
regularization=1e-6, max_iterations=500)
Recsys training: model = factorization_recommender
Preparing data set.
Data has 1092291 observations with 102300 users and 330476 items.
Data prepared in: 1.43589s
Training factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter | Description | Value |
+--------------------------------+--------------------------------------------------+----------+
| num_factors | Factor Dimension | 8 |
| regularization | L2 Regularization on Factors | 1e-006 |
| solver | Solver used for training | sgd |
| linear_regularization | L2 Regularization on Linear Coefficients | 1e-010 |
| max_iterations | Maximum Number of Iterations | 500 |
+--------------------------------+--------------------------------------------------+----------+
Optimizing model using SGD; tuning step size.
Using 136536 / 1092291 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value |
+---------+-------------------+------------------------------------------+
| 0 | 25 | No Decrease (62.7398 >= 14.8356) |
| 1 | 6.25 | No Decrease (65.6024 >= 14.8356) |
| 2 | 1.5625 | No Decrease (58.1537 >= 14.8356) |
| 3 | 0.390625 | No Decrease (27.6335 >= 14.8356) |
| 4 | 0.0976562 | 0.923907 |
| 5 | 0.0488281 | 6.3842 |
+---------+-------------------+------------------------------------------+
| Final | 0.0976562 | 0.923907 |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 5.013ms | 14.8563 | 3.85439 | |
+---------+--------------+-------------------+-----------------------+-------------+
| 1 | 269.323ms | 25.2161 | 4.8682 | 0.0976562 |
| 2 | 561.028ms | 19.2428 | 4.21399 | 0.0580668 |
| 3 | 778.604ms | 12.8849 | 3.40055 | 0.042841 |
| 4 | 1.03s | 8.87575 | 2.76633 | 0.0290334 |
| 5 | 1.27s | 6.98182 | 2.40467 | 0.0245592 |
| 6 | 1.54s | 6.01277 | 2.19747 | 0.0214205 |
| 10 | 2.33s | 4.40128 | 1.80303 | 0.014603 |
| 11 | 2.66s | 4.22173 | 1.75394 | 0.0135956 |
| 15 | 3.51s | 3.75562 | 1.62098 | 0.0107739 |
| 20 | 4.42s | 3.45361 | 1.53091 | 0.008683 |
| 25 | 5.35s | 3.27705 | 1.47666 | 0.00734493 |
| 30 | 6.31s | 3.13993 | 1.43381 | 0.00640622 |
| 35 | 7.30s | 3.04788 | 1.40488 | 0.00570679 |
| 40 | 8.29s | 2.97842 | 1.38319 | 0.00516295 |
| 45 | 9.22s | 2.92098 | 1.36532 | 0.00472643 |
| 50 | 10.14s | 2.87083 | 1.34961 | 0.00436732 |
| 51 | 10.37s | 2.8639 | 1.34754 | 0.00430294 |
| 55 | 11.30s | 2.82912 | 1.3365 | 0.00406603 |
| 60 | 12.42s | 2.79553 | 1.32622 | 0.00380916 |
| 65 | 13.43s | 2.76362 | 1.31636 | 0.00358722 |
| 70 | 14.67s | 2.73734 | 1.3083 | 0.00339327 |
| 75 | 15.66s | 2.71249 | 1.3007 | 0.00322216 |
| 80 | 16.57s | 2.69051 | 1.29397 | 0.00306991 |
| 85 | 17.50s | 2.67032 | 1.28778 | 0.00293345 |
| 90 | 18.49s | 2.65301 | 1.28264 | 0.00281035 |
| 95 | 19.42s | 2.63589 | 1.2775 | 0.00269867 |
| 100 | 20.35s | 2.61984 | 1.27261 | 0.00259682 |
| 101 | 20.57s | 2.61689 | 1.2717 | 0.00257752 |
| 105 | 21.36s | 2.60532 | 1.26826 | 0.00250352 |
| 110 | 22.34s | 2.59196 | 1.26428 | 0.00241768 |
| 115 | 23.27s | 2.58595 | 1.26311 | 0.0023384 |
| 120 | 24.20s | 2.52658 | 1.24017 | 0.00134674 |
| 125 | 25.12s | 2.49454 | 1.22769 | 0.000776632 |
| 130 | 26.07s | 2.47645 | 1.2206 | 0.000448402 |
| 135 | 27.04s | 2.47313 | 1.21948 | 0.000435888 |
| 140 | 28.00s | 2.46361 | 1.21575 | 0.000252207 |
| 145 | 29.12s | 2.4569 | 1.2131 | 0.000146068 |
| 150 | 30.12s | 2.45369 | 1.21184 | |
| 155 | 31.19s | 2.45185 | 1.21112 | |
| 160 | 32.11s | 2.45042 | 1.21056 | 3.3918e-005 |
| 165 | 33.02s | 2.45 | 1.2104 | |
| 170 | 33.95s | 2.44983 | 1.21035 | |
| 175 | 34.94s | 2.44971 | 1.21031 | |
| 180 | 35.86s | 2.44954 | 1.21025 | |
| 185 | 36.86s | 2.44943 | 1.21022 | 2.5579e-005 |
| 190 | 37.82s | 2.44927 | 1.21017 | |
| 195 | 38.80s | 2.44914 | 1.21013 | |
| 200 | 39.72s | 2.44901 | 1.21009 | |
| 205 | 40.66s | 2.44889 | 1.21006 | |
| 210 | 41.55s | 2.44858 | 1.20994 | |
| 215 | 42.59s | 2.44818 | 1.20978 | |
| 220 | 43.51s | 2.44812 | 1.20977 | |
| 225 | 44.41s | 2.44804 | 1.20974 | |
| 230 | 45.42s | 2.44798 | 1.20972 | 1.2918e-005 |
| 235 | 46.36s | 2.44791 | 1.2097 | |
| 240 | 47.26s | 2.44785 | 1.20968 | |
| 245 | 48.33s | 2.44779 | 1.20966 | |
| 250 | 49.28s | 2.44773 | 1.20965 | |
| 255 | 50.25s | 2.44767 | 1.20963 | 1.1956e-005 |
| 260 | 51.20s | 2.44761 | 1.20961 | |
| 265 | 52.20s | 2.44755 | 1.20959 | 1.1616e-005 |
| 270 | 53.13s | 2.44749 | 1.20958 | |
| 275 | 54.13s | 2.44744 | 1.20956 | |
| 280 | 55.09s | 2.44738 | 1.20954 | |
| 285 | 56.05s | 2.44732 | 1.20953 | |
| 290 | 57.01s | 2.44727 | 1.20951 | |
| 295 | 57.94s | 2.44722 | 1.2095 | |
| 300 | 58.82s | 2.44716 | 1.20948 | 1.0584e-005 |
| 305 | 59.83s | 2.44711 | 1.20947 | |
| 310 | 1m 0s | 2.44706 | 1.20945 | |
| 315 | 1m 1s | 2.44701 | 1.20944 | |
| 320 | 1m 2s | 2.44697 | 1.20942 | |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Convergence on objective within bounds.
Computing final objective value and training RMSE.
Final objective value: 2.44638
Final training RMSE: 1.20918
#Make Recommendations:
matrix_factor_recomm = matrix_factor_model.recommend(users=[69188],k=5)
matrix_factor_recomm.print_rows(num_rows=5)
+---------+------------+---------------+------+ | User-ID | ISBN | score | rank | +---------+------------+---------------+------+ | 69188 | 0671734563 | 31.7740267971 | 1 | | 69188 | 0394800389 | 31.5581492641 | 2 | | 69188 | 0553057758 | 28.8189067104 | 3 | | 69188 | 0152047379 | 28.4868544796 | 4 | | 69188 | 0380799502 | 28.193433688 | 5 | +---------+------------+---------------+------+ [5 rows x 4 columns]
matrix_factor_model.evaluate_rmse(sfTest, target='Book-Rating')
{'rmse_by_item': Columns: ISBN str count int rmse float Rows: 40456 Data: +------------+-------+----------------+ | ISBN | count | rmse | +------------+-------+----------------+ | 0263737330 | 1 | 0.133969233303 | | 037312189X | 1 | 0.256416325525 | | 0345274563 | 1 | 4.99950559524 | | 0333387287 | 1 | 0.834043906002 | | 050552239X | 1 | 0.700068726272 | | 0743410602 | 1 | 0.906036569805 | | 0590212877 | 1 | 1.04643426922 | | 043957742X | 1 | 0.384221271544 | | 0743417844 | 1 | 4.03500670853 | | 0312929994 | 1 | 7.76407335785 | +------------+-------+----------------+ [40456 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_by_user': Columns: User-ID int count int rmse float Rows: 17780 Data: +---------+-------+----------------+ | User-ID | count | rmse | +---------+-------+----------------+ | 237313 | 1 | 2.23872773436 | | 43116 | 1 | 0.585648565621 | | 115341 | 1 | 6.0998668513 | | 69188 | 1 | 1.13800565985 | | 118012 | 1 | 3.63738138887 | | 66750 | 1 | 2.53537179616 | | 97741 | 1 | 2.83883710119 | | 230692 | 1 | 1.108466939 | | 135535 | 1 | 9.15111968753 | | 36992 | 2 | 12.6012445133 | +---------+-------+----------------+ [17780 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_overall': 4.254394701094985}
rank_factor_model = graphlab.ranking_factorization_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating',
max_iterations=50)
Recsys training: model = ranking_factorization_recommender
Preparing data set.
Data has 1092291 observations with 102300 users and 330476 items.
Data prepared in: 1.45786s
Training ranking_factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter | Description | Value |
+--------------------------------+--------------------------------------------------+----------+
| num_factors | Factor Dimension | 32 |
| regularization | L2 Regularization on Factors | 1e-009 |
| solver | Solver used for training | sgd |
| linear_regularization | L2 Regularization on Linear Coefficients | 1e-009 |
| ranking_regularization | Rank-based Regularization Weight | 0.25 |
| max_iterations | Maximum Number of Iterations | 50 |
+--------------------------------+--------------------------------------------------+----------+
Optimizing model using SGD; tuning step size.
Using 136536 / 1092291 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value |
+---------+-------------------+------------------------------------------+
| 0 | 25 | Not Viable |
| 1 | 6.25 | Not Viable |
| 2 | 1.5625 | Not Viable |
| 3 | 0.390625 | Not Viable |
| 4 | 0.0976562 | No Decrease (118.188 >= 29.0736) |
| 5 | 0.0244141 | 20.2593 |
| 6 | 0.012207 | 21.8816 |
| 7 | 0.00610352 | 23.0865 |
| 8 | 0.00305176 | 24.1301 |
+---------+-------------------+------------------------------------------+
| Final | 0.0244141 | 20.2593 |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 0us | 29.1255 | 3.85439 | |
+---------+--------------+-------------------+-----------------------+-------------+
| 1 | 1.22s | 23.8308 | 3.67647 | 0.0244141 |
| 2 | 2.27s | 22.5723 | 3.58656 | 0.0145167 |
| 3 | 3.25s | 21.6437 | 3.49804 | 0.0107102 |
| 4 | 4.22s | 20.4307 | 3.36114 | 0.00863167 |
| 5 | 5.20s | 18.9328 | 3.17794 | 0.00730151 |
| 6 | 6.17s | 17.3876 | 2.97577 | 0.00636835 |
| 10 | 10.04s | 12.8017 | 2.32913 | 0.0043415 |
| 11 | 11.01s | 12.0613 | 2.2169 | 0.00404199 |
| 15 | 14.78s | 10.0048 | 1.88625 | 0.00320311 |
| 20 | 19.36s | 8.54348 | 1.6416 | 0.00258147 |
| 25 | 23.88s | 7.64791 | 1.48445 | 0.00218366 |
| 30 | 28.38s | 7.01902 | 1.37335 | 0.00190458 |
| 35 | 32.82s | 6.5621 | 1.28996 | 0.00169664 |
| 40 | 37.31s | 6.21423 | 1.22521 | 0.00153495 |
| 45 | 41.82s | 5.92714 | 1.17354 | 0.00140518 |
| 50 | 46.36s | 5.68119 | 1.12973 | 0.00129841 |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Maximum number of passes through the data reached.
Computing final objective value and training RMSE.
Final objective value: 5.57345
Final training RMSE: 1.08339
#Make Recommendations:
rank_factor_recomm = rank_factor_model.recommend(users=[69188],k=5)
rank_factor_recomm.print_rows(num_rows=5)
+---------+------------+---------------+------+ | User-ID | ISBN | score | rank | +---------+------------+---------------+------+ | 69188 | 0811825558 | 14.8589946487 | 1 | | 69188 | 0345315715 | 14.6149875381 | 2 | | 69188 | 0553380958 | 14.2803146103 | 3 | | 69188 | 0060976241 | 13.6427222946 | 4 | | 69188 | 0812550706 | 13.5892755249 | 5 | +---------+------------+---------------+------+ [5 rows x 4 columns]
rank_factor_model.evaluate_rmse(sfTest, target='Book-Rating')
{'rmse_by_item': Columns: ISBN str count int rmse float Rows: 40456 Data: +------------+-------+---------------+ | ISBN | count | rmse | +------------+-------+---------------+ | 0263737330 | 1 | 5.34130151484 | | 037312189X | 1 | 4.46604509565 | | 0345274563 | 1 | 1.4633745617 | | 0333387287 | 1 | 5.31082303736 | | 050552239X | 1 | 4.30599315378 | | 0743410602 | 1 | 1.65934709001 | | 0590212877 | 1 | 3.46737320635 | | 043957742X | 1 | 3.38589246485 | | 0743417844 | 1 | 8.80384717795 | | 0312929994 | 1 | 2.51979693857 | +------------+-------+---------------+ [40456 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_by_user': Columns: User-ID int count int rmse float Rows: 17780 Data: +---------+-------+----------------+ | User-ID | count | rmse | +---------+-------+----------------+ | 237313 | 1 | 1.49798698637 | | 43116 | 1 | 2.84383349207 | | 115341 | 1 | 5.26586948249 | | 69188 | 1 | 3.36552937242 | | 118012 | 1 | 5.01669451508 | | 66750 | 1 | 3.62250982311 | | 97741 | 1 | 2.89033086512 | | 230692 | 1 | 0.797411755591 | | 135535 | 1 | 2.65705485728 | | 36992 | 2 | 3.30571380721 | +---------+-------+----------------+ [17780 rows x 3 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns., 'rmse_overall': 7.080758290955872}
books[['ISBN','Book-Title','Book-Author','Year-Of-Publication','Publisher']].head()
ISBN | Book-Title | Book-Author | Year-Of-Publication | Publisher | |
---|---|---|---|---|---|
0 | 0195153448 | Classical Mythology | Mark P. O. Morford | 2002 | Oxford University Press |
1 | 0002005018 | Clara Callan | Richard Bruce Wright | 2001 | HarperFlamingo Canada |
2 | 0060973129 | Decision in Normandy | Carlo D'Este | 1991 | HarperPerennial |
3 | 0374157065 | Flu: The Story of the Great Influenza Pandemic... | Gina Bari Kolata | 1999 | Farrar Straus Giroux |
4 | 0393045218 | The Mummies of Urumchi | E. J. W. Barber | 1999 | W. W. Norton & Company |
items_data = books[['ISBN','Book-Title','Book-Author','Year-Of-Publication','Publisher']]
items_data.to_csv('items_data.csv', sep="|",index=False)
items_sf = graphlab.SFrame.read_csv('items_data.csv', sep="|")
Unable to parse line "078946697X|"DK Readers: Creating the X-Men, How It All Began (Level 4: Proficient Readers)\"""|Michael Teitelbaum|2000|DK Publishing Inc"
Unable to parse line "2070426769|"Peuple du ciel, suivi de 'Les Bergers\"""|Jean-Marie Gustave Le Cl�©zio|2003|Gallimard"
Unable to parse line "0789466953|"DK Readers: Creating the X-Men, How Comic Books Come to Life (Level 4: Proficient Readers)\"""|James Buckley|2000|DK Publishing Inc"
3 lines failed to parse correctly
Finished parsing file C:\Users\Sahil Gupta\Google Drive\Winter Project\items_data.csv
Parsing completed. Parsed 100 lines in 0.746783 secs.
------------------------------------------------------ Inferred types from first 100 line(s) of file as column_type_hints=[str,str,str,long,str] If parsing fails due to incorrect types, you can correct the inferred type list above and pass it to read_csv in the column_type_hints argument ------------------------------------------------------
Unable to parse line "078946697X|"DK Readers: Creating the X-Men, How It All Began (Level 4: Proficient Readers)\"""|Michael Teitelbaum|2000|DK Publishing Inc"
Unable to parse line "2070426769|"Peuple du ciel, suivi de 'Les Bergers\"""|Jean-Marie Gustave Le Cl�©zio|2003|Gallimard"
Unable to parse line "0789466953|"DK Readers: Creating the X-Men, How Comic Books Come to Life (Level 4: Proficient Readers)\"""|James Buckley|2000|DK Publishing Inc"
3 lines failed to parse correctly
Finished parsing file C:\Users\Sahil Gupta\Google Drive\Winter Project\items_data.csv
Parsing completed. Parsed 271376 lines in 0.615655 secs.
items_sf.head(2)
ISBN | Book-Title | Book-Author | Year-Of-Publication | Publisher |
---|---|---|---|---|
0195153448 | Classical Mythology | Mark P. O. Morford | 2002 | Oxford University Press |
0002005018 | Clara Callan | Richard Bruce Wright | 2001 | HarperFlamingo Canada |
content_based_model = graphlab.item_content_recommender.create(items_sf, observation_data=graphlab.SFrame(ratings),
user_id='User-ID', item_id='ISBN', target='Book-Rating')
WARNING: The ItemContentRecommender model is still in beta. WARNING: This feature transformer is still in beta, and some interpretation rules may change in the future. ('Applying transform:\n', Class : AutoVectorizer Model Fields ------------ Features : ['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher'] Excluded Features : ['ISBN'] Column Type Interpretation Transforms Output Type ------------------- ---- -------------- --------------------------------- ----------- Book-Title str short_text 3-Character NGram Counts -> TFIDF dict Book-Author str categorical None str Year-Of-Publication int numerical None int Publisher str categorical None str )
Recsys training: model = item_content_recommender
Defaulting to brute force instead of ball tree because there are multiple distance components.
Starting brute force nearest neighbors model training.
Starting pairwise querying.
+--------------+---------+-------------+--------------+
| Query points | # Pairs | % Complete. | Elapsed Time |
+--------------+---------+-------------+--------------+
| 0 | 67844 | | 198.834ms |
| 17 | 4681236 | 0.00635649 | 1.24s |
| 30 | 8344812 | 0.0113311 | 2.20s |
| 46 | 1e+007 | 0.0170428 | 3.22s |
| 61 | 2e+007 | 0.0227544 | 4.20s |
| 78 | 2e+007 | 0.0287424 | 5.17s |
| 95 | 3e+007 | 0.0352831 | 6.18s |
| 112 | 3e+007 | 0.0414554 | 7.15s |
| 130 | 4e+007 | 0.0479961 | 8.15s |
| 149 | 4e+007 | 0.0549054 | 9.14s |
| 167 | 5e+007 | 0.0616304 | 10.13s |
| 186 | 5e+007 | 0.0686317 | 11.13s |
| 204 | 6e+007 | 0.0753567 | 12.12s |
| 223 | 6e+007 | 0.0824502 | 13.13s |
| 243 | 7e+007 | 0.0895437 | 14.12s |
| 262 | 7e+007 | 0.0966371 | 15.13s |
| 280 | 8e+007 | 0.103454 | 16.12s |
| 299 | 8e+007 | 0.110179 | 17.10s |
| 315 | 9e+007 | 0.116351 | 18.11s |
| 334 | 9e+007 | 0.123353 | 19.09s |
| 353 | 1e+008 | 0.13017 | 20.08s |
| 372 | 1e+008 | 0.137079 | 21.08s |
| 390 | 1e+008 | 0.143712 | 22.08s |
| 409 | 1e+008 | 0.150713 | 23.06s |
| 427 | 1e+008 | 0.157438 | 24.06s |
| 444 | 1e+008 | 0.163887 | 25.05s |
| 463 | 1e+008 | 0.170796 | 26.04s |
| 479 | 1e+008 | 0.176784 | 27.04s |
| 498 | 1e+008 | 0.183693 | 28.04s |
| 517 | 1e+008 | 0.190695 | 29.03s |
| 537 | 1e+008 | 0.19788 | 30.02s |
| 557 | 2e+008 | 0.205342 | 31.01s |
| 577 | 2e+008 | 0.212712 | 32.00s |
| 590 | 2e+008 | 0.217503 | 33.02s |
| 605 | 2e+008 | 0.222938 | 34.01s |
| 621 | 2e+008 | 0.229018 | 34.99s |
| 638 | 2e+008 | 0.235375 | 35.99s |
| 654 | 2e+008 | 0.241086 | 36.97s |
| 671 | 2e+008 | 0.247535 | 37.97s |
| 688 | 2e+008 | 0.253799 | 38.97s |
| 707 | 2e+008 | 0.260524 | 39.97s |
| 723 | 2e+008 | 0.266696 | 40.95s |
| 740 | 2e+008 | 0.272684 | 41.95s |
| 758 | 2e+008 | 0.279502 | 42.95s |
| 775 | 2e+008 | 0.285858 | 43.93s |
| 792 | 2e+008 | 0.291938 | 44.95s |
| 807 | 2e+008 | 0.297558 | 45.94s |
| 827 | 2e+008 | 0.304835 | 46.94s |
| 845 | 2e+008 | 0.311652 | 47.91s |
| 865 | 2e+008 | 0.318746 | 48.91s |
| 882 | 2e+008 | 0.325195 | 49.91s |
| 901 | 2e+008 | 0.332288 | 50.92s |
| 918 | 2e+008 | 0.338368 | 51.89s |
| 936 | 3e+008 | 0.345093 | 52.89s |
| 954 | 3e+008 | 0.351818 | 53.89s |
| 973 | 3e+008 | 0.358819 | 54.88s |
| 991 | 3e+008 | 0.36536 | 55.87s |
| 1011 | 3e+008 | 0.372822 | 56.88s |
| 1030 | 3e+008 | 0.379547 | 57.86s |
| 1048 | 3e+008 | 0.386272 | 58.88s |
| 1066 | 3e+008 | 0.393089 | 59.84s |
| 1085 | 3e+008 | 0.399999 | 1m 0s |
| 1104 | 3e+008 | 0.407 | 1m 1s |
| 1123 | 3e+008 | 0.414001 | 1m 2s |
| 1141 | 3e+008 | 0.420542 | 1m 3s |
| 1161 | 3e+008 | 0.42782 | 1m 4s |
| 1177 | 3e+008 | 0.433992 | 1m 5s |
| 1195 | 3e+008 | 0.440533 | 1m 6s |
| 1214 | 3e+008 | 0.44735 | 1m 7s |
| 1227 | 3e+008 | 0.452232 | 1m 8s |
| 1246 | 3e+008 | 0.459326 | 1m 9s |
| 1264 | 3e+008 | 0.466051 | 1m 10s |
| 1283 | 3e+008 | 0.47296 | 1m 11s |
| 1301 | 4e+008 | 0.479593 | 1m 12s |
| 1320 | 4e+008 | 0.48641 | 1m 13s |
| 1339 | 4e+008 | 0.493596 | 1m 14s |
| 1354 | 4e+008 | 0.499215 | 1m 15s |
| 1373 | 4e+008 | 0.50594 | 1m 16s |
| 1391 | 4e+008 | 0.512665 | 1m 17s |
| 1409 | 4e+008 | 0.51939 | 1m 18s |
| 1425 | 4e+008 | 0.525194 | 1m 19s |
| 1444 | 4e+008 | 0.532195 | 1m 20s |
| 1463 | 4e+008 | 0.539197 | 1m 21s |
| 1482 | 4e+008 | 0.546382 | 1m 22s |
| 1502 | 4e+008 | 0.553752 | 1m 23s |
| 1519 | 4e+008 | 0.559924 | 1m 24s |
| 1536 | 4e+008 | 0.566189 | 1m 25s |
| 1554 | 4e+008 | 0.572821 | 1m 26s |
| 1574 | 4e+008 | 0.580283 | 1m 27s |
| 1593 | 4e+008 | 0.587193 | 1m 28s |
| 1612 | 4e+008 | 0.59401 | 1m 29s |
| 1631 | 4e+008 | 0.601011 | 1m 30s |
| 1647 | 4e+008 | 0.607183 | 1m 31s |
| 1665 | 5e+008 | 0.613816 | 1m 32s |
| 1683 | 5e+008 | 0.620357 | 1m 33s |
| 1702 | 5e+008 | 0.627266 | 1m 34s |
| 1722 | 5e+008 | 0.634636 | 1m 35s |
| 1741 | 5e+008 | 0.641637 | 1m 36s |
| 1759 | 5e+008 | 0.648362 | 1m 37s |
| 1779 | 5e+008 | 0.65564 | 1m 38s |
| 1797 | 5e+008 | 0.662457 | 1m 39s |
| 1816 | 5e+008 | 0.669274 | 1m 40s |
| 1835 | 5e+008 | 0.676368 | 1m 41s |
| 1851 | 5e+008 | 0.682356 | 1m 42s |
| 1871 | 5e+008 | 0.689634 | 1m 43s |
| 1889 | 5e+008 | 0.696359 | 1m 44s |
| 1906 | 5e+008 | 0.702347 | 1m 45s |
| 1925 | 5e+008 | 0.709348 | 1m 46s |
| 1945 | 5e+008 | 0.716718 | 1m 47s |
| 1962 | 5e+008 | 0.723074 | 1m 48s |
| 1981 | 5e+008 | 0.730076 | 1m 49s |
| 2000 | 5e+008 | 0.737261 | 1m 50s |
| 2017 | 5e+008 | 0.743249 | 1m 51s |
| 2034 | 6e+008 | 0.749698 | 1m 52s |