In [1]:
import numpy as np
import pandas as pd
from IPython.display import display, HTML
In [2]:
ratings = pd.read_csv('BX-Book-Ratings.csv',sep=';')
ratings.head()
Out[2]:
User-ID ISBN Book-Rating
0 276725 034545104X 0
1 276726 0155061224 5
2 276727 0446520802 0
3 276729 052165615X 3
4 276729 0521795028 6
In [3]:
#No negative ratings
ratings[ratings['Book-Rating']<0].head()
Out[3]:
User-ID ISBN Book-Rating
In [4]:
ratings[ratings['User-ID']==69188].head()  
Out[4]:
User-ID ISBN Book-Rating
289554 69188 0060911239 0
289555 69188 0060929499 8
289556 69188 0060969989 8
289557 69188 014027684X 9
289558 69188 014029628X 9
In [5]:
users = pd.read_csv('BX-Users.csv',sep=';')
users.head()
Out[5]:
User-ID Location Age
0 1 nyc, new york, usa NaN
1 2 stockton, california, usa 18.0
2 3 moscow, yukon territory, russia NaN
3 4 porto, v.n.gaia, portugal 17.0
4 5 farnborough, hants, united kingdom NaN
In [30]:
books = pd.read_csv('BX-Books.csv',sep='";"')
books.head()
C:\Users\Sahil Gupta\Anaconda2\envs\gl-env\lib\site-packages\ipykernel\__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.
  if __name__ == '__main__':
Out[30]:
"ISBN Book-Title Book-Author Year-Of-Publication Publisher Image-URL-S Image-URL-M Image-URL-L"
0 "0195153448 Classical Mythology Mark P. O. Morford 2002 Oxford University Press http://images.amazon.com/images/P/0195153448.0... http://images.amazon.com/images/P/0195153448.0... http://images.amazon.com/images/P/0195153448.0...
1 "0002005018 Clara Callan Richard Bruce Wright 2001 HarperFlamingo Canada http://images.amazon.com/images/P/0002005018.0... http://images.amazon.com/images/P/0002005018.0... http://images.amazon.com/images/P/0002005018.0...
2 "0060973129 Decision in Normandy Carlo D'Este 1991 HarperPerennial http://images.amazon.com/images/P/0060973129.0... http://images.amazon.com/images/P/0060973129.0... http://images.amazon.com/images/P/0060973129.0...
3 "0374157065 Flu: The Story of the Great Influenza Pandemic... Gina Bari Kolata 1999 Farrar Straus Giroux http://images.amazon.com/images/P/0374157065.0... http://images.amazon.com/images/P/0374157065.0... http://images.amazon.com/images/P/0374157065.0...
4 "0393045218 The Mummies of Urumchi E. J. W. Barber 1999 W. W. Norton &amp; Company http://images.amazon.com/images/P/0393045218.0... http://images.amazon.com/images/P/0393045218.0... http://images.amazon.com/images/P/0393045218.0...
In [33]:
books.columns = [col.replace("\"","") for col in books.columns]
In [34]:
books['ISBN'] = [val.replace("\"","") for val in books['ISBN']]
In [35]:
books['ISBN'].head()
Out[35]:
0    0195153448
1    0002005018
2    0060973129
3    0374157065
4    0393045218
Name: ISBN, dtype: object
In [36]:
books['Image-URL-L'] = [val.replace("\"","") for val in books['Image-URL-L']]
In [37]:
books['Image-URL-L'].head()
Out[37]:
0    http://images.amazon.com/images/P/0195153448.0...
1    http://images.amazon.com/images/P/0002005018.0...
2    http://images.amazon.com/images/P/0060973129.0...
3    http://images.amazon.com/images/P/0374157065.0...
4    http://images.amazon.com/images/P/0393045218.0...
Name: Image-URL-L, dtype: object
In [38]:
# Create a new conda environment with Python 2.7.x
#!conda create -n gl-env python=2.7 anaconda=4.0.0

# Activate the conda environment
#!activate gl-env
In [14]:
#!python -m pip install --upgrade pip
In [15]:
#!pip install --upgrade --no-cache-dir https://get.graphlab.com/GraphLab-Create/2.1/gupta376@umn.edu/4715-4040-4813-090E-5214-4B52-7359-CB73/GraphLab-Create-License.tar.gz
In [16]:
import graphlab
In [17]:
#graphlab.get_dependencies()
In [18]:
from sklearn.cross_validation import cross_val_score, train_test_split
In [19]:
## Train the model (fit the data)
train_data, test_data = train_test_split(ratings, train_size=0.95, random_state=100)
In [20]:
train_data.head()
Out[20]:
User-ID ISBN Book-Rating
238955 55492 0373484143 0
368161 88499 076790592X 9
124453 28591 0451411307 0
1006318 241666 0060198125 0
687490 167471 1861263317 0
In [21]:
sf = graphlab.SFrame(train_data)
sfTest = graphlab.SFrame(test_data)
This non-commercial license of GraphLab Create for academic use is assigned to gupta376@umn.edu and will expire on December 12, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\SAHILG~1\AppData\Local\Temp\graphlab_server_1483465715.log.0
In [31]:
popularity_model = graphlab.popularity_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating')
Recsys training: model = popularity
Preparing data set.
    Data has 1092291 observations with 102300 users and 330476 items.
    Data prepared in: 1.44484s
1092291 observations to process; with 330476 unique items.
In [61]:
popularity_recomm = popularity_model.recommend(users=range(1,10),k=5)
popularity_recomm.print_rows(num_rows=10)
+---------+------------+-------+------+
| User-ID |    ISBN    | score | rank |
+---------+------------+-------+------+
|    1    | 0679415327 |  10.0 |  1   |
|    1    | 0971938830 |  10.0 |  2   |
|    1    | 0486424650 |  10.0 |  3   |
|    1    | 0785270639 |  10.0 |  4   |
|    1    | 3492223303 |  10.0 |  5   |
|    2    | 0679415327 |  10.0 |  1   |
|    2    | 0971938830 |  10.0 |  2   |
|    2    | 0486424650 |  10.0 |  3   |
|    2    | 0785270639 |  10.0 |  4   |
|    2    | 3492223303 |  10.0 |  5   |
+---------+------------+-------+------+
[45 rows x 4 columns]

In [62]:
aggr = pd.merge(graphlab.SFrame.to_dataframe(popularity_recomm), books[['ISBN','Book-Title']], on = ['ISBN'], how='inner')
In [63]:
display(aggr)
User-ID ISBN score rank Book-Title
0 1 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
1 2 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
2 3 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
3 4 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
4 5 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
5 6 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
6 7 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
7 8 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
8 9 0679415327 10.0 1 A Place of My Own: The Education of an Amateur...
9 1 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
10 2 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
11 3 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
12 4 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
13 5 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
14 6 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
15 7 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
16 8 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
17 9 0971938830 10.0 2 The Authentic Annals of the Early Hebrews
18 1 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
19 2 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
20 3 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
21 4 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
22 5 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
23 6 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
24 7 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
25 8 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
26 9 0486424650 10.0 3 The Communist Manifesto and Other Revolutionar...
27 1 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
28 2 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
29 3 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
30 4 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
31 5 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
32 6 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
33 7 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
34 8 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
35 9 0785270639 10.0 4 Living Simply In God's Abundance Strength And ...
In [32]:
popularity_model.evaluate_rmse(sfTest, target='Book-Rating')
Out[32]:
{'rmse_by_item': Columns:
 	ISBN	str
 	count	int
 	rmse	float
 
 Rows: 40456
 
 Data:
 +------------+-------+---------------+
 |    ISBN    | count |      rmse     |
 +------------+-------+---------------+
 | 0263737330 |   1   | 2.86717550543 |
 | 037312189X |   1   |      0.0      |
 | 0345274563 |   1   | 2.92307692308 |
 | 0333387287 |   1   | 2.86717550543 |
 | 050552239X |   1   |      0.0      |
 | 0743410602 |   1   |  2.5652173913 |
 | 0590212877 |   1   | 2.33333333333 |
 | 043957742X |   1   |      0.0      |
 | 0743417844 |   1   |      0.0      |
 | 0312929994 |   1   |     2.625     |
 +------------+-------+---------------+
 [40456 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_by_user': Columns:
 	User-ID	int
 	count	int
 	rmse	float
 
 Rows: 17780
 
 Data:
 +---------+-------+---------------+
 | User-ID | count |      rmse     |
 +---------+-------+---------------+
 |  237313 |   1   |      2.4      |
 |  43116  |   1   | 3.43323863636 |
 |  115341 |   1   | 5.36170212766 |
 |  69188  |   1   |      5.0      |
 |  118012 |   1   | 3.66666666667 |
 |  66750  |   1   | 1.11428571429 |
 |  97741  |   1   |      1.0      |
 |  230692 |   1   |      1.5      |
 |  135535 |   1   |     2.875     |
 |  36992  |   2   | 2.63200857442 |
 +---------+-------+---------------+
 [17780 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_overall': 4.073509434250113}
In [22]:
item_sim_model = graphlab.item_similarity_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating')
Recsys training: model = item_similarity
Preparing data set.
    Data has 862335 observations with 89722 users and 286804 items.
    Data prepared in: 1.12007s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 15.546ms                       | 1          |
| 58.887ms                       | 100        |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using sparse lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 2.55s                               | 0                | 20              |
| 3.57s                               | 4.5              | 12960           |
| 4.57s                               | 8.5              | 25031           |
| 5.54s                               | 12.5             | 36496           |
| 6.56s                               | 16.5             | 47869           |
| 7.71s                               | 18.25            | 53054           |
| 8.58s                               | 19.75            | 57117           |
| 9.54s                               | 32.25            | 93053           |
| 10.52s                              | 35.25            | 101152          |
| 11.67s                              | 41               | 118188          |
| 12.52s                              | 42               | 121162          |
| 13.52s                              | 42.75            | 123145          |
| 14.74s                              | 43.5             | 124866          |
| 15.53s                              | 43.75            | 125713          |
| 16.51s                              | 44.25            | 127012          |
| 17.54s                              | 44.25            | 127161          |
| 18.50s                              | 45               | 129751          |
| 19.49s                              | 47               | 135154          |
| 20.49s                              | 50.5             | 145329          |
| 23.18s                              | 55.25            | 158973          |
| 24.33s                              | 57.5             | 165171          |
| 26.36s                              | 63.5             | 182784          |
| 28.15s                              | 67.25            | 193051          |
| 28.48s                              | 67.75            | 194483          |
| 31.23s                              | 73.5             | 210957          |
| 35.84s                              | 78               | 223901          |
| 37.99s                              | 78.75            | 226323          |
| 41.20s                              | 79.75            | 228962          |
| 42.26s                              | 79.75            | 229279          |
| 52.56s                              | 81.25            | 233157          |
| 53.79s                              | 81.5             | 233851          |
| 57.07s                              | 81.5             | 233862          |
| 58.48s                              | 81.5             | 234145          |
| 1m 0s                               | 81.75            | 234545          |
| 1m 1s                               | 81.75            | 234651          |
| 1m 5s                               | 82.25            | 235992          |
| 1m 13s                              | 82.25            | 235992          |
| 1m 19s                              | 82.25            | 236493          |
| 1m 22s                              | 82.5             | 236626          |
| 1m 37s                              | 82.75            | 237526          |
| 1m 54s                              | 83.5             | 239676          |
| 1m 59s                              | 83.75            | 240229          |
| 2m 1s                               | 83.75            | 240319          |
| 2m 4s                               | 83.75            | 240441          |
| 3m 32s                              | 84.5             | 242661          |
| 3m 33s                              | 84.5             | 242873          |
| 3m 46s                              | 84.5             | 242873          |
| 3m 49s                              | 84.5             | 242919          |
| 4m 1s                               | 84.75            | 243107          |
| 5m 43s                              | 85.25            | 244512          |
| 5m 50s                              | 85.25            | 244538          |
| 5m 52s                              | 85.25            | 244538          |
| 5m 54s                              | 85.25            | 244567          |
| 5m 56s                              | 85.25            | 244567          |
| 6m 3s                               | 85.25            | 244624          |
| 6m 14s                              | 85.25            | 244700          |
| 6m 45s                              | 85.5             | 245269          |
| 6m 46s                              | 85.5             | 245285          |
| 6m 48s                              | 85.5             | 245422          |
| 6m 50s                              | 85.5             | 245609          |
| 6m 53s                              | 85.5             | 245862          |
| 6m 57s                              | 85.75            | 246259          |
| 7m 17s                              | 86.25            | 247876          |
| 7m 19s                              | 86.25            | 248021          |
| 7m 19s                              | 86.5             | 248144          |
| 7m 21s                              | 86.5             | 248150          |
| 7m 22s                              | 86.5             | 248209          |
| 7m 24s                              | 86.5             | 248264          |
| 7m 38s                              | 86.5             | 248760          |
| 7m 41s                              | 86.75            | 248843          |
| 7m 44s                              | 86.75            | 248937          |
| 7m 45s                              | 86.75            | 249007          |
| 7m 48s                              | 87               | 249705          |
| 8m 21s                              | 87               | 249705          |
| 8m 23s                              | 87               | 249765          |
| 8m 24s                              | 87               | 249832          |
| 8m 27s                              | 87               | 249835          |
| 8m 29s                              | 87.5             | 251033          |
| 9m 52s                              | 87.5             | 251033          |
| 9m 53s                              | 87.5             | 251068          |
| 9m 54s                              | 87.5             | 251296          |
| 10m 9s                              | 87.75            | 252312          |
| 11m 29s                             | 89.25            | 256049          |
| 12m 45s                             | 89.25            | 256051          |
| 12m 46s                             | 89.25            | 256196          |
| 12m 50s                             | 89.25            | 256214          |
| 12m 52s                             | 89.25            | 256242          |
| 13m 5s                              | 89.25            | 256555          |
| 13m 8s                              | 89.25            | 256613          |
| 13m 10s                             | 89.25            | 256634          |
| 13m 11s                             | 89.25            | 256660          |
| 13m 11s                             | 89.5             | 256881          |
| 13m 22s                             | 89.5             | 256890          |
| 13m 23s                             | 89.5             | 256923          |
| 13m 25s                             | 89.5             | 256924          |
| 13m 35s                             | 89.5             | 257070          |
| 15m 50s                             | 90.25            | 258949          |
| 15m 54s                             | 90.25            | 259018          |
| 15m 56s                             | 91               | 261332          |
| 17m 7s                              | 91               | 261333          |
| 17m 9s                              | 91               | 261520          |
| 17m 34s                             | 91.5             | 262635          |
| 18m 9s                              | 91.75            | 263538          |
| 18m 13s                             | 91.75            | 263538          |
| 18m 16s                             | 91.75            | 263584          |
| 18m 19s                             | 91.75            | 263737          |
| 18m 24s                             | 91.75            | 263771          |
| 18m 26s                             | 91.75            | 263773          |
| 18m 26s                             | 91.75            | 263808          |
| 18m 28s                             | 91.75            | 263822          |
| 18m 29s                             | 91.75            | 263838          |
| 18m 33s                             | 92               | 264204          |
| 18m 52s                             | 92               | 264204          |
| 18m 57s                             | 92               | 264285          |
| 21m 3s                              | 92.5             | 265847          |
| 21m 17s                             | 92.5             | 265875          |
| 21m 19s                             | 92.5             | 265875          |
| 21m 22s                             | 92.5             | 265911          |
| 21m 24s                             | 92.5             | 266001          |
| 21m 35s                             | 92.5             | 266001          |
| 21m 52s                             | 92.75            | 266275          |
| 21m 57s                             | 92.75            | 266605          |
| 22m 0s                              | 93               | 267392          |
| 22m 13s                             | 93               | 267396          |
| 22m 23s                             | 93.25            | 267873          |
| 22m 24s                             | 93.25            | 267916          |
| 27m 5s                              | 94.75            | 272438          |
| 27m 6s                              | 95               | 272511          |
| 27m 7s                              | 95               | 272597          |
| 27m 8s                              | 95               | 272924          |
| 27m 11s                             | 95               | 272924          |
| 27m 14s                             | 95               | 273086          |
| 27m 15s                             | 95               | 273174          |
| 27m 19s                             | 95.25            | 273894          |
| 27m 27s                             | 95.5             | 273940          |
| 27m 29s                             | 95.5             | 273940          |
| 27m 32s                             | 95.5             | 274041          |
| 27m 34s                             | 95.5             | 274091          |
| 27m 35s                             | 95.5             | 274103          |
| 28m 27s                             | 95.75            | 275265          |
| 28m 35s                             | 95.75            | 275329          |
| 28m 40s                             | 96               | 275354          |
| 28m 42s                             | 96               | 275357          |
| 29m 22s                             | 96               | 275849          |
| 29m 23s                             | 96               | 275863          |
| 29m 33s                             | 96               | 275981          |
| 31m 45s                             | 96.75            | 278054          |
| 32m 45s                             | 96.75            | 278057          |
| 32m 46s                             | 96.75            | 278136          |
| 32m 56s                             | 96.75            | 278136          |
| 33m 10s                             | 97               | 278257          |
| 33m 11s                             | 97               | 278269          |
| 33m 12s                             | 97               | 278285          |
| 33m 13s                             | 97               | 278304          |
| 33m 14s                             | 97               | 278304          |
| 33m 15s                             | 97               | 278368          |
| 33m 20s                             | 97               | 278368          |
| 34m 7s                              | 97               | 278906          |
| 34m 8s                              | 97.25            | 278924          |
| 34m 52s                             | 98               | 281627          |
| 37m 11s                             | 98               | 281627          |
| 37m 12s                             | 98               | 281711          |
| 37m 23s                             | 98               | 281718          |
| 37m 24s                             | 98.25            | 281798          |
| 37m 42s                             | 98.5             | 283069          |
| 38m 5s                              | 98.75            | 283922          |
| 38m 9s                              | 99               | 284027          |
| 38m 12s                             | 99               | 284102          |
| 38m 16s                             | 99               | 284166          |
| 38m 49s                             | 99.25            | 284771          |
| 38m 52s                             | 99.25            | 284798          |
| 39m 41s                             | 99.5             | 285532          |
| 39m 55s                             | 99.5             | 285717          |
| 39m 58s                             | 99.75            | 286122          |
| 40m 31s                             | 99.75            | 286164          |
| 40m 34s                             | 99.75            | 286165          |
| 45m 11s                             | 100              | 286804          |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 2712.91s
In [64]:
#Make Recommendations:
item_sim_recomm = item_sim_model.recommend(users=range(1,10),k=5)
item_sim_recomm.print_rows(num_rows=10)
+---------+------------+------------------+------+
| User-ID |    ISBN    |      score       | rank |
+---------+------------+------------------+------+
|    1    | 044021145X | 0.0100413656235  |  1   |
|    1    | 0060928336 | 0.00940719604492 |  2   |
|    1    | 0440213525 | 0.00894405722618 |  3   |
|    1    | 0312195516 | 0.00889273405075 |  4   |
|    1    | 0440214041 | 0.00855695486069 |  5   |
|    2    | 044021145X | 0.0100413656235  |  1   |
|    2    | 0060928336 | 0.00940719604492 |  2   |
|    2    | 0440213525 | 0.00894405722618 |  3   |
|    2    | 0312195516 | 0.00889273405075 |  4   |
|    2    | 0440214041 | 0.00855695486069 |  5   |
+---------+------------+------------------+------+
[45 rows x 4 columns]

In [51]:
item_sim_recomm.show()
Canvas is updated and available in a tab in the default browser.
In [65]:
aggr = pd.merge(graphlab.SFrame.to_dataframe(item_sim_recomm), books[['ISBN','Book-Title']], on = ['ISBN'], how='inner')
In [66]:
display(aggr)
User-ID ISBN score rank Book-Title
0 1 044021145X 0.010041 1 The Firm
1 2 044021145X 0.010041 1 The Firm
2 3 044021145X 0.010041 1 The Firm
3 4 044021145X 0.010041 1 The Firm
4 5 044021145X 0.010041 1 The Firm
5 6 044021145X 0.010041 1 The Firm
6 7 044021145X 0.010041 1 The Firm
7 1 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
8 2 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
9 3 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
10 4 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
11 5 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
12 6 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
13 7 0060928336 0.009407 2 Divine Secrets of the Ya-Ya Sisterhood: A Novel
14 1 0440213525 0.008944 3 The Client
15 2 0440213525 0.008944 3 The Client
16 3 0440213525 0.008944 3 The Client
17 4 0440213525 0.008944 3 The Client
18 5 0440213525 0.008944 3 The Client
19 6 0440213525 0.008944 3 The Client
20 7 0440213525 0.008944 3 The Client
21 1 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
22 2 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
23 3 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
24 4 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
25 5 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
26 6 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
27 7 0312195516 0.008893 4 The Red Tent (Bestselling Backlist)
28 1 0440214041 0.008557 5 The Pelican Brief
29 2 0440214041 0.008557 5 The Pelican Brief
30 3 0440214041 0.008557 5 The Pelican Brief
31 4 0440214041 0.008557 5 The Pelican Brief
32 5 0440214041 0.008557 5 The Pelican Brief
33 6 0440214041 0.008557 5 The Pelican Brief
34 7 0440214041 0.008557 5 The Pelican Brief
35 8 1861470592 0.035714 1 The Small Garden Handbook
36 8 0671890778 0.035714 2 Hold It! You're Exercizing Wrong : Your Prescr...
37 8 0817012370 0.035714 3 How to Recover from Grief
38 8 0553252275 0.035714 4 Reflections on Life After Life
39 8 078686043X 0.035714 5 The Beardstown Ladies' Common-Sense Investment...
40 9 0385312202 0.017857 1 Leaving Cold Sassy: The Unfinished Sequel to C...
41 9 0553278355 0.014286 2 The Bell Jar
42 9 0316899984 0.013201 3 River, Cross My Heart
43 9 0060175982 0.012821 4 Mars and Venus Starting Over: A Practical Guid...
44 9 0312950853 0.012821 5 The Nanny Murder Trial (St. Martin's True Crim...
In [44]:
model_performance = graphlab.compare(sfTest, [popularity_model, item_sim_model])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_model])
PROGRESS: Evaluate model M0
recommendations finished on 1000/47880 queries. users per second: 287.309
recommendations finished on 2000/47880 queries. users per second: 284.384
recommendations finished on 3000/47880 queries. users per second: 286.821
recommendations finished on 4000/47880 queries. users per second: 286.685
recommendations finished on 5000/47880 queries. users per second: 287.046
recommendations finished on 6000/47880 queries. users per second: 287.064
recommendations finished on 7000/47880 queries. users per second: 287.079
recommendations finished on 8000/47880 queries. users per second: 287.537
recommendations finished on 9000/47880 queries. users per second: 287.533
recommendations finished on 10000/47880 queries. users per second: 287.681
recommendations finished on 11000/47880 queries. users per second: 288.087
recommendations finished on 12000/47880 queries. users per second: 286.695
recommendations finished on 13000/47880 queries. users per second: 284.319
recommendations finished on 14000/47880 queries. users per second: 282.179
recommendations finished on 15000/47880 queries. users per second: 282.417
recommendations finished on 16000/47880 queries. users per second: 282.755
recommendations finished on 17000/47880 queries. users per second: 283.235
recommendations finished on 18000/47880 queries. users per second: 283.599
recommendations finished on 19000/47880 queries. users per second: 283.815
recommendations finished on 20000/47880 queries. users per second: 284.059
recommendations finished on 21000/47880 queries. users per second: 284.132
recommendations finished on 22000/47880 queries. users per second: 284.121
recommendations finished on 23000/47880 queries. users per second: 284.164
recommendations finished on 24000/47880 queries. users per second: 284.463
recommendations finished on 25000/47880 queries. users per second: 284.739
recommendations finished on 26000/47880 queries. users per second: 284.781
recommendations finished on 27000/47880 queries. users per second: 283.839
recommendations finished on 28000/47880 queries. users per second: 279.88
recommendations finished on 29000/47880 queries. users per second: 274.783
recommendations finished on 30000/47880 queries. users per second: 271.046
recommendations finished on 31000/47880 queries. users per second: 268.063
recommendations finished on 32000/47880 queries. users per second: 264.9
recommendations finished on 33000/47880 queries. users per second: 262.532
recommendations finished on 34000/47880 queries. users per second: 259.89
recommendations finished on 35000/47880 queries. users per second: 257.496
recommendations finished on 36000/47880 queries. users per second: 257.058
recommendations finished on 37000/47880 queries. users per second: 257.773
recommendations finished on 38000/47880 queries. users per second: 258.627
recommendations finished on 39000/47880 queries. users per second: 259.48
recommendations finished on 40000/47880 queries. users per second: 260.368
recommendations finished on 41000/47880 queries. users per second: 261.166
recommendations finished on 42000/47880 queries. users per second: 261.909
recommendations finished on 43000/47880 queries. users per second: 262.586
recommendations finished on 44000/47880 queries. users per second: 263.226
recommendations finished on 45000/47880 queries. users per second: 263.273
recommendations finished on 46000/47880 queries. users per second: 263.363
recommendations finished on 47000/47880 queries. users per second: 263.333
Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    | 6.96184906711e-06 | 1.04427736007e-05 |
|   4    | 1.04427736007e-05 | 2.08855472013e-05 |
|   5    | 8.35421888053e-06 | 2.08855472013e-05 |
|   6    | 6.96184906711e-06 | 2.08855472013e-05 |
|   7    | 5.96729920038e-06 | 2.08855472013e-05 |
|   8    |  7.8320802005e-06 | 2.09549344013e-05 |
|   9    | 6.96184906711e-06 | 2.09549344013e-05 |
|   10   |  6.2656641604e-06 | 2.09549344013e-05 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1
recommendations finished on 1000/47880 queries. users per second: 168.926
recommendations finished on 2000/47880 queries. users per second: 171.453
recommendations finished on 3000/47880 queries. users per second: 182.693
recommendations finished on 4000/47880 queries. users per second: 192.49
recommendations finished on 5000/47880 queries. users per second: 197.691
recommendations finished on 6000/47880 queries. users per second: 202.161
recommendations finished on 7000/47880 queries. users per second: 205.281
recommendations finished on 8000/47880 queries. users per second: 207.419
recommendations finished on 9000/47880 queries. users per second: 209.144
recommendations finished on 10000/47880 queries. users per second: 209.605
recommendations finished on 11000/47880 queries. users per second: 208.333
recommendations finished on 12000/47880 queries. users per second: 208.403
recommendations finished on 13000/47880 queries. users per second: 208.118
recommendations finished on 14000/47880 queries. users per second: 206.763
recommendations finished on 15000/47880 queries. users per second: 207.29
recommendations finished on 16000/47880 queries. users per second: 207.835
recommendations finished on 17000/47880 queries. users per second: 208.364
recommendations finished on 18000/47880 queries. users per second: 208.216
recommendations finished on 19000/47880 queries. users per second: 208.83
recommendations finished on 20000/47880 queries. users per second: 208.696
recommendations finished on 21000/47880 queries. users per second: 208.558
recommendations finished on 22000/47880 queries. users per second: 207.861
recommendations finished on 23000/47880 queries. users per second: 207.68
recommendations finished on 24000/47880 queries. users per second: 208.381
recommendations finished on 25000/47880 queries. users per second: 209.025
recommendations finished on 26000/47880 queries. users per second: 209.627
recommendations finished on 27000/47880 queries. users per second: 210.14
recommendations finished on 28000/47880 queries. users per second: 210.684
recommendations finished on 29000/47880 queries. users per second: 211.21
recommendations finished on 30000/47880 queries. users per second: 211.735
recommendations finished on 31000/47880 queries. users per second: 212.197
recommendations finished on 32000/47880 queries. users per second: 212.657
recommendations finished on 33000/47880 queries. users per second: 212.939
recommendations finished on 34000/47880 queries. users per second: 213.327
recommendations finished on 35000/47880 queries. users per second: 213.703
recommendations finished on 36000/47880 queries. users per second: 214.008
recommendations finished on 37000/47880 queries. users per second: 214.302
recommendations finished on 38000/47880 queries. users per second: 214.576
recommendations finished on 39000/47880 queries. users per second: 214.812
recommendations finished on 40000/47880 queries. users per second: 215.05
recommendations finished on 41000/47880 queries. users per second: 215.262
recommendations finished on 42000/47880 queries. users per second: 215.451
recommendations finished on 43000/47880 queries. users per second: 215.531
recommendations finished on 44000/47880 queries. users per second: 215.055
recommendations finished on 45000/47880 queries. users per second: 215.262
recommendations finished on 46000/47880 queries. users per second: 215.422
recommendations finished on 47000/47880 queries. users per second: 215.628
Precision and recall summary statistics by cutoff
+--------+------------------+------------------+
| cutoff |  mean_precision  |   mean_recall    |
+--------+------------------+------------------+
|   1    | 0.00921052631579 | 0.00271518546502 |
|   2    | 0.00749791144528 | 0.00435890716239 |
|   3    | 0.00619604566973 | 0.00541474996357 |
|   4    | 0.00541457811195 | 0.00644506824528 |
|   5    | 0.00475772765246 | 0.00703571716626 |
|   6    | 0.00424672793094 | 0.00767339928592 |
|   7    | 0.00381907148824 | 0.00795239029457 |
|   8    | 0.00350616123642 | 0.00832217537057 |
|   9    | 0.00328367214332 | 0.00899210770841 |
|   10   | 0.00310359231412 | 0.00965827849019 |
+--------+------------------+------------------+
[10 rows x 3 columns]

Model compare metric: precision_recall
Canvas is updated and available in a tab in the default browser.
In [68]:
popularity_recomm.show()
Canvas is accessible via web browser at the URL: http://localhost:54099/index.html
Opening Canvas in default web browser.
In [48]:
matrix_factor_model = graphlab.factorization_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating', 
                                                                regularization=1e-6, max_iterations=500)
Recsys training: model = factorization_recommender
Preparing data set.
    Data has 1092291 observations with 102300 users and 330476 items.
    Data prepared in: 1.43589s
Training factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter                      | Description                                      | Value    |
+--------------------------------+--------------------------------------------------+----------+
| num_factors                    | Factor Dimension                                 | 8        |
| regularization                 | L2 Regularization on Factors                     | 1e-006   |
| solver                         | Solver used for training                         | sgd      |
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-010   |
| max_iterations                 | Maximum Number of Iterations                     | 500      |
+--------------------------------+--------------------------------------------------+----------+
  Optimizing model using SGD; tuning step size.
  Using 136536 / 1092291 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value                |
+---------+-------------------+------------------------------------------+
| 0       | 25                | No Decrease (62.7398 >= 14.8356)         |
| 1       | 6.25              | No Decrease (65.6024 >= 14.8356)         |
| 2       | 1.5625            | No Decrease (58.1537 >= 14.8356)         |
| 3       | 0.390625          | No Decrease (27.6335 >= 14.8356)         |
| 4       | 0.0976562         | 0.923907                                 |
| 5       | 0.0488281         | 6.3842                                   |
+---------+-------------------+------------------------------------------+
| Final   | 0.0976562         | 0.923907                                 |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 5.013ms      | 14.8563           | 3.85439               |             |
+---------+--------------+-------------------+-----------------------+-------------+
| 1       | 269.323ms    | 25.2161           | 4.8682                | 0.0976562   |
| 2       | 561.028ms    | 19.2428           | 4.21399               | 0.0580668   |
| 3       | 778.604ms    | 12.8849           | 3.40055               | 0.042841    |
| 4       | 1.03s        | 8.87575           | 2.76633               | 0.0290334   |
| 5       | 1.27s        | 6.98182           | 2.40467               | 0.0245592   |
| 6       | 1.54s        | 6.01277           | 2.19747               | 0.0214205   |
| 10      | 2.33s        | 4.40128           | 1.80303               | 0.014603    |
| 11      | 2.66s        | 4.22173           | 1.75394               | 0.0135956   |
| 15      | 3.51s        | 3.75562           | 1.62098               | 0.0107739   |
| 20      | 4.42s        | 3.45361           | 1.53091               | 0.008683    |
| 25      | 5.35s        | 3.27705           | 1.47666               | 0.00734493  |
| 30      | 6.31s        | 3.13993           | 1.43381               | 0.00640622  |
| 35      | 7.30s        | 3.04788           | 1.40488               | 0.00570679  |
| 40      | 8.29s        | 2.97842           | 1.38319               | 0.00516295  |
| 45      | 9.22s        | 2.92098           | 1.36532               | 0.00472643  |
| 50      | 10.14s       | 2.87083           | 1.34961               | 0.00436732  |
| 51      | 10.37s       | 2.8639            | 1.34754               | 0.00430294  |
| 55      | 11.30s       | 2.82912           | 1.3365                | 0.00406603  |
| 60      | 12.42s       | 2.79553           | 1.32622               | 0.00380916  |
| 65      | 13.43s       | 2.76362           | 1.31636               | 0.00358722  |
| 70      | 14.67s       | 2.73734           | 1.3083                | 0.00339327  |
| 75      | 15.66s       | 2.71249           | 1.3007                | 0.00322216  |
| 80      | 16.57s       | 2.69051           | 1.29397               | 0.00306991  |
| 85      | 17.50s       | 2.67032           | 1.28778               | 0.00293345  |
| 90      | 18.49s       | 2.65301           | 1.28264               | 0.00281035  |
| 95      | 19.42s       | 2.63589           | 1.2775                | 0.00269867  |
| 100     | 20.35s       | 2.61984           | 1.27261               | 0.00259682  |
| 101     | 20.57s       | 2.61689           | 1.2717                | 0.00257752  |
| 105     | 21.36s       | 2.60532           | 1.26826               | 0.00250352  |
| 110     | 22.34s       | 2.59196           | 1.26428               | 0.00241768  |
| 115     | 23.27s       | 2.58595           | 1.26311               | 0.0023384   |
| 120     | 24.20s       | 2.52658           | 1.24017               | 0.00134674  |
| 125     | 25.12s       | 2.49454           | 1.22769               | 0.000776632 |
| 130     | 26.07s       | 2.47645           | 1.2206                | 0.000448402 |
| 135     | 27.04s       | 2.47313           | 1.21948               | 0.000435888 |
| 140     | 28.00s       | 2.46361           | 1.21575               | 0.000252207 |
| 145     | 29.12s       | 2.4569            | 1.2131                | 0.000146068 |
| 150     | 30.12s       | 2.45369           | 1.21184               |             |
| 155     | 31.19s       | 2.45185           | 1.21112               |             |
| 160     | 32.11s       | 2.45042           | 1.21056               | 3.3918e-005 |
| 165     | 33.02s       | 2.45              | 1.2104                |             |
| 170     | 33.95s       | 2.44983           | 1.21035               |             |
| 175     | 34.94s       | 2.44971           | 1.21031               |             |
| 180     | 35.86s       | 2.44954           | 1.21025               |             |
| 185     | 36.86s       | 2.44943           | 1.21022               | 2.5579e-005 |
| 190     | 37.82s       | 2.44927           | 1.21017               |             |
| 195     | 38.80s       | 2.44914           | 1.21013               |             |
| 200     | 39.72s       | 2.44901           | 1.21009               |             |
| 205     | 40.66s       | 2.44889           | 1.21006               |             |
| 210     | 41.55s       | 2.44858           | 1.20994               |             |
| 215     | 42.59s       | 2.44818           | 1.20978               |             |
| 220     | 43.51s       | 2.44812           | 1.20977               |             |
| 225     | 44.41s       | 2.44804           | 1.20974               |             |
| 230     | 45.42s       | 2.44798           | 1.20972               | 1.2918e-005 |
| 235     | 46.36s       | 2.44791           | 1.2097                |             |
| 240     | 47.26s       | 2.44785           | 1.20968               |             |
| 245     | 48.33s       | 2.44779           | 1.20966               |             |
| 250     | 49.28s       | 2.44773           | 1.20965               |             |
| 255     | 50.25s       | 2.44767           | 1.20963               | 1.1956e-005 |
| 260     | 51.20s       | 2.44761           | 1.20961               |             |
| 265     | 52.20s       | 2.44755           | 1.20959               | 1.1616e-005 |
| 270     | 53.13s       | 2.44749           | 1.20958               |             |
| 275     | 54.13s       | 2.44744           | 1.20956               |             |
| 280     | 55.09s       | 2.44738           | 1.20954               |             |
| 285     | 56.05s       | 2.44732           | 1.20953               |             |
| 290     | 57.01s       | 2.44727           | 1.20951               |             |
| 295     | 57.94s       | 2.44722           | 1.2095                |             |
| 300     | 58.82s       | 2.44716           | 1.20948               | 1.0584e-005 |
| 305     | 59.83s       | 2.44711           | 1.20947               |             |
| 310     | 1m 0s        | 2.44706           | 1.20945               |             |
| 315     | 1m 1s        | 2.44701           | 1.20944               |             |
| 320     | 1m 2s        | 2.44697           | 1.20942               |             |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Convergence on objective within bounds.
Computing final objective value and training RMSE.
       Final objective value: 2.44638
       Final training RMSE: 1.20918
In [49]:
#Make Recommendations:
matrix_factor_recomm = matrix_factor_model.recommend(users=[69188],k=5)
matrix_factor_recomm.print_rows(num_rows=5)
+---------+------------+---------------+------+
| User-ID |    ISBN    |     score     | rank |
+---------+------------+---------------+------+
|  69188  | 0671734563 | 31.7740267971 |  1   |
|  69188  | 0394800389 | 31.5581492641 |  2   |
|  69188  | 0553057758 | 28.8189067104 |  3   |
|  69188  | 0152047379 | 28.4868544796 |  4   |
|  69188  | 0380799502 |  28.193433688 |  5   |
+---------+------------+---------------+------+
[5 rows x 4 columns]

In [50]:
matrix_factor_model.evaluate_rmse(sfTest, target='Book-Rating')
Out[50]:
{'rmse_by_item': Columns:
 	ISBN	str
 	count	int
 	rmse	float
 
 Rows: 40456
 
 Data:
 +------------+-------+----------------+
 |    ISBN    | count |      rmse      |
 +------------+-------+----------------+
 | 0263737330 |   1   | 0.133969233303 |
 | 037312189X |   1   | 0.256416325525 |
 | 0345274563 |   1   | 4.99950559524  |
 | 0333387287 |   1   | 0.834043906002 |
 | 050552239X |   1   | 0.700068726272 |
 | 0743410602 |   1   | 0.906036569805 |
 | 0590212877 |   1   | 1.04643426922  |
 | 043957742X |   1   | 0.384221271544 |
 | 0743417844 |   1   | 4.03500670853  |
 | 0312929994 |   1   | 7.76407335785  |
 +------------+-------+----------------+
 [40456 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_by_user': Columns:
 	User-ID	int
 	count	int
 	rmse	float
 
 Rows: 17780
 
 Data:
 +---------+-------+----------------+
 | User-ID | count |      rmse      |
 +---------+-------+----------------+
 |  237313 |   1   | 2.23872773436  |
 |  43116  |   1   | 0.585648565621 |
 |  115341 |   1   |  6.0998668513  |
 |  69188  |   1   | 1.13800565985  |
 |  118012 |   1   | 3.63738138887  |
 |  66750  |   1   | 2.53537179616  |
 |  97741  |   1   | 2.83883710119  |
 |  230692 |   1   |  1.108466939   |
 |  135535 |   1   | 9.15111968753  |
 |  36992  |   2   | 12.6012445133  |
 +---------+-------+----------------+
 [17780 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_overall': 4.254394701094985}
In [54]:
rank_factor_model = graphlab.ranking_factorization_recommender.create(sf, user_id='User-ID', item_id='ISBN', target='Book-Rating', 
                                                                max_iterations=50)
Recsys training: model = ranking_factorization_recommender
Preparing data set.
    Data has 1092291 observations with 102300 users and 330476 items.
    Data prepared in: 1.45786s
Training ranking_factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter                      | Description                                      | Value    |
+--------------------------------+--------------------------------------------------+----------+
| num_factors                    | Factor Dimension                                 | 32       |
| regularization                 | L2 Regularization on Factors                     | 1e-009   |
| solver                         | Solver used for training                         | sgd      |
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-009   |
| ranking_regularization         | Rank-based Regularization Weight                 | 0.25     |
| max_iterations                 | Maximum Number of Iterations                     | 50       |
+--------------------------------+--------------------------------------------------+----------+
  Optimizing model using SGD; tuning step size.
  Using 136536 / 1092291 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value                |
+---------+-------------------+------------------------------------------+
| 0       | 25                | Not Viable                               |
| 1       | 6.25              | Not Viable                               |
| 2       | 1.5625            | Not Viable                               |
| 3       | 0.390625          | Not Viable                               |
| 4       | 0.0976562         | No Decrease (118.188 >= 29.0736)         |
| 5       | 0.0244141         | 20.2593                                  |
| 6       | 0.012207          | 21.8816                                  |
| 7       | 0.00610352        | 23.0865                                  |
| 8       | 0.00305176        | 24.1301                                  |
+---------+-------------------+------------------------------------------+
| Final   | 0.0244141         | 20.2593                                  |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 0us          | 29.1255           | 3.85439               |             |
+---------+--------------+-------------------+-----------------------+-------------+
| 1       | 1.22s        | 23.8308           | 3.67647               | 0.0244141   |
| 2       | 2.27s        | 22.5723           | 3.58656               | 0.0145167   |
| 3       | 3.25s        | 21.6437           | 3.49804               | 0.0107102   |
| 4       | 4.22s        | 20.4307           | 3.36114               | 0.00863167  |
| 5       | 5.20s        | 18.9328           | 3.17794               | 0.00730151  |
| 6       | 6.17s        | 17.3876           | 2.97577               | 0.00636835  |
| 10      | 10.04s       | 12.8017           | 2.32913               | 0.0043415   |
| 11      | 11.01s       | 12.0613           | 2.2169                | 0.00404199  |
| 15      | 14.78s       | 10.0048           | 1.88625               | 0.00320311  |
| 20      | 19.36s       | 8.54348           | 1.6416                | 0.00258147  |
| 25      | 23.88s       | 7.64791           | 1.48445               | 0.00218366  |
| 30      | 28.38s       | 7.01902           | 1.37335               | 0.00190458  |
| 35      | 32.82s       | 6.5621            | 1.28996               | 0.00169664  |
| 40      | 37.31s       | 6.21423           | 1.22521               | 0.00153495  |
| 45      | 41.82s       | 5.92714           | 1.17354               | 0.00140518  |
| 50      | 46.36s       | 5.68119           | 1.12973               | 0.00129841  |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Maximum number of passes through the data reached.
Computing final objective value and training RMSE.
       Final objective value: 5.57345
       Final training RMSE: 1.08339
In [55]:
#Make Recommendations:
rank_factor_recomm = rank_factor_model.recommend(users=[69188],k=5)
rank_factor_recomm.print_rows(num_rows=5)
+---------+------------+---------------+------+
| User-ID |    ISBN    |     score     | rank |
+---------+------------+---------------+------+
|  69188  | 0811825558 | 14.8589946487 |  1   |
|  69188  | 0345315715 | 14.6149875381 |  2   |
|  69188  | 0553380958 | 14.2803146103 |  3   |
|  69188  | 0060976241 | 13.6427222946 |  4   |
|  69188  | 0812550706 | 13.5892755249 |  5   |
+---------+------------+---------------+------+
[5 rows x 4 columns]

In [56]:
rank_factor_model.evaluate_rmse(sfTest, target='Book-Rating')
Out[56]:
{'rmse_by_item': Columns:
 	ISBN	str
 	count	int
 	rmse	float
 
 Rows: 40456
 
 Data:
 +------------+-------+---------------+
 |    ISBN    | count |      rmse     |
 +------------+-------+---------------+
 | 0263737330 |   1   | 5.34130151484 |
 | 037312189X |   1   | 4.46604509565 |
 | 0345274563 |   1   |  1.4633745617 |
 | 0333387287 |   1   | 5.31082303736 |
 | 050552239X |   1   | 4.30599315378 |
 | 0743410602 |   1   | 1.65934709001 |
 | 0590212877 |   1   | 3.46737320635 |
 | 043957742X |   1   | 3.38589246485 |
 | 0743417844 |   1   | 8.80384717795 |
 | 0312929994 |   1   | 2.51979693857 |
 +------------+-------+---------------+
 [40456 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_by_user': Columns:
 	User-ID	int
 	count	int
 	rmse	float
 
 Rows: 17780
 
 Data:
 +---------+-------+----------------+
 | User-ID | count |      rmse      |
 +---------+-------+----------------+
 |  237313 |   1   | 1.49798698637  |
 |  43116  |   1   | 2.84383349207  |
 |  115341 |   1   | 5.26586948249  |
 |  69188  |   1   | 3.36552937242  |
 |  118012 |   1   | 5.01669451508  |
 |  66750  |   1   | 3.62250982311  |
 |  97741  |   1   | 2.89033086512  |
 |  230692 |   1   | 0.797411755591 |
 |  135535 |   1   | 2.65705485728  |
 |  36992  |   2   | 3.30571380721  |
 +---------+-------+----------------+
 [17780 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'rmse_overall': 7.080758290955872}
In [26]:
books[['ISBN','Book-Title','Book-Author','Year-Of-Publication','Publisher']].head()
Out[26]:
ISBN Book-Title Book-Author Year-Of-Publication Publisher
0 0195153448 Classical Mythology Mark P. O. Morford 2002 Oxford University Press
1 0002005018 Clara Callan Richard Bruce Wright 2001 HarperFlamingo Canada
2 0060973129 Decision in Normandy Carlo D'Este 1991 HarperPerennial
3 0374157065 Flu: The Story of the Great Influenza Pandemic... Gina Bari Kolata 1999 Farrar Straus Giroux
4 0393045218 The Mummies of Urumchi E. J. W. Barber 1999 W. W. Norton &amp; Company
In [43]:
items_data = books[['ISBN','Book-Title','Book-Author','Year-Of-Publication','Publisher']]
items_data.to_csv('items_data.csv', sep="|",index=False)

items_sf = graphlab.SFrame.read_csv('items_data.csv', sep="|")
Unable to parse line "078946697X|"DK Readers: Creating the X-Men, How It All Began (Level 4: Proficient Readers)\"""|Michael Teitelbaum|2000|DK Publishing Inc"
Unable to parse line "2070426769|"Peuple du ciel, suivi de 'Les Bergers\"""|Jean-Marie Gustave Le Cl�©zio|2003|Gallimard"
Unable to parse line "0789466953|"DK Readers: Creating the X-Men, How Comic Books Come to Life (Level 4: Proficient Readers)\"""|James Buckley|2000|DK Publishing Inc"
3 lines failed to parse correctly
Finished parsing file C:\Users\Sahil Gupta\Google Drive\Winter Project\items_data.csv
Parsing completed. Parsed 100 lines in 0.746783 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,str,long,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Unable to parse line "078946697X|"DK Readers: Creating the X-Men, How It All Began (Level 4: Proficient Readers)\"""|Michael Teitelbaum|2000|DK Publishing Inc"
Unable to parse line "2070426769|"Peuple du ciel, suivi de 'Les Bergers\"""|Jean-Marie Gustave Le Cl�©zio|2003|Gallimard"
Unable to parse line "0789466953|"DK Readers: Creating the X-Men, How Comic Books Come to Life (Level 4: Proficient Readers)\"""|James Buckley|2000|DK Publishing Inc"
3 lines failed to parse correctly
Finished parsing file C:\Users\Sahil Gupta\Google Drive\Winter Project\items_data.csv
Parsing completed. Parsed 271376 lines in 0.615655 secs.
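The three failures come from titles containing embedded double quotes, which break the '|'-delimited round trip. A simpler path (a sketch, not from the original notebook) is to skip the intermediate CSV entirely: graphlab.SFrame accepts a pandas DataFrame directly, just as graphlab.SFrame(ratings) is used below.

# Building the SFrame straight from the pandas DataFrame avoids the
# CSV round trip and its quoting pitfalls, so no rows are dropped.
items_sf = graphlab.SFrame(items_data)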
In [45]:
items_sf.head(2)
Out[45]:
ISBN Book-Title Book-Author Year-Of-Publication Publisher
0195153448 Classical Mythology Mark P. O. Morford 2002 Oxford University Press
0002005018 Clara Callan Richard Bruce Wright 2001 HarperFlamingo Canada
[2 rows x 5 columns]
In [ ]:
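# Item-content recommender: books are related through their metadata
# features (title, author, year, publisher) rather than co-rating patterns.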
content_based_model = graphlab.item_content_recommender.create(items_sf, observation_data=graphlab.SFrame(ratings), 
                                                               user_id='User-ID', item_id='ISBN', target='Book-Rating')
WARNING: The ItemContentRecommender model is still in beta.
WARNING: This feature transformer is still in beta, and some interpretation rules may change in the future.
Applying transform:
Class             : AutoVectorizer

Model Fields
------------
Features          : ['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']
Excluded Features : ['ISBN']

Column               Type  Interpretation  Transforms                         Output Type
-------------------  ----  --------------  ---------------------------------  -----------
Book-Title           str   short_text      3-Character NGram Counts -> TFIDF  dict       
Book-Author          str   categorical     None                               str        
Year-Of-Publication  int   numerical       None                               int        
Publisher            str   categorical     None                               str        

Recsys training: model = item_content_recommender
Defaulting to brute force instead of ball tree because there are multiple distance components.
Starting brute force nearest neighbors model training.
Starting pairwise querying.
+--------------+---------+-------------+--------------+
| Query points | # Pairs | % Complete. | Elapsed Time |
+--------------+---------+-------------+--------------+
| 0            | 67844   |             | 198.834ms    |
| 17           | 4681236 | 0.00635649  | 1.24s        |
| ...          | ...     | ...         | ...          |
| 2034         | 6e+008  | 0.749698    | 1m 52s       |
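Brute-force pairwise querying over all ~271k items is quadratic in the number of items (hundreds of millions of pairs), and the progress log above was captured while the run was still under way. Once training completes, the content-based model exposes the same recommender API as the factorization models; a sketch, assuming the cell above ran to completion:

# Content-based recommendations for the same user as before.
content_based_model.recommend(users=[69188], k=5).print_rows(num_rows=5)

# Books most similar in content (title n-grams, author, year, publisher)
# to 'Classical Mythology'.
content_based_model.get_similar_items(['0195153448'], k=5).print_rows(num_rows=5)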
In [ ]: