waxl committed on
Commit
f23dbc8
·
1 Parent(s): 4f69b0b

first deploy

Browse files
Files changed (4) hide show
  1. app.py +144 -0
  2. data/df_wine_combi.pkl +3 -0
  3. data/df_wine_us_rate.pkl +3 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import python lib
2
+ import streamlit as st
3
+ import time
4
+ import pandas as pd
5
+ import numpy as np
6
+ from surprise import Dataset, Reader
7
+ from surprise import KNNBaseline
8
+
9
# Load the pre-built wine dataframes that ship alongside the app.
#   df_wine_model - ratings table handed to surprise in recommend_scores()
#   df_wine_combi - per-wine details plus trait columns, used for filtering
#                   and for the final display table
# NOTE(review): read_pickle executes whatever the pickle contains, so these
# files must only ever come from a trusted source (here: the repo itself).
df_wine_model = pd.read_pickle('./data/df_wine_us_rate.pkl')
df_wine_combi = pd.read_pickle('./data/df_wine_combi.pkl')
12
+
13
# Every wine trait the user can pick from in the multiselect widget.
# Each entry is also used as a column name when filtering df_wine_combi,
# so the spelling here must match those column names exactly.
all_traits = [
    'almond', 'anise', 'apple', 'apricot', 'baked', 'baking_spices', 'berry',
    'black_cherry', 'black_currant', 'black_pepper', 'black_tea', 'blackberry',
    'blueberry', 'boysenberry', 'bramble', 'bright', 'butter', 'candy',
    'caramel', 'cardamom', 'cassis', 'cedar', 'chalk', 'cherry', 'chocolate',
    'cinnamon', 'citrus', 'clean', 'closed', 'clove', 'cocoa', 'coffee', 'cola',
    'complex', 'concentrated', 'cranberry', 'cream', 'crisp', 'dark',
    'dark_chocolate', 'dense', 'depth', 'dried_herb', 'dry', 'dust', 'earth',
    'edgy', 'elderberry', 'elegant', 'fennel', 'firm', 'flower', 'forest_floor',
    'french_oak', 'fresh', 'fruit', 'full_bodied', 'game', 'grapefruit',
    'graphite', 'green', 'gripping', 'grippy', 'hearty', 'herb', 'honey',
    'honeysuckle', 'jam', 'juicy', 'lavender', 'leafy', 'lean', 'leather',
    'lemon', 'lemon_peel', 'length', 'licorice', 'light_bodied', 'lime', 'lush',
    'meaty', 'medium_bodied', 'melon', 'milk_chocolate', 'minerality', 'mint',
    'nutmeg', 'oak', 'olive', 'orange', 'orange_peel', 'peach', 'pear',
    'pencil_lead', 'pepper', 'pine', 'pineapple', 'plum', 'plush', 'polished',
    'pomegranate', 'powerful', 'purple', 'purple_flower', 'raspberry',
    'refreshing', 'restrained', 'rich', 'ripe', 'robust', 'rose', 'round',
    'sage', 'salt', 'savory', 'sharp', 'silky', 'smoke', 'smoked_meat',
    'smooth', 'soft', 'sparkling', 'spice', 'steel', 'stone', 'strawberry',
    'succulent', 'supple', 'sweet', 'tangy', 'tannin', 'tar', 'tart', 'tea',
    'thick', 'thyme', 'tight', 'toast', 'tobacco', 'tropical_fruit', 'vanilla',
    'velvety', 'vibrant', 'violet', 'warm', 'weight', 'wet_rocks', 'white',
    'white_pepper', 'wood',
]
24
+
25
+ #---------------------------------------------------------------------------------------------------------
26
+
27
+ # Function to instantiate the model & return the est recsys scores
28
def recommend_scores():
    """Fit the recommender and score every wine the mock user has not rated.

    A ``KNNBaseline`` model (cosine similarity, ``k=35``, ``min_k=1``) is
    fitted on the full contents of the module-level ``df_wine_model`` and
    then asked for an estimated rating, on behalf of the ``'mockuser'``
    taster, for each wine title that taster has no row for.

    NOTE(review): ``Dataset.load_from_df`` is given the frame as-is, so
    ``df_wine_model`` is presumably laid out as (taster_name, title, rating)
    in that column order - confirm against the pickle.

    Returns:
        pandas.DataFrame: one row per unrated wine, with columns ``title``
        (the wine name) and ``est_match_pts`` (the model's estimate).
    """
    # Ratings fed to surprise are declared to lie in the 88-100 range
    reader = Reader(rating_scale=(88, 100))
    data = Dataset.load_from_df(df_wine_model, reader)

    # Build and fit the model on every available rating (no hold-out split)
    model = KNNBaseline(k=35, min_k=1, sim_options={'name': 'cosine'}, verbose=False)
    model.fit(data.build_full_trainset())

    # A set gives O(1) membership checks; testing against the ndarray that
    # ``unique()`` returns would rescan it once per candidate wine.
    is_mockuser = df_wine_model['taster_name'] == 'mockuser'
    user_wines = set(df_wine_model.loc[is_mockuser, 'title'])
    not_user_wines = [
        wine for wine in df_wine_model['title'].unique() if wine not in user_wines
    ]

    # One (title, estimated score) row per wine the mock user has not rated
    predictions = (model.predict(uid='mockuser', iid=wine) for wine in not_user_wines)
    recommend_list = [[pred.iid, pred.est] for pred in predictions]

    return pd.DataFrame(recommend_list, columns=['title', 'est_match_pts'])
61
+
62
def add_bg_from_url():
    """Inject the page's background image and panel tint as raw CSS.

    Writes a ``<style>`` element through ``st.markdown`` (with
    ``unsafe_allow_html=True``) that sets a fixed, cover-sized background
    image on the ``stAppViewContainer`` element and a translucent white
    background on ``stVerticalBlock`` elements. Returns nothing.
    """
    # Plain string on purpose: nothing is interpolated, so CSS braces can be
    # written as-is instead of being doubled for an f-string.
    st.markdown(
        """
        <style>

        [data-testid="stAppViewContainer"] {
            background-image: url("https://images.pexels.com/photos/391213/pexels-photo-391213.jpeg");
            background-attachment: fixed;
            background-size: cover
        }

        [data-testid="stVerticalBlock"] {
            background-color: rgba(255,255,255,0.75)
        }

        </style>
        """,
        unsafe_allow_html=True
    )
81
+
82
+ #----------------------------------------------------------------------------------------------------------
83
+
84
# Page header and styling
st.title("Which wine should I get?")
st.write("By Lee Wan Xian ([GitHub](https://github.com/leewanxian/wine_recommender))")
st.text("")
st.write("You can type the wine traits that you want in the dropdown list below")
add_bg_from_url()

# Trait picker; returns the list of traits the user selected (possibly empty)
select_temptrait = st.multiselect('Choose the traits that you want in your wine', options=all_traits)

if st.button('Show me the wines!'):
    with st.spinner('Should you have some wine now?'):

        time.sleep(2)

        # No selection means "no preference": fall back to every known trait
        selected_traits = select_temptrait or all_traits

        # Estimated match score for each wine the mock user has not rated
        recommend_df = recommend_scores()

        # Columns needed for trait filtering: the wine name plus chosen traits
        trait_filter = ['title', *selected_traits]

        # Take an explicit copy so adding the helper 'sum' column below does
        # not write into a slice of df_wine_combi.
        df_temp_traits = df_wine_combi[trait_filter].copy()

        # Keep only wines that have at least one of the selected traits
        df_temp_traits['sum'] = df_temp_traits.sum(axis=1, numeric_only=True)
        df_temp_traits = df_temp_traits[df_temp_traits['sum'] != 0]

        # Attach the recommender's score to each remaining wine
        df_selectrec_temp = df_temp_traits.merge(recommend_df, on='title', how='left')

        # Bring the descriptive columns back in, then collapse identical rows
        df_selectrec_detail = df_selectrec_temp.merge(df_wine_combi, on='title', how='left')
        df_selectrec_detail = df_selectrec_detail.drop_duplicates()

        # Top 10 by estimated score
        df_rec_raw = df_selectrec_detail.sort_values('est_match_pts', ascending=False).head(10)

        # Shape the table for display: chosen columns, 1-based row numbers,
        # traits separated by " | ", and human-friendly headers
        display_columns = ['title', 'country', 'province', 'variety', 'winery',
                           'points', 'price', 'traits', 'description']
        df_rec_final = df_rec_raw[display_columns].reset_index(drop=True)
        df_rec_final.index = df_rec_final.index + 1
        df_rec_final['traits'] = df_rec_final['traits'].str.replace(" ", " | ", regex=False)
        df_rec_final = df_rec_final.rename(columns={
            'title': 'Name',
            'country': 'Country',
            'province': 'State/Province',
            'variety': 'Type',
            'winery': 'Winery',
            'points': 'Rating',
            'price': 'Price',
            'description': 'Review',
            'traits': 'Key Traits',
        })

        st.balloons()
        st.dataframe(df_rec_final)
data/df_wine_combi.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5324ee33b8c5daeefd5529101d1ac3095ebcc115b759c1f6f0b96daa493f9f87
3
+ size 18288367
data/df_wine_us_rate.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76893c95a7f28fedb325f5a89576ac3344e7117cdc470d3894f5d6614bfaf5dd
3
+ size 1219978
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ scikit-surprise