first deploy
Browse files- app.py +144 -0
- data/df_wine_combi.pkl +3 -0
- data/df_wine_us_rate.pkl +3 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import python lib
|
2 |
+
import streamlit as st
|
3 |
+
import time
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from surprise import Dataset, Reader
|
7 |
+
from surprise import KNNBaseline
|
8 |
+
|
9 |
+
# Load the two pickled wine dataframes shipped in the app's data folder.
# df_wine_model is the ratings table handed to the recommender;
# df_wine_combi carries the wine details and trait columns used for display.
# NOTE(review): unpickling executes arbitrary code -- only load files that
# are bundled with (and trusted by) this repository.
_DATA_DIR = './data'
df_wine_model = pd.read_pickle(_DATA_DIR + '/df_wine_us_rate.pkl')
df_wine_combi = pd.read_pickle(_DATA_DIR + '/df_wine_combi.pkl')
|
12 |
+
|
13 |
+
# Every wine trait a user can pick in the multiselect widget; each name is
# also used as a column label when filtering df_wine_combi.
# Kept as one whitespace-separated string so the vocabulary is easy to scan.
all_traits = """
    almond anise apple apricot baked baking_spices berry black_cherry
    black_currant black_pepper black_tea blackberry blueberry
    boysenberry bramble bright butter candy caramel cardamom cassis cedar
    chalk cherry chocolate cinnamon citrus clean closed
    clove cocoa coffee cola complex concentrated cranberry cream crisp dark
    dark_chocolate dense depth dried_herb dry dust
    earth edgy elderberry elegant fennel firm flower forest_floor french_oak
    fresh fruit full_bodied game grapefruit graphite
    green gripping grippy hearty herb honey honeysuckle jam juicy lavender
    leafy lean leather lemon lemon_peel length licorice
    light_bodied lime lush meaty medium_bodied melon milk_chocolate
    minerality mint nutmeg oak olive orange orange_peel peach
    pear pencil_lead pepper pine pineapple plum plush polished pomegranate
    powerful purple purple_flower raspberry refreshing
    restrained rich ripe robust rose round sage salt savory sharp silky
    smoke smoked_meat smooth soft sparkling spice
    steel stone strawberry succulent supple sweet tangy tannin tar tart tea
    thick thyme tight toast tobacco tropical_fruit
    vanilla velvety vibrant violet warm weight wet_rocks white white_pepper
    wood
""".split()
|
24 |
+
|
25 |
+
#---------------------------------------------------------------------------------------------------------
|
26 |
+
|
27 |
+
# Function to instantiate the model & return the est recsys scores
|
28 |
+
def recommend_scores():
    """Fit the recommender and score every wine 'mockuser' has not rated.

    Trains a surprise ``KNNBaseline`` model (cosine similarity, k=35,
    min_k=1) on the full module-level ``df_wine_model`` dataframe, then asks
    it for an estimated rating for each wine title that has no row belonging
    to the ``'mockuser'`` taster.

    Returns:
        pandas.DataFrame: one row per not-yet-rated wine with the columns
        ``title`` (the wine name) and ``est_match_pts`` (the model's
        estimated rating), in the order ``df_wine_model['title'].unique()``
        yields the titles.
    """
    # Ratings are declared to lie on an 88-100 scale.
    # NOTE(review): Dataset.load_from_df expects exactly three columns in
    # (user, item, rating) order -- presumably taster_name, title, points;
    # confirm against the pickle's schema.
    reader = Reader(rating_scale=(88, 100))
    data = Dataset.load_from_df(df_wine_model, reader)

    # Instantiate the recsys model
    sim_options = {'name': 'cosine'}
    model = KNNBaseline(k=35, min_k=1, sim_options=sim_options, verbose=False)

    # Train on every available rating (no hold-out split)
    model.fit(data.build_full_trainset())

    # Titles the mock user already rated. A set gives O(1) membership tests;
    # testing against the NumPy array returned by .unique() rescans the whole
    # array for every wine.
    is_mockuser = df_wine_model.taster_name == 'mockuser'
    user_wines = set(df_wine_model.loc[is_mockuser, 'title'])
    not_user_wines = [
        wine for wine in df_wine_model['title'].unique()
        if wine not in user_wines
    ]

    # Estimate the mock user's rating for each wine they have not rated yet
    predictions = (
        model.predict(uid='mockuser', iid=wine) for wine in not_user_wines
    )
    recommend_list = [[pred.iid, pred.est] for pred in predictions]

    return pd.DataFrame(recommend_list, columns=['title', 'est_match_pts'])
|
61 |
+
|
62 |
+
def add_bg_from_url():
    """Inject the page's custom CSS through ``st.markdown``.

    The stylesheet sets a fixed, cover-sized background photo on the app view
    container and a 75%-opaque white background on vertical blocks.
    """
    # Plain (non-f) string: nothing is interpolated, so the CSS braces need
    # no escaping. The text is kept flush-left so it reaches Streamlit as is.
    background_css = """
<style>

[data-testid="stAppViewContainer"] {
background-image: url("https://images.pexels.com/photos/391213/pexels-photo-391213.jpeg");
background-attachment: fixed;
background-size: cover
}

[data-testid="stVerticalBlock"] {
background-color: rgba(255,255,255,0.75)
}

</style>
"""
    # unsafe_allow_html is required for the raw <style> tag to be emitted
    st.markdown(background_css, unsafe_allow_html=True)
|
81 |
+
|
82 |
+
#----------------------------------------------------------------------------------------------------------
|
83 |
+
|
84 |
+
# Page header, attribution and instructions
st.title("Which wine should I get?")
st.write("By Lee Wan Xian ([GitHub](https://github.com/leewanxian/wine_recommender))")
st.text("")
st.write("You can type the wine traits that you want in the dropdown list below")
add_bg_from_url()

# Trait picker; an empty selection is treated as "any trait" below
select_temptrait = st.multiselect('Choose the traits that you want in your wine', options=all_traits)

if st.button('Show me the wines!'):
    with st.spinner('Should you have some wine now?'):

        # NOTE(review): fixed 2-second pause on top of the model fit below;
        # it does no work -- confirm it is wanted for the spinner UX.
        time.sleep(2)

        # Fall back to the full trait vocabulary when nothing was selected
        selected_traits = select_temptrait if select_temptrait else all_traits

        # Run recommender model
        recommend_df = recommend_scores()

        # Columns to keep: the wine name plus every selected trait
        trait_filter = ['title', *selected_traits]

        # Create dataframe for wine name and traits
        df_temp_traits = df_wine_combi.drop(
            columns=['taster_name', 'points', 'variety', 'designation', 'winery',
                     'country', 'province', 'region_1', 'region_2', 'price',
                     'description', 'desc_wd_count', 'traits'])

        # Keep wines having at least one of the selected traits.
        # .copy() makes the column subset an independent frame, so adding the
        # helper 'sum' column does not trigger pandas' SettingWithCopyWarning.
        df_temp_traits = df_temp_traits[trait_filter].copy()
        df_temp_traits['sum'] = df_temp_traits.sum(axis=1, numeric_only=True)
        df_temp_traits = df_temp_traits[df_temp_traits['sum'] != 0]

        # Merge the selected wines traits with recommend scores
        df_selectrec_temp = df_temp_traits.merge(recommend_df, on='title', how='left')

        # Merge the selected wines with recommendations with df on details
        df_selectrec_detail = df_selectrec_temp.merge(df_wine_combi, on='title', how='left')
        df_selectrec_detail = df_selectrec_detail.drop_duplicates()

        # Pull out the top 10 recommendations (raw); rows without an estimated
        # score (NaN after the left merge) sort last by pandas' default
        df_rec_raw = df_selectrec_detail.sort_values('est_match_pts', ascending=False).head(10)

        # Prepare the display for the top 10 recommendations
        display_cols = ['title', 'country', 'province', 'variety', 'winery',
                        'points', 'price', 'traits', 'description']
        df_rec_final = df_rec_raw[display_cols].reset_index(drop=True)
        # Show ranks starting at 1 instead of pandas' default 0
        df_rec_final.index = df_rec_final.index + 1
        # regex=False: a literal space-for-separator swap on every pandas version
        df_rec_final['traits'] = df_rec_final['traits'].str.replace(" ", " | ", regex=False)
        df_rec_final = df_rec_final.rename(columns={'title': 'Name',
                                                    'country': 'Country',
                                                    'province': 'State/Province',
                                                    'variety': 'Type',
                                                    'winery': 'Winery',
                                                    'points': 'Rating',
                                                    'price': 'Price',
                                                    'description': 'Review',
                                                    'traits': 'Key Traits'})
        st.balloons()
        st.dataframe(df_rec_final)
|
data/df_wine_combi.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5324ee33b8c5daeefd5529101d1ac3095ebcc115b759c1f6f0b96daa493f9f87
|
3 |
+
size 18288367
|
data/df_wine_us_rate.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76893c95a7f28fedb325f5a89576ac3344e7117cdc470d3894f5d6614bfaf5dd
|
3 |
+
size 1219978
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
scikit-surprise
|