Spaces:

somosnlp-hackathon-2022
/

gastronomia_para_to2

Sleeping

File size: 5,985 Bytes

9e46f6d

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
from resources import banner, error_html_response

model_checkpoint = 'gastronomia-para-to2/gastronomia_para_to2'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

special_tokens = [
    '<INPUT_START>',
    '<NEXT_INPUT>',
    '<INPUT_END>',
    '<TITLE_START>',
    '<TITLE_END>',
    '<INGR_START>',
    '<NEXT_INGR>',
    '<INGR_END>',
    '<INSTR_START>',
    '<NEXT_INSTR>',
    '<INSTR_END>']

def frame_html_response(html_response):
    return f"""<iframe style="width: 100%; height: 800px" name="result" allow="midi; geolocation; microphone; camera; 
    display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
    allow-scripts allow-same-origin allow-popups 
    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
    allowpaymentrequest="" frameborder="0" srcdoc='{html_response}'></iframe>"""


def check_special_tokens_order(pre_output):
    return (pre_output.find('<INPUT_START>') <
            pre_output.find('<NEXT_INPUT>') <=
            pre_output.rfind('<NEXT_INPUT>') <
            pre_output.find('<INPUT_END>') <
            pre_output.find('<INGR_START>') <
            pre_output.find('<NEXT_INGR>') <=
            pre_output.rfind('<NEXT_INGR>') <
            pre_output.find('<INGR_END>') <
            pre_output.find('<INSTR_START>') <
            pre_output.find('<NEXT_INSTR>') <=
            pre_output.rfind('<NEXT_INSTR>') <
            pre_output.find('<INSTR_END>') <
            pre_output.find('<TITLE_START>') <
            pre_output.find('<TITLE_END>'))


def make_html_response(title, ingredients, instructions):
    ingredients_html_list = '<ul><li>' + '</li><li>'.join(ingredients) + '</li></ul>'
    instructions_html_list = '<ol><li>' + '</li><li>'.join(instructions) + '</li></ol>'

    html_response = f'''
                    <!DOCTYPE html>
                    <html>
                    <body>                    
                    <h1>{title}</h1>

                    <h2>Ingredientes</h2>
                    {ingredients_html_list}

                    <h2>Instrucciones</h2>
                    {instructions_html_list}

                    </body>
                    </html>
    '''
    return html_response


def rerun_model_output(pre_output):
    if pre_output is None:
        return True
    elif not '<RECIPE_END>' in pre_output:
        print('<RECIPE_END> not in pre_output')
        return True
    pre_output_trimmed = pre_output[:pre_output.find('<RECIPE_END>')]
    if not all(special_token in pre_output_trimmed for special_token in special_tokens):
        print('Not all special tokens are in preoutput')
        return True
    elif not check_special_tokens_order(pre_output_trimmed):
        print('Special tokens are unordered in preoutput')
        return True
    elif len(pre_output_trimmed.split())<75:
        print('Length of the recipe is <75')
        return True
    else:
        return False


def generate_output(tokenized_input):
    pre_output = None
    while rerun_model_output(pre_output):
        output = model.generate(**tokenized_input,
                                max_length=600,
                                do_sample=True,
                                top_p=0.92,
                                top_k=50,
                                # no_repeat_ngram_size=2,
                                num_return_sequences=3)
        pre_output = tokenizer.decode(output[0], skip_special_tokens=False)
    pre_output_trimmed = pre_output[:pre_output.find('<RECIPE_END>')]
    return pre_output_trimmed


def check_wrong_ingredients(ingredients):
    if ingredients is None:
        return True
    if any(ingredient.startswith('De') for ingredient in ingredients):
        print('At least one ingredient starts with De')
        return True


def make_recipe(input_ingredients):
    input_ingredients = re.sub(' y ', ', ', input_ingredients)
    input = '<RECIPE_START> '
    input += '<INPUT_START> ' + ' <NEXT_INPUT> '.join(input_ingredients.split(', ')) + ' <INPUT_END> '
    input += '<INGR_START> '
    tokenized_input = tokenizer(input, return_tensors='pt')

    output_ingredients = None
    i = 0
    while check_wrong_ingredients(output_ingredients):
        if i == 4:
            return frame_html_response(error_html_response)
        pre_output_trimmed = generate_output(tokenized_input)
        output_ingredients = re.search('<INGR_START> (.*) <INGR_END>', pre_output_trimmed).group(1)
        output_ingredients = output_ingredients.split(' <NEXT_INGR> ')
        output_ingredients = list(set([output_ingredient.strip() for output_ingredient in output_ingredients]))
        output_ingredients = [output_ing.capitalize() for output_ing in output_ingredients]
        i += 1

    output_title = re.search('<TITLE_START> (.*) <TITLE_END>', pre_output_trimmed).group(1).strip().capitalize()
    output_instructions = re.search('<INSTR_START> (.*) <INSTR_END>', pre_output_trimmed).group(1)
    output_instructions = output_instructions.split(' <NEXT_INSTR> ')

    html_response = make_html_response(output_title, output_ingredients, output_instructions)

    return frame_html_response(html_response)


iface = gr.Interface(
    fn=make_recipe,
    inputs=
    [
        gr.inputs.Textbox(lines=1, placeholder='ingrediente_1, ingrediente_2, ..., ingrediente_n',
                          label='Dime con qué ingredientes quieres que cocinemos hoy y te sugeriremos una receta tan pronto como nuestros fogones estén libres'),
    ],
    outputs=
    [
        gr.outputs.HTML(label="¡Esta es mi propuesta para ti! ¡Buen provecho!")
    ],
    examples=
    [
        ['salmón, zumo de naranja, aceite de oliva, sal, pimienta'],
        ['harina, azúcar, huevos, chocolate, levadura Royal']
    ],
    description=banner)
iface.launch(enable_queue=True)