Yehor committed
Commit 2c8a4e3 · verified · Parent: be7f41a

Update app.py

Files changed (1): app.py (+6, -6)
app.py CHANGED
@@ -1,10 +1,11 @@
 import spaces
 
 import torch
-import torch._dynamo
+
+# import torch._dynamo
 # torch._dynamo.config.suppress_errors = True
-torch._dynamo.disable()
-torch._dynamo.disallow_in_graph()
+# torch._dynamo.disable()
+# torch._dynamo.disallow_in_graph()
 
 import gradio as gr
 
@@ -20,8 +21,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map='auto',
     torch_dtype='auto',
 )
-
-# model = torch.compile(model, backend="eager")
+compiled_model = torch.compile(model, mode="reduce-overhead", fullgraph=True)
 
 
 print('Model dtype:', model.dtype)
@@ -36,7 +36,7 @@ def predict(question):
 ### Answer:
 '''], return_tensors = "pt").to("cuda")
 
-    outputs = model.generate(**inputs, max_new_tokens = 128)
+    outputs = compiled_model.generate(**inputs, max_new_tokens = 128)
 
     results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
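For context, a minimal sketch of how the updated app.py plausibly reads after this commit. Only the from_pretrained kwargs, the torch.compile call, the prompt tail, and the generate call come from the diff itself; the checkpoint id (MODEL_NAME), the tokenizer setup, the @spaces.GPU decorator, the start of the prompt string, and the Gradio wiring are assumptions filled in from the usual ZeroGPU Space pattern.

import spaces

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: the actual checkpoint id is not visible in this diff.
MODEL_NAME = 'your-org/your-model'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map='auto',
    torch_dtype='auto',
)
# The change in this commit: compile the model once at startup
# instead of disabling TorchDynamo.
compiled_model = torch.compile(model, mode="reduce-overhead", fullgraph=True)

print('Model dtype:', model.dtype)

@spaces.GPU  # assumed: the usual ZeroGPU decorator implied by `import spaces`
def predict(question):
    inputs = tokenizer([f'''### Question:
{question}

### Answer:
'''], return_tensors = "pt").to("cuda")

    outputs = compiled_model.generate(**inputs, max_new_tokens = 128)

    results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return results[0]

gr.Interface(fn=predict, inputs='text', outputs='text').launch()

One caveat worth noting: torch.compile(model) returns an OptimizedModule whose .generate attribute falls through to the wrapped module, and mode="reduce-overhead" (CUDA graphs) combined with fullgraph=True is fairly strict about graph breaks, so whether generation actually runs through the compiled graph depends on the torch and transformers versions installed in the Space. The lines removed by this commit also contained a latent bug: torch._dynamo.disallow_in_graph() requires a function argument and would have raised a TypeError if uncommented.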