nuojohnchen committed on
Commit
7895988
·
verified ·
1 Parent(s): e084499

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py CHANGED
@@ -8,6 +8,22 @@ from PIL import Image
8
  import fitz # PyMuPDF
9
  import numpy as np
10
  from transformers import NougatProcessor, VisionEncoderDecoderModel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Set environment variables
13
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -90,6 +106,20 @@ def extract_text_from_pdf(pdf_bytes):
90
  return default_paper_content
91
 
92
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # Load Nougat model
94
  processor, model = load_nougat_model()
95
 
 
8
  import fitz # PyMuPDF
9
  import numpy as np
10
  from transformers import NougatProcessor, VisionEncoderDecoderModel
11
+ import nltk
12
+ import ssl
13
+
14
+ # Initialize NLTK
15
+ try:
16
+ _create_unverified_https_context = ssl._create_unverified_context
17
+ except AttributeError:
18
+ pass
19
+ else:
20
+ ssl._create_default_https_context = _create_unverified_https_context
21
+
22
+ # Download the data NLTK requires
23
+ try:
24
+ nltk.data.find('tokenizers/punkt')
25
+ except LookupError:
26
+ nltk.download('punkt')
27
 
28
  # Set environment variables
29
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
106
  return default_paper_content
107
 
108
  try:
109
+ # Make sure NLTK is installed
110
+ try:
111
+ import nltk
112
+ try:
113
+ nltk.data.find('tokenizers/punkt')
114
+ except LookupError:
115
+ nltk.download('punkt')
116
+ except ImportError:
117
+ print("Installing NLTK...")
118
+ import subprocess
119
+ subprocess.check_call(["pip", "install", "nltk", "python-Levenshtein"])
120
+ import nltk
121
+ nltk.download('punkt')
122
+
123
  # Load Nougat model
124
  processor, model = load_nougat_model()
125