nuojohnchen committed
Commit 044d0d9 · verified · 1 Parent(s): 7ede7c6

Update app.py

Files changed (1)
  1. app.py +16 -5
app.py CHANGED
@@ -154,11 +154,22 @@ def extract_text_from_pdf(pdf_bytes):
 
             # Print progress
             print(f"Processed page {page_num+1}/{len(doc)}")
+
+            # Stop early once roughly 15000 tokens (approximated by word count) have been extracted
+            if len(full_text.split()) > 15000:
+                print("Reached 15000 token limit, stopping extraction")
+                break
 
             # Clear GPU memory
             del pixel_values, outputs
             torch.cuda.empty_cache()
 
+        # Ensure the final text does not exceed ~15000 tokens (word-count approximation)
+        words = full_text.split()
+        if len(words) > 15000:
+            full_text = " ".join(words[:15000])
+            print(f"Truncated paper content to 15000 tokens")
+
         return full_text
     except Exception as e:
         import traceback
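The cap added in this hunk counts whitespace-separated words as a stand-in for tokens. A minimal, self-contained sketch of the same early-stop-plus-final-truncation behaviour (the helper name and the dummy page list are illustrative, not code from app.py):

```python
def truncate_words(text: str, max_words: int = 15000) -> str:
    """Cap text at max_words whitespace-separated words (the word-count
    approximation the diff uses in place of a true token count)."""
    words = text.split()
    if len(words) > max_words:
        print(f"Truncated paper content to {max_words} words")
        return " ".join(words[:max_words])
    return text


# Dummy per-page loop mirroring the early stop added above.
pages = ["page one text ...", "page two text ..."]  # stand-in for per-page OCR output
full_text = ""
for page_num, page_text in enumerate(pages):
    full_text += page_text + "\n"
    print(f"Processed page {page_num + 1}/{len(pages)}")
    if len(full_text.split()) > 15000:
        print("Reached the 15000-word limit, stopping extraction")
        break
full_text = truncate_words(full_text)
```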
@@ -233,12 +244,12 @@ Focus on clear, concise, and evidence-based improvements that align with the ove
         add_generation_prompt=True
     )
 
-    # Check input length and truncate to 15000 tokens before encoding
+    # Check input length and truncate to 16384 tokens before encoding
     input_tokens = tokenizer.encode(text)
-    if len(input_tokens) > 15000:  # Limit to 15k tokens
-        input_tokens = input_tokens[:15000]
+    if len(input_tokens) > 16384:  # the model's maximum context length
+        input_tokens = input_tokens[:16384]
         text = tokenizer.decode(input_tokens)
-        print(f"Input truncated to 15000 tokens")
+        print(f"Input truncated to 16384 tokens")
 
     progress(0.5, desc="Generating improved text...")
     # Generate non-streaming
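The encode-truncate-decode pattern this hunk retargets to 16384 tokens can be sketched as follows, assuming a Hugging Face transformers tokenizer; the checkpoint name is a placeholder, not the model this Space actually loads:

```python
from transformers import AutoTokenizer

MAX_INPUT_TOKENS = 16384  # context limit used in the hunk above

# Placeholder checkpoint; the Space's own tokenizer is loaded elsewhere in app.py.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

def truncate_to_context(text: str, limit: int = MAX_INPUT_TOKENS) -> str:
    """Encode the prompt, cap the token ids at the context limit, and decode back to text."""
    input_tokens = tokenizer.encode(text)
    if len(input_tokens) > limit:
        input_tokens = input_tokens[:limit]
        text = tokenizer.decode(input_tokens, skip_special_tokens=True)
        print(f"Input truncated to {limit} tokens")
    return text
```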
@@ -250,7 +261,7 @@ Focus on clear, concise, and evidence-based improvements that align with the ove
     with torch.no_grad():
         output_ids = model.generate(
             input_ids,
-            attention_mask=attention_mask,  # add attention mask
+            attention_mask=attention_mask,
             max_new_tokens=max_new_tokens,
             do_sample=(temperature > 0),
             temperature=temperature if temperature > 0 else 1.0,
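The last hunk only drops the inline comment; `attention_mask` is still passed explicitly to `model.generate`. A hedged sketch of where that mask typically comes from, assuming a standard transformers causal LM (the checkpoint name is again a placeholder):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")      # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")   # placeholder checkpoint

inputs = tokenizer("Improve this abstract:", return_tensors="pt")
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]  # 1 for real tokens, 0 for padding

temperature = 0.7
with torch.no_grad():
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,  # silences the missing-attention-mask warning
        max_new_tokens=64,
        do_sample=(temperature > 0),
        temperature=temperature if temperature > 0 else 1.0,
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```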
 