Spaces:

ai-law-society-lab
/

Federal_Caselaw_index

Sleeping

App Files Files Community

dominiks committed on Apr 22

Commit

4836358

verified ·

1 Parent(s): ec88dba

Upload app_federal.py

Browse files

Files changed (1) hide show

app_federal.py +3 -8

app_federal.py CHANGED Viewed

@@ -80,7 +80,7 @@ def text_prompt_call(model_to_be_used, system_prompt, user_prompt ):
 # Build a one-line, human-readable summary string from a metadata mapping.
 #
 # Reads metadata["case_name"], metadata["court_short_name"],
 # metadata["date_filed"] and metadata["precedential_status"], and joins them
 # as "<case_name>, <court_short_name>, <date_filed>, precedential status <status>".
 # Returns "" when building the string raises for any reason.
 def format_metadata_as_str(metadata):
     try:
         # Plain `+` concatenation: a missing key or a non-str value raises
         # here and is turned into the empty-string fallback below.
-        out =  metadata["case_name"] + ", " + metadata["court_short_name"] + ", " + metadata["date_filed"] + ", precedential status " + metadata["precedential_status"]
     # NOTE(review): a bare `except:` also catches KeyboardInterrupt and
     # SystemExit; consider narrowing to (KeyError, TypeError).
     except:
         out = ""
     return out
@@ -98,15 +98,11 @@ def show_user_query(user_message, history):
 # Serialize one search hit into a tagged text block for the reranking prompt.
 #
 # Output lines, joined by "\n" and followed by one trailing "\n":
 #   <id>idx</id>
 #   one <tag>value</tag> line per (metadata key, tag name) pair in `keys`
 #   <paragraph>text with whitespace runs collapsed to single spaces</paragraph>
 #
 # metadata[...] is indexed directly with no fallback, so a missing key raises.
 def format_metadata_for_reranking(metadata, text, idx):
     #print (metadata)
     #keys = [["case_name", "case name"], ["court_short_name", "court"], ["date_filed", "year"], ["citation_count", "citation count"], ["precedential_status", "precedential status"]]
     # Each entry is [metadata key, tag name used in the output].
     # NOTE(review): some tag names contain a space ("citation count",
     # "precedential status"), so the result is XML-like, not well-formed XML.
-    keys = [["court_short_name", "court"], ["date_filed", "year"], ["citation_count", "citation count"], ["precedential_status", "precedential status"]]
     # The empty-list assignment is immediately overwritten by the next line.
     out_str = []
     out_str = ["<id>" + str(idx) + "</id>"]
     for key in keys:
         i,j = key
         # Debug output written to stdout for every key of every hit.
-        print ("i, j", i,j)
-        print (metadata)
-        print ("(metadata[i]", metadata[i])
         # The value is stringified as-is; date_filed is emitted under the
         # "year" tag without any year extraction here.
         out_str.append("<" + j + ">" + str(metadata[i]) + "</" + j + ">")
     # text.split() with no arguments splits on any whitespace run, so
     # newlines and tabs in the paragraph become single spaces.
     out_str.append("<paragraph>" + " ".join(text.split()) + "</paragraph>")
     return "\n".join(out_str) + "\n"
@@ -241,12 +237,11 @@ def run_dense_retrieval(query):
 def rerank_with_chatGPT(query, search_results):
     search_results_as_dict = {str(i["index"]):i for i in search_results}
-    system_prompt = """You are given a list of search results for a query. Rerank the search results such that the paragraphs answering the query in the most comprehensive way are listed first. If multiple paragraphs answer the question, prioritize the reranking in the following order:
     1. prioritize metadata according to the query.
     2. If the query doesn't ask for specific metadata, prioritize paragraphs from higher courts (Supreme Court first, Circuit courts next, district courts last)
     3. Prioritize paragraphs which have higher citation counts.
     4. Prioritize parapgrahs from more recent opinions.
-    5. Prioritize paragraphs which are published compared to unpublished ones.
     Return a python list with the ids of the five highest ranking results, nothing else.
     <query>""" + query + "</query>\n\n"
     user_prompt = []

 # Build a one-line, human-readable summary string from a metadata mapping.
 #
 # Reads metadata["case_name"], metadata["court_short_name"] and
 # metadata["date_filed"] and joins them with ", ". The precedential-status
 # suffix is commented out on the assignment line, so it is not included.
 # Returns "" when building the string raises for any reason.
 def format_metadata_as_str(metadata):
     try:
         # Plain `+` concatenation: a missing key or a non-str value raises
         # here and is turned into the empty-string fallback below.
+        out =  metadata["case_name"] + ", " + metadata["court_short_name"] + ", " + metadata["date_filed"] #+ ", precedential status " + metadata["precedential_status"]
     # NOTE(review): a bare `except:` also catches KeyboardInterrupt and
     # SystemExit; consider narrowing to (KeyError, TypeError).
     except:
         out = ""
     return out
 # Serialize one search hit into a tagged text block for the reranking prompt.
 #
 # Output lines, joined by "\n" and followed by one trailing "\n":
 #   <id>idx</id>
 #   <court>...</court>, <year>...</year>, <citation count>...</citation count>
 #   <paragraph>text with whitespace runs collapsed to single spaces</paragraph>
 #
 # metadata[...] is indexed directly with no fallback, so a missing key raises.
 def format_metadata_for_reranking(metadata, text, idx):
     #print (metadata)
     #keys = [["case_name", "case name"], ["court_short_name", "court"], ["date_filed", "year"], ["citation_count", "citation count"], ["precedential_status", "precedential status"]]
     # Each entry is [metadata key, tag name used in the output]; the
     # precedential-status pair is commented out and therefore not emitted.
     # NOTE(review): the tag name "citation count" contains a space, so the
     # result is XML-like, not well-formed XML.
+    keys = [["court_short_name", "court"], ["date_filed", "year"], ["citation_count", "citation count"]]# , ["precedential_status", "precedential status"]]
     # The empty-list assignment is immediately overwritten by the next line.
     out_str = []
     out_str = ["<id>" + str(idx) + "</id>"]
     for key in keys:
         i,j = key
         # The value is stringified as-is; date_filed is emitted under the
         # "year" tag without any year extraction here.
         out_str.append("<" + j + ">" + str(metadata[i]) + "</" + j + ">")
     # text.split() with no arguments splits on any whitespace run, so
     # newlines and tabs in the paragraph become single spaces.
     out_str.append("<paragraph>" + " ".join(text.split()) + "</paragraph>")
     return "\n".join(out_str) + "\n"
 def rerank_with_chatGPT(query, search_results):
     search_results_as_dict = {str(i["index"]):i for i in search_results}
+    system_prompt = """You are given a list of search results for a query. Rerank the search results such that the paragraphs answering the query in the most comprehensive way are listed first. Additionaly, prioritize reranking in the following order:
     1. prioritize metadata according to the query.
     2. If the query doesn't ask for specific metadata, prioritize paragraphs from higher courts (Supreme Court first, Circuit courts next, district courts last)
     3. Prioritize paragraphs which have higher citation counts.
     4. Prioritize parapgrahs from more recent opinions.
     Return a python list with the ids of the five highest ranking results, nothing else.
     <query>""" + query + "</query>\n\n"
     user_prompt = []