code
def intersection_detector(
    context: str,
    answer: str,
    length_cutoff: int = 3,
) -> dict[str, float]:
    """
    Check hallucinations using token intersection metrics

    Both inputs are stripped of punctuation and lowercased before scoring.
    The returned values are "1 - similarity", so higher values mean the
    answer shares less with the context.

    Parameters
    ----------
    context : str
        Context provided for RAG
    answer : str
        Answer from an LLM
    length_cutoff : int
        If the number of tokens in the answer is smaller than length_cutoff
        (or the answer has no non-stopword tokens at all), scores of 0 are
        returned without computing any metric

    Returns
    -------
    dict[str, float]
        ``"intersection"``: 1 - (fraction of distinct answer tokens that
        also occur in the context).
        ``"bleu"``: 1 - (mean of the BLEU n-gram precisions of the answer
        against the context).
    """
    # populate with relevant stopwords such as articles
    stopword_set: set[str] = set()

    # remove punctuation and lowercase
    context = re.sub(r"[^\w\s]", "", context).lower()
    answer = re.sub(r"[^\w\s]", "", answer).lower()

    # Tokenize BOTH texts the same way; comparing answer tokens against the
    # individual characters of the context would almost never match.
    find_tokens = re.compile(r"\w+").findall
    answer_tokens = find_tokens(answer)
    answer_terms = {term for term in answer_tokens if term not in stopword_set}

    # The cutoff is expressed in tokens, not characters. The emptiness check
    # also protects the division below when every token was a stopword or
    # when length_cutoff <= 0 and the answer is empty.
    if len(answer_tokens) < length_cutoff or not answer_terms:
        return {"intersection": 0.0, "bleu": 0.0}

    # calculate token intersection
    context_terms = {
        term for term in find_tokens(context) if term not in stopword_set
    }
    intersection = len(answer_terms & context_terms) / len(answer_terms)

    # calculate BLEU score
    # NOTE(review): the metric is loaded on every call, as in the original;
    # consider caching it at module level if this is called in a loop.
    bleu = evaluate.load("bleu")
    precisions = bleu.compute(predictions=[answer], references=[context])["precisions"]
    bleu_score = sum(precisions) / len(precisions)

    return {
        "intersection": 1 - intersection,
        "bleu": 1 - bleu_score,
    }
Authors
Sources
- Detect hallucinations for RAG-based systems - AWS aws.amazon.com via serper
Referenced by nodes (1)
- RAG concept