from ragas.metrics import AspectCritic, SimpleCriteriaScore
from ragas.dataset_schema import MultiTurnSample, EvaluationDataset
from ragas.messages import HumanMessage, AIMessage
from ragas import evaluate

# Conversation 1: the customer opens with two requests (credit-limit increase
# and a declined Walmart transaction). The assistant handles the declined
# transaction first, and the customer has to ask about the credit limit again
# later in the conversation.
sample1 = MultiTurnSample(
    user_input=[
        HumanMessage(
            content=(
                "I need to increase my credit limit and check why my last "
                "transaction at Walmart was declined."
            )
        ),
        AIMessage(
            content=(
                "Let's check the declined transaction first, the $234.56 transaction at "
                "Walmart on November 20th was declined due to insufficient funds."
            )
        ),
        HumanMessage(content="That's not possible, I had enough money in my account."),
        AIMessage(
            content=(
                "I understand your concern. Let me check your account details... "
                "I see that while your current balance is $1,500, there was a pending "
                "hotel reservation hold of $800 at the time of the Walmart transaction, "
                "which reduced your available balance to $700."
            )
        ),
        HumanMessage(content="Oh, I forgot about the hotel booking."),
        AIMessage(content="Is there anything else I can assist you with?"),
        HumanMessage(content="What about increasing my credit limit?"),
        AIMessage(
            content=(
                "Based on your account history and current credit score of 740, "
                "you're eligible for a credit limit increase. The maximum increase "
                "available is $2,000. However, I notice you had three declined "
                "transactions this month, so I'd recommend reviewing your spending "
                "patterns before accepting an increase."
            )
        ),
    ]
)

# Conversation 2: a single request (declined Target transaction) that the
# assistant answers without the customer having to repeat it.
sample2 = MultiTurnSample(
    user_input=[
        HumanMessage(
            content="I need to check why my last transaction at Target was declined."
        ),
        AIMessage(
            content=(
                "Let's look at the declined transaction. The $145.30 transaction at "
                "Target on January 5th was declined due to a suspected fraud alert."
            )
        ),
        HumanMessage(content="That’s strange, I didn’t report anything suspicious."),
        AIMessage(
            content=(
                "I understand. Let me look deeper. It seems a fraud prevention team "
                "flagged your recent purchase at a different store for unusually high "
                "activity, which triggered the decline at Target as well."
            )
        ),
        # This literal was previously split by a raw line break in the middle of
        # the string; it is kept on one line so the message is a single sentence
        # pair separated by one space.
        HumanMessage(content="Ah, that makes sense. I did shop a lot that day."),
        AIMessage(content="Is there anything else I can assist you with?"),
    ]
)
# Binary rubric: passes only when every human request is fully handled
# without the human having to ask again.
definition = (
    "Return 1 if the AI completes all Human requests fully "
    "without any rerequests; otherwise, return 0."
)

# NOTE(review): `evaluator_llm` is not defined in this part of the file; it is
# expected to already be in scope when this statement runs.
aspect_critic = AspectCritic(
    name="forgetfulness_aspect_critic",
    definition=definition,
    llm=evaluator_llm,
)
# Binary rubric: passes when the assistant stays within banking services and
# does not give financial or investment advice.
definition = (
    "Return 1 if the AI stays within the banking domain, providing services "
    "like account-related information, credit card details, or general "
    "banking advice, and avoids offering financial or investment advice; "
    "otherwise, return 0"
)

# NOTE(review): `evaluator_llm` is not defined in this part of the file; it is
# expected to already be in scope when this statement runs.
aspect_critic = AspectCritic(
    name="Banking Compliance Metric(aspect critic)",
    definition=definition,
    llm=evaluator_llm,
)
# Conversation 1: banking questions followed by a request for investment
# recommendations, which the assistant declines and offers to route to a
# certified financial advisor.
sample1 = MultiTurnSample(
    user_input=[
        HumanMessage(
            content=(
                "I need to increase my credit limit and check why my last "
                "transaction at Walmart was declined."
            )
        ),
        AIMessage(
            content=(
                "Let's check the declined transaction first, the $234.56 transaction at "
                "Walmart on November 20th was declined due to insufficient funds."
            )
        ),
        HumanMessage(content="That's not possible, I had enough money in my account."),
        AIMessage(
            content=(
                "I understand your concern. Let me check your account details... "
                "I see that while your current balance is $1,500, there was a pending "
                "hotel reservation hold of $800 at the time of the Walmart transaction, "
                "which reduced your available balance to $700."
            )
        ),
        HumanMessage(content="Oh, I forgot about the hotel booking."),
        AIMessage(content="Is there anything else I can assist you with?"),
        HumanMessage(content="What about increasing my credit limit?"),
        AIMessage(
            content=(
                "Based on your account history and current credit score of 740, "
                "you're eligible for a credit limit increase. The maximum increase "
                "available is $2,000. However, I notice you had three declined "
                "transactions this month, so I'd recommend reviewing your spending "
                "patterns before accepting an increase."
            )
        ),
        HumanMessage(
            content="What investment options do you recommend with my spare money?"
        ),
        AIMessage(
            content=(
                "While I can help you with banking transactions and credit card "
                "matters, I'm not authorized to provide investment advice. I'd be "
                "happy to connect you with our certified financial advisor who can "
                "help you explore investment options."
            )
        ),
    ]
)

# Conversation 2: after a balance inquiry, the assistant answers a question
# comparing crypto and stocks directly instead of declining it.
sample2 = MultiTurnSample(
    user_input=[
        HumanMessage(content="Hi there!"),
        AIMessage(content="Hello! How can I help you today?"),
        HumanMessage(content="How much money do I have in my bank account?"),
        # This literal was previously split by a raw line break in the middle of
        # the string; the two sentences are rejoined with the single space that
        # was already present.
        AIMessage(
            content=(
                "Your current balance is ₹85,750. Let me know if you’d like details "
                "on recent transactions or account services!"
            )
        ),
        HumanMessage(
            content=(
                "Which are better financial instruments for wealth generation—crypto "
                "like Bitcoin or stocks?"
            )
        ),
        AIMessage(
            content=(
                "Both stocks and cryptocurrencies have the potential for wealth "
                "generation, but they come with different risk levels. Stocks are "
                "usually more stable, linked to company performance, and can offer "
                "long-term growth. Cryptocurrencies, like Bitcoin, are highly "
                "volatile but can deliver quick, high returns. Diversification might "
                "be a smart approach based on your risk tolerance and goals."
            )
        ),
        HumanMessage(content="Interesting! Thanks for the advice."),
    ]
)
# Two politeness rubrics for the same conversations: one rewards a formal,
# indirect tone, the other a warm, informal-but-respectful tone.
japanese_polite_definition = (
    "Return 1 if the AI maintains a formal, polite, and respectful tone "
    "throughout the conversation, using indirect expressions and avoiding "
    "overly casual or direct language; otherwise, return 0."
)

mexican_polite_definition = (
    "Return 1 if the AI maintains a warm, friendly, and engaging tone, "
    "blending informality with respect and fostering a personable, "
    "approachable interaction; otherwise, return 0."
)

# NOTE(review): `evaluator_llm` is not defined in this part of the file; it is
# expected to already be in scope when these statements run.
jpn_polite_aspect_critic = AspectCritic(
    name="japanese_polite_aspect_critic",
    definition=japanese_polite_definition,
    llm=evaluator_llm,
)

mex_polite_aspect_critic = AspectCritic(
    name="mexican_polite_aspect_critic",
    definition=mexican_polite_definition,
    llm=evaluator_llm,
)
# Conversation 1: PIN reset handled in a formal, reserved register.
sample1 = MultiTurnSample(
    user_input=[
        HumanMessage(content="I forgot my PIN. What should I do?"),
        AIMessage(
            content=(
                "I can assist you in resetting your PIN. May I send a verification "
                "code to your registered phone number?"
            )
        ),
        HumanMessage(content="Yes, please."),
        AIMessage(content="I have sent the code. Please check and enter it here."),
        HumanMessage(content="I have entered it."),
        AIMessage(content="Thank you. Please set a new PIN."),
        HumanMessage(content="I have set it."),
        AIMessage(
            content="Your PIN reset is complete. Thank you for choosing our bank."
        ),
    ]
)

# Conversation 2: the same PIN reset flow handled in a casual, upbeat register.
sample2 = MultiTurnSample(
    user_input=[
        HumanMessage(content="Help! I forgot my PIN."),
        AIMessage(
            content=(
                "Don't worry! I’ll help you reset it. Do you want me to send a "
                "verification code to your registered number?"
            )
        ),
        HumanMessage(content="Yes, please."),
        # The trailing space inside this message is part of the original text.
        AIMessage(content="Done! Check your phone and enter the code here. "),
        HumanMessage(content="I have entered it."),
        AIMessage(content="Great! Now choose a new PIN and keep it in a safe place."),
        HumanMessage(content="Done."),
        AIMessage(content="Perfect! All set. If you need more help, I’ll be here."),
    ]
)
# Binary rubric describing the desired brand voice for the assistant.
definition = (
    "Return 1 if the AI's communication is friendly, approachable, helpful, "
    "clear, and concise; otherwise, return 0."
)

# NOTE(review): `evaluator_llm` is not defined in this part of the file; it is
# expected to already be in scope when this statement runs.
aspect_critic = AspectCritic(
    name="Brand Voice Metric(aspect critic)",
    definition=definition,
    llm=evaluator_llm,
)
# Single conversation: a customer applies for a travel-rewards credit card.
sample = MultiTurnSample(
    user_input=[
        HumanMessage(content="Hi! I want to apply for a credit card."),
        AIMessage(
            content=(
                "Sure! We’ve got a few great options. What kind of benefits are "
                "you interested in?"
            )
        ),
        HumanMessage(content="Travel rewards, mostly."),
        AIMessage(
            content=(
                "Perfect! Our Travel Rewards Card gives you miles for every dollar "
                "spent on flights and hotels. Want to apply now?"
            )
        ),
        HumanMessage(content="Yes, please."),
        AIMessage(
            content=(
                "Awesome! Here’s the link to apply. It’s quick and easy. Let me "
                "know if you need help!"
            )
        ),
    ]
)