# Build the evaluation frame: keep only the span id and the raw input/output
# payload columns, take an independent copy, and index the rows by span id.
eval_df = (
    spans_df[
        [
            "context.span_id",
            "attributes.input.value",
            "attributes.output.value",
        ]
    ]
    .copy()
    .set_index("context.span_id")
)
# Combine system and user prompts from the traces
def get_prompt(input_value):
    """Return the first two message contents of a request joined by a newline.

    Args:
        input_value: Either a mapping with a ``"messages"`` list, or a JSON
            string that decodes to such a mapping. Each of the first two
            messages must have a ``"content"`` entry. The first message is
            treated as the system prompt and the second as the user prompt
            (assumed ordering -- the payload's roles are not checked).

    Returns:
        The system prompt, a newline, then the user prompt.

    Raises:
        json.JSONDecodeError: If ``input_value`` is a string that is not
            valid JSON.
        KeyError, IndexError: If the payload lacks ``"messages"``, has fewer
            than two messages, or a message lacks ``"content"``.
    """
    # Payloads may arrive serialized; decode only when given a string.
    if isinstance(input_value, str):
        input_value = json.loads(input_value)
    messages = input_value["messages"]
    system_prompt = messages[0]["content"]
    user_prompt = messages[1]["content"]
    return system_prompt + "\n" + user_prompt
# Get the responses from the traces
def get_response(output_value):
    """Return the message content of the first choice in a response payload.

    Args:
        output_value: Either a mapping with a ``"choices"`` list whose first
            entry holds ``["message"]["content"]``, or a JSON string that
            decodes to such a mapping.

    Returns:
        Whatever is stored under ``choices[0]["message"]["content"]``; the
        value is passed through unchanged (including ``None``).

    Raises:
        json.JSONDecodeError: If ``output_value`` is a string that is not
            valid JSON.
        KeyError, IndexError: If the expected keys or the first choice are
            missing.
    """
    # Payloads may arrive serialized; decode only when given a string.
    if isinstance(output_value, str):
        output_value = json.loads(output_value)
    first_choice = output_value["choices"][0]
    return first_choice["message"]["content"]
# Extract the prompt and response text for every span, in row order, and
# store both lists as new columns next to the raw payloads.
prompts = list(map(get_prompt, eval_df["attributes.input.value"]))
responses = list(map(get_response, eval_df["attributes.output.value"]))
eval_df["prompt"] = prompts
eval_df["response"] = responses