import pandas as pd
# List all available datasets.
# NOTE(review): `client` is not defined in this snippet; it is assumed to have
# been created earlier — confirm before running.
datasets = client.datasets.list()
for dataset in datasets:
    # Each entry is read by key, so it must be a mapping that provides at
    # least "name" and "example_count".
    print(f"Dataset: {dataset['name']} ({dataset['example_count']} examples)")
# Get a specific dataset with all examples.
# The dataset is requested by the identifier "qa-evaluation". The returned
# object is read through attribute access (`.name`) and `len()`, which the
# message below reports as the number of examples.
dataset = client.datasets.get_dataset(dataset="qa-evaluation")
print(f"Dataset {dataset.name} has {len(dataset)} examples")
# Convert the dataset to a pandas DataFrame for analysis. This rebinds `df`,
# so any earlier value held under that name is replaced.
df = dataset.to_dataframe()
print(df.columns) # Index(['input', 'output', 'metadata'], dtype='object')
# Create a new dataset from plain dictionaries.
# Three lists of equal length are supplied: inputs, outputs, and metadata.
# NOTE(review): presumably the i-th entry of each list describes the same
# example (paired by position) — confirm against the client documentation.
dataset = client.datasets.create_dataset(
    name="customer-support-qa",
    dataset_description="Q&A dataset for customer support evaluation",
    # One dictionary per example, holding the question that is asked.
    inputs=[
        {"question": "How do I reset my password?"},
        {"question": "What's your return policy?"},
        {"question": "How do I track my order?"},
    ],
    # One dictionary per example, holding the expected answer.
    outputs=[
        {"answer": "You can reset your password by clicking the 'Forgot Password' link on the login page."},
        {"answer": "We offer 30-day returns for unused items in original packaging."},
        {"answer": "You can track your order using the tracking number sent to your email."},
    ],
    # One dictionary per example, holding free-form labels.
    metadata=[
        {"category": "account", "difficulty": "easy"},
        {"category": "policy", "difficulty": "medium"},
        {"category": "orders", "difficulty": "easy"},
    ],
)
# Create a dataset from a pandas DataFrame.
# Build a three-row frame first; each column is later assigned a role
# (input, expected output, or metadata) by name.
df = pd.DataFrame(
    {
        "prompt": ["Hello", "Hi there", "Good morning"],
        "response": [
            "Hi! How can I help?",
            "Hello! What can I do for you?",
            "Good morning! How may I assist?",
        ],
        "sentiment": ["neutral", "positive", "positive"],
        "length": [20, 25, 30],
    }
)
dataset = client.datasets.create_dataset(
    name="greeting-responses",
    dataframe=df,
    # Columns to use as input.
    input_keys=["prompt"],
    # Columns to use as expected output.
    output_keys=["response"],
    # Additional metadata columns.
    metadata_keys=["sentiment", "length"],
)