DataWiz
Mission: Acts as a data analyst for your code, generating SQL/NoSQL queries from natural language, and suggesting schema optimizations.
Multi-Table SQL Generation & Schema Optimization Example
import logging
import time
from typing import List, Dict
class DataWizAgent:
def __init__(self, db_connection: str):
self.db_connection = db_connection
self.cache = {}
logging.basicConfig(level=logging.DEBUG)
def generate_query(self, user_request: str, db_type: str="postgres") -> str:
"""
Translates a complex user request (NL) into an optimized SQL statement.
Uses caching for repeated user requests, partial parse trees, etc.
"""
cache_key = f"{user_request}_{db_type}"
if cache_key in self.cache:
logging.debug("Returning cached query result.")
return self.cache[cache_key]
# Hypothetical advanced NLP parse
parsed_request = self.parse_nlp(user_request)
sql_query = self.construct_sql(parsed_request, db_type)
self.cache[cache_key] = sql_query
logging.debug(f"Generated new SQL query for request: '{user_request}'")
return sql_query
def parse_nlp(self, request: str) -> dict:
# Simulate complex NLP parsing to identify time windows, aggregates, joins
# E.g., "Show total monthly revenue, top 5 sales reps, and product categories from last quarter"
# We build a parse tree or intermediate representation
time.sleep(0.5)
return {
"metrics": ["total_monthly_revenue", "top_5_sales_reps", "product_categories"],
"time_range": {"start": "2025-01-01", "end": "2025-03-31"},
"group_by": ["rep_name", "product_category"],
"order_by": "revenue DESC",
"limit": 5
}
def construct_sql(self, parse_obj: dict, db_type: str) -> str:
# Create a multi-table join query
# This is a conceptual example, actual logic can be more sophisticated
sql = f"""
SELECT
r.rep_name,
c.category_name,
SUM(s.revenue) AS total_revenue
FROM sales s
JOIN reps r ON s.rep_id = r.id
JOIN categories c ON s.category_id = c.id
WHERE s.sale_date >= '{parse_obj['time_range']['start']}'
AND s.sale_date <= '{parse_obj['time_range']['end']}'
GROUP BY r.rep_name, c.category_name
ORDER BY total_revenue DESC
LIMIT {parse_obj['limit']};
"""
return sql.strip()
def suggest_schema_improvements(self, table_names: List[str]) -> List[str]:
"""
Example method that proposes schema index additions or field normalization
based on hypothetical usage stats.
"""
suggestions = []
for tname in table_names:
if tname == "sales":
suggestions.append(f"Add INDEX on {tname}(sale_date) for faster time-range queries.")
if tname == "reps":
suggestions.append(f"Partition {tname} by region to optimize query distribution.")
logging.info(f"Schema suggestions for tables {table_names}: {suggestions}")
return suggestions
# Example usage
if __name__ == "__main__":
datawiz = DataWizAgent(db_connection="postgres://user:pass@localhost:5432/sales_db")
query = datawiz.generate_query(
user_request="Show total monthly revenue, top 5 sales reps, and product categories for last quarter",
db_type="postgres"
)
print("Generated Query:\n", query)
schema_recs = datawiz.suggest_schema_improvements(["sales", "reps", "categories"])
for rec in schema_recs:
print("Schema Recommendation:", rec)
Last updated