-- CASE 1: operational analytics
-- Create the working database if it does not exist yet and make it the
-- default schema for the statements that follow.
CREATE DATABASE IF NOT EXISTS operational_analytics;
USE operational_analytics;
-- job_data: the table read by the throughput, language-share and
-- duplicate-detection queries in this file (they all select FROM job_data).
-- IF NOT EXISTS keeps the script re-runnable, matching the CREATE DATABASE
-- statements.
CREATE TABLE IF NOT EXISTS job_data (
    ds DATE,                -- calendar date the row is bucketed by
    job_id INT,
    actor_id INT,
    event VARCHAR(50),
    language VARCHAR(50),
    time_spent INT,         -- NOTE(review): unit is not stated in this file - confirm
    org CHAR(1)
);
2. Throughput Analysis
Objective: Calculate the 7-day rolling average of throughput
(number of events per second).
-- Rolling average of throughput (events per second), one output row per day
-- that has data, ordered by day.
--
-- daily_events: number of non-NULL events recorded on each day.
WITH daily_events AS (
    SELECT
        DATE(ds) AS ds,
        COUNT(event) AS event_count
    FROM job_data
    GROUP BY DATE(ds)
)
SELECT
    ds,
    -- Average the raw daily counts first, then convert to a per-second rate
    -- once (86400 = seconds in a day). Dividing each day's integer count
    -- before averaging loses the value: MySQL rounds an exact-value quotient
    -- to 4 decimals by default, and dialects with integer division return 0.
    -- NOTE: the ROWS frame spans the current row plus the 6 preceding rows,
    -- not calendar days; days with no events have no row in daily_events, so
    -- with gaps in the data the window reaches back further than 7 days.
    AVG(event_count) OVER (
        ORDER BY ds
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) / 86400 AS rolling_throughput
FROM daily_events
ORDER BY ds;
Explanation:
The inner query counts the total events per day.
The outer query calculates a 7-day rolling average
throughput, where event_count / 86400 represents events
per second (assuming each day has 86400 seconds).
3. Language Share Analysis
Objective: Calculate the percentage share of each language in
the last 30 days.
-- Percentage share of each language among the rows dated within the last
-- 30 days (from CURDATE() - 30 days up to today), largest share first.
SELECT
    language,
    -- SUM(COUNT(*)) OVER () is the total number of filtered rows, so the
    -- 30-day filter is written once instead of being repeated in a scalar
    -- subquery. COUNT(*) is used for the numerator as well, so rows with a
    -- NULL language are reported with their real share rather than 0 and
    -- the percentages add up to 100.
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS language_share_percentage
FROM job_data
-- Compare the bare column (no DATE() wrapper) so an index on ds can be used;
-- for DATE and DATETIME values the predicate selects the same rows.
WHERE ds >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY language
ORDER BY
    language_share_percentage DESC,
    language;  -- tie-breaker keeps the output order deterministic
Explanation: This query calculates the percentage of each
language's usage over the past 30 days by dividing the count
of each language by the total count of events in that period.
4. Duplicate Rows Detection
Objective: Identify duplicate rows in the data.
-- List every combination of column values that appears more than once in
-- job_data, together with the number of times it occurs.
SELECT
    ds,
    job_id,
    actor_id,
    event,
    language,
    time_spent,
    org,
    COUNT(*) AS duplicate_count
FROM job_data
-- Grouping by all columns puts fully identical rows in the same group.
GROUP BY
    ds,
    job_id,
    actor_id,
    event,
    language,
    time_spent,
    org
HAVING COUNT(*) > 1;
Explanation: This query groups the data by all columns and
uses HAVING to show only rows with duplicate entries (where
the count is more than 1).
-- CASE 2: investigating metric spikes
-- NOTE(review): the database name is spelled "invesgtigatingspikes" (sic).
-- It is kept unchanged here because renaming it would point the script at a
-- different database than any existing one - confirm before correcting.
CREATE DATABASE IF NOT EXISTS invesgtigatingspikes;
-- Select the CASE 2 database so the queries below do not run against the
-- CASE 1 schema. Presumably the events table lives in this database - confirm.
USE invesgtigatingspikes;
Data Aggregation
-- Count the rows in events for each user_id and list the users with the
-- most events first.
SELECT
    user_id,
    COUNT(*) AS event_count
FROM events
GROUP BY user_id
ORDER BY event_count DESC;