Bab 5 Visualisasi & Dashboard
Pertemuan kali ini kita akan fokus pada pelatihan visualisasi yang sudah perlah dipelajari pada wokshop sebelumnya yaitu Data Analytics with Python. Kita akan mulai dengan mengakses database terlebih dahulu, kemudian melakukan kueri sesuai kebutuhan analisis, selanjutnya menampilkannya dalam bentuk visual, dan pada akhirnya membentuk dashboard.
5.1 Buat Database Baru
Pertama-tama buatlah koneksi Python ke database MySQL menggunakan dan mengatifkan XAMPP.
# import module
import mysql.connector
# koneksi server
db1 = mysql.connector.connect(
host='localhost',
user='bakti',
passwd='123'
)
# membuat objek 'cursor` sebagai kata kunci eksekusi SQL
cursor1 = db1.cursor()
# Perintah SQL
sql='''CREATE DATABASE IF NOT EXISTS dashboard'''
# Eksekusi perintah SQL
cursor1.execute(sql)
5.2 Import Tabel ke Database
Berikut ini adalah cara untuk menambahkan tabel ke database dengan menggunakan file csv yang telah desediakan.
5.2.1 Impot Data CSV
import pandas as pd
# read data CSV
link1 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/users.csv'
link2 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/products.csv'
link3 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/orders.csv'
link4 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/order_items.csv'
link5 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/inventory_items.csv'
link6 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/events.csv'
link7 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/distribution_centers.csv'
df1 = pd.read_csv(link1, sep=';' )
df2 = pd.read_csv(link2, sep=';' )
df3 = pd.read_csv(link3, sep=';' )
df4 = pd.read_csv(link4, sep=';' )
df5 = pd.read_csv(link5, sep=';' )
df6 = pd.read_csv(link6, sep=';' )
df7 = pd.read_csv(link7, sep=';' )
5.2.2 Konversi Tanggal & Waktu
Merubah struktur tanggal dan waktu menggunakan Pandas.
df1['created_at'] = df1['created_at'].apply(pd.to_datetime)
df3[df3.columns[4:8]] = df3[df3.columns[4:8]].apply(pd.to_datetime)
df4[df4.columns[6:10]] = df4[df4.columns[6:10]].apply(pd.to_datetime)
df5[df5.columns[2:4]] = df5[df5.columns[2:4]].apply(pd.to_datetime)
df6['created_at'] = df6['created_at'].apply(pd.to_datetime)
5.2.3 Simpan Tabel ke Database
Berikut ini adalah langkah untun menyimpan tabel ke database.
import pymysql
from sqlalchemy import create_engine
# membuat engine
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
.format(user="bakti",
pw="123",
db="dashboard"))
# df1.to_sql('users', engine, if_exists = 'append', index = True)
df1.to_sql('users', engine, if_exists='replace', index = False)
df2.to_sql('products', engine, if_exists='replace', index = False)
df3.to_sql('orders', engine, if_exists='replace', index = False)
df4.to_sql('order_items', engine, if_exists='replace', index = False)
df5.to_sql('inventory_items', engine, if_exists='replace', index = False)
df6.to_sql('events', engine, if_exists='replace', index = False)
df7.to_sql('distribution_centers', engine, if_exists='replace', index = False)
5.3 Latihan
5.3.1 Soal 1
Proporsi pelanggan berdasarkan jenis kelamin.
# Perintah SQL
sql = '''
SELECT
o.gender,
SUM(oi.sale_price*o.num_of_item) revenue,
SUM(o.num_of_item) quantity
FROM `order_items` oi
LEFT JOIN `orders` o
ON oi.order_id = o.order_id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 2
'''
# Eksekusi perintah SQL
df1=pd.read_sql(sql, engine)
df1
Visualisasi dengan Pie-Chart
5.3.2 Soal 2
Proporsi pelanggan berdasarkan kategori usianya?
# Perintah SQL
sql = '''
SELECT
CASE
WHEN u.age <15 THEN 'Kids'
WHEN u.age BETWEEN 15 AND 24 THEN 'Teenager'
WHEN u.age BETWEEN 25 AND 50 THEN 'Adult'
WHEN u.age >50 THEN 'Eldery' END AS age_group,
COUNT(DISTINCT oi.user_id) total_customer
FROM `order_items` oi
LEFT JOIN `users` u
ON oi.user_id = u.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 2 DESC
'''
# Eksekusi perintah SQL
df2=pd.read_sql(sql, engine)
df2
Visualisasi dengan Bar-Chart
5.3.3 Soal 3
Urutkan jumlah pelanggan terbanyak berdasarkan Negara, dan Jenis Kelaminnya.
# Perintah SQL
sql = '''
WITH
cust AS (
SELECT
DISTINCT oi.user_id,
SUM(CASE WHEN u.gender = 'M' THEN 1 ELSE null END) AS male,
SUM(CASE WHEN u.gender = 'F' THEN 1 ELSE null END) AS female,
u.country AS country
FROM `order_items` AS oi
INNER JOIN `users` AS u
ON oi.user_id = u.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1, 4
)
SELECT
c.country,
COUNT(DISTINCT c.user_id) AS customers_count,
COUNT(c.female) AS female,
COUNT(c.male) AS male
FROM cust AS c
GROUP BY 1
ORDER BY 2 DESC
'''
# Eksekusi perintah SQL
df3=pd.read_sql(sql, engine)
df3
5.3.4 Soal 4
Top 10 Brand apa paling banyak revenue dan juga paling banyak terjual?
# Perintah SQL
sql = '''
SELECT
p.brand,
SUM(oi.sale_price*o.num_of_item) AS revenue,
SUM(o.num_of_item) AS quantity
FROM `order_items` AS oi
LEFT JOIN `orders` AS o ON oi.order_id = o.order_id
LEFT JOIN `products` AS p ON oi.product_id = p.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 3 DESC
LIMIT 10
'''
# Eksekusi perintah SQL
pd.read_sql(sql, engine)
5.3.6 Soal 6
Berikan alamat email dan user_id dari 10 pelanggan dengan total pembelian terbanyak.
# Perintah SQL
sql = '''
SELECT
oi.user_id,
u.email,
SUM(oi.sale_price*o.num_of_item) total_purchase
FROM `order_items` oi
LEFT JOIN `users` u ON oi.user_id = u.id
LEFT JOIN `orders` o ON oi.order_id = o.order_id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1, 2
ORDER BY 3 DESC
LIMIT 10
'''
# Eksekusi perintah SQL
pd.read_sql(sql, engine)