Bab 5 Visualisasi & Dashboard

Pertemuan kali ini kita akan fokus pada pelatihan visualisasi yang sudah perlah dipelajari pada wokshop sebelumnya yaitu Data Analytics with Python. Kita akan mulai dengan mengakses database terlebih dahulu, kemudian melakukan kueri sesuai kebutuhan analisis, selanjutnya menampilkannya dalam bentuk visual, dan pada akhirnya membentuk dashboard.

5.1 Buat Database Baru

Pertama-tama buatlah koneksi Python ke database MySQL menggunakan dan mengatifkan XAMPP.

# import module 
import mysql.connector

# koneksi server
db1 = mysql.connector.connect(
  host='localhost',
  user='bakti',
  passwd='123'
)

# membuat objek 'cursor` sebagai kata kunci eksekusi SQL
cursor1 = db1.cursor()

# Perintah SQL
sql='''CREATE DATABASE IF NOT EXISTS dashboard'''

# Eksekusi perintah  SQL
cursor1.execute(sql)

5.2 Import Tabel ke Database

Berikut ini adalah cara untuk menambahkan tabel ke database dengan menggunakan file csv yang telah desediakan.

5.2.1 Impot Data CSV

import pandas as pd

# read data CSV
link1 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/users.csv'
link2 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/products.csv'
link3 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/orders.csv'
link4 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/order_items.csv'
link5 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/inventory_items.csv'
link6 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/events.csv'
link7 = 'https://raw.githubusercontent.com/dsciencelabs/JNE_Data_Analytics_PySQL/main/data/data_latihan/distribution_centers.csv'

df1 = pd.read_csv(link1, sep=';' )
df2 = pd.read_csv(link2, sep=';' )
df3 = pd.read_csv(link3, sep=';' ) 
df4 = pd.read_csv(link4, sep=';' )
df5 = pd.read_csv(link5, sep=';' )
df6 = pd.read_csv(link6, sep=';' )
df7 = pd.read_csv(link7, sep=';' )

5.2.2 Konversi Tanggal & Waktu

Merubah struktur tanggal dan waktu menggunakan Pandas.

df1['created_at'] = df1['created_at'].apply(pd.to_datetime)
df3[df3.columns[4:8]] = df3[df3.columns[4:8]].apply(pd.to_datetime)
df4[df4.columns[6:10]] = df4[df4.columns[6:10]].apply(pd.to_datetime)
df5[df5.columns[2:4]] = df5[df5.columns[2:4]].apply(pd.to_datetime)
df6['created_at'] = df6['created_at'].apply(pd.to_datetime)

5.2.3 Simpan Tabel ke Database

Berikut ini adalah langkah untun menyimpan tabel ke database.

import pymysql
from sqlalchemy import create_engine

# membuat engine
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user="bakti",
                               pw="123",
                               db="dashboard"))

# df1.to_sql('users', engine, if_exists = 'append', index = True)
df1.to_sql('users', engine, if_exists='replace', index = False)
df2.to_sql('products', engine, if_exists='replace', index = False)
df3.to_sql('orders', engine, if_exists='replace', index = False)
df4.to_sql('order_items', engine, if_exists='replace', index = False)
df5.to_sql('inventory_items', engine, if_exists='replace', index = False)
df6.to_sql('events', engine, if_exists='replace', index = False)
df7.to_sql('distribution_centers', engine, if_exists='replace', index = False)

5.3 Latihan

5.3.1 Soal 1

Proporsi pelanggan berdasarkan jenis kelamin.

# Perintah SQL
sql = ''' 
SELECT 
  o.gender,
  SUM(oi.sale_price*o.num_of_item) revenue,
  SUM(o.num_of_item) quantity
FROM `order_items` oi
LEFT JOIN `orders` o
ON oi.order_id = o.order_id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 2
'''

# Eksekusi perintah  SQL
df1=pd.read_sql(sql, engine)
df1

Visualisasi dengan Pie-Chart

import plotly.express as px
fig = px.pie(df1, values='revenue', names='gender', hole=.3,
      title='Revenue Pelanggan Berdasarkan Gender')
fig.show()

5.3.2 Soal 2

Proporsi pelanggan berdasarkan kategori usianya?

# Perintah SQL
sql = ''' 
SELECT
  CASE 
    WHEN u.age <15 THEN 'Kids'
    WHEN u.age BETWEEN 15 AND 24 THEN 'Teenager'
    WHEN u.age BETWEEN 25 AND 50 THEN 'Adult'
    WHEN u.age >50 THEN 'Eldery' END AS age_group,
  COUNT(DISTINCT oi.user_id) total_customer
FROM `order_items` oi
LEFT JOIN `users` u
ON oi.user_id = u.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 2 DESC
'''

# Eksekusi perintah  SQL
df2=pd.read_sql(sql, engine)
df2

Visualisasi dengan Bar-Chart

fig = px.bar(df2, x='age_group', y='total_customer', color='age_group')
fig.show()

5.3.3 Soal 3

Urutkan jumlah pelanggan terbanyak berdasarkan Negara, dan Jenis Kelaminnya.

# Perintah SQL
sql = ''' 
WITH
cust AS (
  SELECT 
    DISTINCT oi.user_id,
    SUM(CASE WHEN u.gender = 'M' THEN 1 ELSE null END) AS male,
    SUM(CASE WHEN u.gender = 'F' THEN 1 ELSE null END) AS female,
    u.country AS country
  FROM `order_items` AS oi
  INNER JOIN `users` AS u  
  ON oi.user_id = u.id
  WHERE oi.status NOT IN ('Cancelled','Returned')
  GROUP BY 1, 4
)

SELECT
  c.country,
  COUNT(DISTINCT c.user_id) AS customers_count,
  COUNT(c.female) AS female,
  COUNT(c.male) AS male
FROM cust AS c
GROUP BY 1
ORDER BY 2 DESC
'''

# Eksekusi perintah  SQL
df3=pd.read_sql(sql, engine)
df3
import plotly.express as px
fig = px.bar(df3, x="country", 
                  y=["customers_count","female", "male"], text_auto=True)
fig.show()

5.3.4 Soal 4

Top 10 Brand apa paling banyak revenue dan juga paling banyak terjual?

# Perintah SQL
sql = ''' 
SELECT 
  p.brand,
  SUM(oi.sale_price*o.num_of_item) AS revenue,
  SUM(o.num_of_item) AS quantity
FROM `order_items` AS oi
  LEFT JOIN `orders` AS o ON oi.order_id = o.order_id
  LEFT JOIN `products` AS p ON oi.product_id = p.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 3 DESC
LIMIT 10
'''

# Eksekusi perintah  SQL
pd.read_sql(sql, engine)

5.3.5 Soal 5

Media pemasaran terbaik yang sebaiknya digunakan Marketing?

# Perintah SQL
sql = ''' 
SELECT
  u.traffic_source, 
  COUNT(DISTINCT oi.user_id) total_customer
FROM `order_items` oi
LEFT JOIN `users` u
ON oi.user_id = u.id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1
ORDER BY 2 DESC
'''

# Eksekusi perintah  SQL
pd.read_sql(sql, engine)

5.3.6 Soal 6

Berikan alamat email dan user_id dari 10 pelanggan dengan total pembelian terbanyak.

# Perintah SQL
sql = ''' 
SELECT 
  oi.user_id,
  u.email,
  SUM(oi.sale_price*o.num_of_item) total_purchase
FROM `order_items` oi
  LEFT JOIN `users` u ON oi.user_id = u.id
  LEFT JOIN `orders` o ON oi.order_id = o.order_id
WHERE oi.status NOT IN ('Cancelled','Returned')
GROUP BY 1, 2
ORDER BY 3 DESC
LIMIT 10
'''

# Eksekusi perintah  SQL
pd.read_sql(sql, engine)

5.4 Tugas

Top 10 Merek produk yang paling banyak dibatalkan dan dikembalikan. Top 10 kategori produk yang paling banyak dibatalkan dan dikembalikan.