Skip to content

더미 데이터 생성

Redddy edited this page Oct 15, 2024 · 3 revisions

파이썬을 사용하여 더미 데이터 생성하기

DB커넥션과 더미 데이터 생성을 위한 외부 라이브러리 추가

  • mysql-connector-python: mysql 접속을 위한 라이브러리
  • faker: 그럴듯한 더미 데이터를 생성하기 위한 라이브러리

설치 명령어

pip install mysql-connector-python faker

schema.sql


파이썬 스크립트

import mysql.connector
from faker import Faker
import random

# MySQL database connection
connection = mysql.connector.connect(
    host="호스트",  # ex) localhost
    user="유저", # ex) root
    password="비밀번호",
    database="db 이름"
)

cursor = connection.cursor()
faker = Faker()

# Insert dummy data into the member table
def insert_member():
    query = """
        INSERT INTO member (created_at, access_token, profile_image, provider_login_id, provider_user_id, user_name) 
        VALUES (%s, %s, %s, %s, %s, %s)
    """
    values = (faker.date_time_this_year(), faker.sha256(), faker.image_url(), faker.domain_word(), faker.uuid4(), faker.user_name())
    cursor.execute(query, values)
    connection.commit()
    return cursor.lastrowid

# Insert dummy data into the pair_room table
def insert_pair_room():
    query = """
        INSERT INTO pair_room (access_code, created_at, navigator, driver, status, mission_url) 
        VALUES (%s, %s, %s, %s, %s, %s)
    """
    # Ensure navigator and driver are different and each is limited to 10 characters
    navigator = faker.user_name()[:10]
    driver = faker.user_name()[:10]
    mission_url = ["", faker.url()][random.randrange(0, 2)];
    while navigator == driver:
        driver = faker.user_name()[:10]  # Regenerate if navigator and driver are the same

    values = (faker.lexify('??????'), faker.date_time_this_year(), navigator, driver, random.choice(['IN_PROGRESS', 'COMPLETED']), mission_url)
    cursor.execute(query, values)
    connection.commit()
    return cursor.lastrowid

# Insert dummy data into the pair_room_member table
def insert_pair_room_member(pair_room_id, member_id):
    query = """
        INSERT INTO pair_room_member (pair_room_id, member_id)
        VALUES (%s, %s)
    """
    values = (pair_room_id, member_id)
    cursor.execute(query, values)
    connection.commit()

# Insert dummy data into the category table
def insert_category(pair_room_id):
    query = """
        INSERT INTO category (created_at, category_name, pair_room_id) 
        VALUES (%s, %s, %s)
    """
    values = (faker.date_time_this_year(), faker.word(), pair_room_id)
    cursor.execute(query, values)
    connection.commit()
    return cursor.lastrowid

# Insert dummy data into the reference_link table
def insert_reference_link(category_id, pair_room_id):
    query = """
        INSERT INTO reference_link (created_at, url, pair_room_id, category_id) 
        VALUES (%s, %s, %s, %s)
    """
    values = (faker.date_time_this_year(), faker.url(), pair_room_id, category_id)
    cursor.execute(query, values)
    connection.commit()
    return cursor.lastrowid

# Insert dummy data into the open_graph table
def insert_open_graph(reference_link_id):
    query = """
        INSERT INTO open_graph (created_at, description, head_title, image, open_graph_title, reference_link_id) 
        VALUES (%s, %s, %s, %s, %s, %s)
    """
    values = (faker.date_time_this_year(), faker.sentence(), faker.sentence(), faker.image_url(), faker.sentence(), reference_link_id)
    cursor.execute(query, values)
    connection.commit()

# Insert dummy data into the timer table
def insert_timer(pair_room_id):
    query = """
        INSERT INTO timer (pair_room_id, duration, remaining_time, created_at) 
        VALUES (%s, %s, %s, %s)
    """
    values = (pair_room_id, random.randint(10, 500), random.randint(0, 500), faker.date_time_this_year())
    cursor.execute(query, values)
    connection.commit()

# Insert dummy data into the todo table
def insert_todo(pair_room_id):
    query = """
        INSERT INTO todo (is_checked, sort, created_at, content, pair_room_id) 
        VALUES (%s, %s, %s, %s, %s)
    """
    values = (random.choice([0, 1]), random.uniform(1, 1000), faker.date_time_this_year(), faker.sentence(), pair_room_id)
    cursor.execute(query, values)
    connection.commit()

# Generate dummy data
def generate_data(n, m):
    for _ in range(n):
        # Insert member

        member_id = insert_member()

        for _ in range(m):
            # Insert pair_room
            pair_room_id = insert_pair_room()

            # Insert pair_room_member (relationship between pair_room and member)
            insert_pair_room_member(pair_room_id, member_id)

            # Insert timer
            insert_timer(pair_room_id)

            for _ in range(random.randrange(1, 5)):
                # Insert category
                category_id = insert_category(pair_room_id)

                for _ in range(random.randrange(1, 6)):
                    # Insert reference_link
                    reference_link_id = insert_reference_link(category_id, pair_room_id)

                    # Insert open_graph
                    insert_open_graph(reference_link_id)

            for _ in range(random.randrange(1, 15)):
                # Insert todo
                insert_todo(pair_room_id)

# Run the data generation
# n,m 
# n명의 멤버가 각각 m개의 방을 생성
generate_data(1000, 50) 

# Close the database connection
cursor.close()
connection.close()

파일 생성

touch script.py # 파일 생성
sudo nano script.py # 파일 편집기 실행
# 스크립트 작성
# ctrl + O 저장
# 파일명 확인하고 enter
# ctrl + X 파일 편집기 종료

파일 실행

python3 script.py
Clone this wiki locally