- MySQL元数据概览
- TABLES
- COLUMNS
- 联TABLES和COLUMNS
- 查建表语句
- E-R图
- 批量表抽样,查看具体数据(Python程序)
查看表注释、行数
SELECt `TABLE_NAME` -- 表名 ,`TABLE_TYPE` -- 表类型 ,`TABLE_COMMENT` -- 表注释 ,`TABLE_ROWS` -- 行数 ,`AVG_ROW_LENGTH` -- 平均行长 ,`CREATE_TIME` -- 创建时间 ,`UPDATE_TIME` -- 更新时间 FROM `information_schema`.`TABLES` WHERe `TABLE_SCHEMA`='库名';COLUMNS
查看列注释
SELECt `COLUMN_NAME` -- 列名 ,`COLUMN_DEFAULT` -- 列默认值 ,`IS_NULLABLE` -- 是否允许NULL ,`COLUMN_TYPE` -- 类型 ,`COLUMN_KEY` -- 列键 ,`COLUMN_COMMENT` -- 列注释 FROM `information_schema`.`COLUMNS` WHERe `TABLE_SCHEMA`='库名' AND `TABLE_NAME`='表名';
查看表的主键、时间字段、列数、注释缺失数量
SELECt
`TABLE_NAME` -- 表名
,COUNT(1) -- 列数
,TRIm("," FROM GROUP_CONCAt(DISTINCT IF(`COLUMN_KEY`='PRI',`COLUMN_NAME`,""))) -- 主键
,TRIm("," FROM GROUP_CONCAt(DISTINCT IF(`DATA_TYPE` IN ('timestamp','datetime'),`COLUMN_NAME`,""))) -- 时间字段
,SUM(ISNULL(`COLUMN_COMMENT`))+SUM(IF(COLUMN_COMMENT='',1,0)) -- 注释缺失数量
FROM `information_schema`.`COLUMNS`
WHERe `TABLE_SCHEMA`='库名'
GROUP BY `TABLE_NAME`;
联TABLES和COLUMNSGROUP_CONCAT为连接字符串的聚合函数,默认连接符是英文逗号
SELECt
a.`TABLE_NAME` AS `表名`
,a.`TABLE_COMMENT` AS `表注释`
,a.`TABLE_ROWS` AS `行数`
,a.`AVG_ROW_LENGTH` AS `平均行长`
,a.`CREATE_TIME` AS `创建时间`
,a.`UPDATE_TIME` AS `更新时间`
,b.`col_cnt` AS `列数`
,b.`pk` AS `主键`
,b.`t` AS `时间字段`
,b.`null_cnt` AS `缺失注释的列数`
FROM `information_schema`.`TABLES` a
INNER JOIN (
SELECt
`TABLE_NAME`
,COUNT(1) AS `col_cnt`
,TRIm("," FROM GROUP_CONCAt(DISTINCT IF(`COLUMN_KEY`='PRI',`COLUMN_NAME`,""))) AS `pk`
,TRIm("," FROM GROUP_CONCAt(DISTINCT IF(`DATA_TYPE` IN ('timestamp','datetime'),`COLUMN_NAME`,""))) AS `t`
,SUM(ISNULL(`COLUMN_COMMENT`))+SUM(IF(COLUMN_COMMENT='',1,0)) AS `null_cnt`
FROM `information_schema`.`COLUMNS`
WHERe `TABLE_SCHEMA`='库名'
GROUP BY `TABLE_NAME`
)b ON b.`TABLE_NAME`=a.`TABLE_NAME`
WHERe `TABLE_TYPE`='BASE TABLE';
查建表语句效果
单表
SHOW CREATE TABLE `表名`;
导出某个库的全部建表语句
mysqldump --no-data '库名' -u'root' -p > 文件名.sqlE-R图
批量表抽样,查看具体数据(Python程序)利用一些数据库工具来创建 E-R图,下面以 DBeaver 为例
from pymysql import Connection # conda install pymysql
import pandas as pd # conda install pandas openpyxl
class Mysql:
def __init__(self, **kwargs):
self.db = Connection(
user=kwargs.pop('user', 'root'),
password=kwargs.pop('password'),
host=kwargs.pop('host', 'localhost'),
database=kwargs.pop('database'),
port=kwargs.pop('port', 3306),
charset=kwargs.pop('charset', 'UTF8'))
def __del__(self):
self.db.close()
def db2sheets(self, queries, prefix):
"""
保存为多sheets的Excel文件
:param queries: 字典{sheet名:sql}
:param prefix: 文件名前缀
"""
writer = pd.ExcelWriter(prefix.replace('.xlsx', '') + '.xlsx')
for sheet_name, query in queries.items():
df = pd.read_sql_query(query, self.db)
# if 'url' in df.columns:df['url'] = "'" + df['url']
df.to_excel(writer, sheet_name=sheet_name, index=False)
writer.save()
if __name__ == '__main__':
db = '库名'
# 批量的表,粘到这里
tables = '''
tb1
tb2
tb3
'''.strip().split()
# 抽样SQL(可选择乱序或正序)
_queries = {t: 'SELECT * FROM {} ORDER BY RAND() LIMIT 99'.format(t) for t in tables}
# _queries = {t: 'SELECt * FROM {} LIMIT 99'.format(t) for t in tables}
# 导出为Excel
Mysql(pwd='密码', database=db).db2sheets(_queries, db)



