diff --git a/devtools/create_data.py b/devtools/create_data.py index ad75e45..a5ade5b 100644 --- a/devtools/create_data.py +++ b/devtools/create_data.py @@ -17,6 +17,7 @@ class User(Base): name = Column(String(50)) age = Column(Integer) + with open(".deploy_mysqld") as f: url = f.readline().strip() @@ -29,13 +30,17 @@ class User(Base): session = Session() if len(sys.argv) > 1 and sys.argv[1] == "random": - ids = iter(random.sample(range(0, 2*1000*1000), 1000*1000)) + ids = iter(random.sample(range(0, 2 * 1000 * 1000), 1000 * 1000)) for i in range(1000): - session.bulk_insert_mappings(User, [{"id": next(ids), "name":"Hello", "age": 10} for j in range(1000)]) + session.bulk_insert_mappings( + User, [{"id": next(ids), "name": "Hello", "age": 10} for j in range(1000)] + ) elif len(sys.argv) > 1 and sys.argv[1] == "drop": User.__table__.drop(bind=engine) else: for i in range(1000): - session.bulk_insert_mappings(User, [{"name":"Hello", "age": 10} for i in range(1000)]) + session.bulk_insert_mappings( + User, [{"name": "Hello", "age": 10} for i in range(1000)] + ) session.commit() diff --git a/devtools/create_instant_table.py b/devtools/create_instant_table.py index fd6a4f0..02db2a6 100644 --- a/devtools/create_instant_table.py +++ b/devtools/create_instant_table.py @@ -8,42 +8,42 @@ with engine.connect() as conn: conn.exec_driver_sql("use test") - conn.exec_driver_sql(''' + conn.exec_driver_sql(""" DROP TABLE IF EXISTS test_for_instant; -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" CREATE TABLE test_for_instant ( id int(11) primary key auto_increment, name varchar(255) NOT NULL, drop1 int(11) NOT NULL, drop2 varchar(255) not null default 'drop2' ) -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" insert into test_for_instant(name, drop1, drop2) values ("original record", 12, "original drop 2 column"); -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" alter table test_for_instant add column add1 varchar(255) default null -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" alter table test_for_instant add column add2 varchar(255) default 'add2 default' -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" insert into test_for_instant (name, drop1, drop2, add2) values ('insert after alter', 93, 'drop2 after alter', 'add2') -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" alter table test_for_instant drop column drop1 -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" alter table test_for_instant add column drop1 int(11) not null default '99' -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" insert into test_for_instant (name, drop1, drop2, add2) values ('insert after first add', 23, 'drop2 after first add', 'add2') -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" alter table test_for_instant add column add3 varchar(255) not null -''') - conn.exec_driver_sql(''' +""") + conn.exec_driver_sql(""" insert into test_for_instant (name, drop1, drop2, add2, add3) values ('insert after second add', 23, 'drop2 after second add', 'add2', 'add3') -''') +""") conn.commit() diff --git a/devtools/create_table.py b/devtools/create_table.py index 83a0368..b9ed96c 100644 --- a/devtools/create_table.py +++ b/devtools/create_table.py @@ -20,7 +20,9 @@ all_type = [ t for t in dir(dmysql.types) - if t.isupper() and t not in ["ARRAY", "NULLTYPE", "STRINGTYPE", "DECIMAL"] and "CHAR" not in t + if t.isupper() + and t not in ["ARRAY", "NULLTYPE", "STRINGTYPE", "DECIMAL"] + and "CHAR" 
not in t ] Base = declarative_base() all_type_column = [ @@ -33,15 +35,9 @@ ) all_type_column.append(" ENUM = Column(dmysql.ENUM('hello', 'world', 'a'))") all_type_column.append(" SET = Column(dmysql.SET('a', 'b', 'c'))") -all_type_column.append( - " DECIMAL = Column(dmysql.types.DECIMAL(10, 2))" -) -all_type_column.append( - " CHAR = Column(dmysql.types.CHAR(20), nullable=True)" -) -all_type_column.append( - " VARBINARY = Column(dmysql.VARBINARY(203))" -) +all_type_column.append(" DECIMAL = Column(dmysql.types.DECIMAL(10, 2))") +all_type_column.append(" CHAR = Column(dmysql.types.CHAR(20), nullable=True)") +all_type_column.append(" VARBINARY = Column(dmysql.VARBINARY(203))") all_type_column.append( " int_def_col = Column(dmysql.types.BIGINT, server_default=text('42'))" ) @@ -60,59 +56,61 @@ Base.metadata.create_all(engine) with sessionmaker(bind=engine)() as session: - test_data = AllType(BIGINT=98283201, - BIT = 1, - DATETIME='2024-01-01 09:00:01', - DOUBLE = 3.1415926, - FLOAT = 6.189, - INTEGER = 8621, + test_data = AllType( + BIGINT=98283201, + BIT=1, + DATETIME="2024-01-01 09:00:01", + DOUBLE=3.1415926, + FLOAT=6.189, + INTEGER=8621, DECIMAL=910.79, LONGBLOB=text("repeat('x', 100)"), - LONGTEXT = text("repeat('g', 3)"), - MEDIUMBLOB = text("NULL"), - MEDIUMINT = 999999, - MEDIUMTEXT = text("NULL"), - NUMERIC = 10.9, - REAL = 1092.892, - SMALLINT = 981, - TEXT = "TEXT", - TIME = '03:04:00', - TIMESTAMP = "2024-07-24 09:05:28", - YEAR = 2024, - ENUM = "a", - SET = "a,b,c", - TINYBLOB = b"TINYBLOB", - TINYINT = 99, - TINYTEXT = "TINYTEXT", - CHAR = "09283012", - VARBINARY = b"VARBINARY", + LONGTEXT=text("repeat('g', 3)"), + MEDIUMBLOB=text("NULL"), + MEDIUMINT=999999, + MEDIUMTEXT=text("NULL"), + NUMERIC=10.9, + REAL=1092.892, + SMALLINT=981, + TEXT="TEXT", + TIME="03:04:00", + TIMESTAMP="2024-07-24 09:05:28", + YEAR=2024, + ENUM="a", + SET="a,b,c", + TINYBLOB=b"TINYBLOB", + TINYINT=99, + TINYTEXT="TINYTEXT", + CHAR="09283012", + VARBINARY=b"VARBINARY", ) - test_data2 = AllType(BIGINT=98283201, - BIT = 1, - DATETIME='2024-01-01 09:00:01', - DOUBLE = 3.1415926, - FLOAT = 6.189, - INTEGER = 8621, + test_data2 = AllType( + BIGINT=98283201, + BIT=1, + DATETIME="2024-01-01 09:00:01", + DOUBLE=3.1415926, + FLOAT=6.189, + INTEGER=8621, DECIMAL=910.79, LONGBLOB=text("repeat('x', 100)"), - LONGTEXT = text("repeat('g', 3)"), - MEDIUMBLOB = text("NULL"), - MEDIUMINT = 999999, - MEDIUMTEXT = text("NULL"), - NUMERIC = 10.9, - REAL = 1092.892, - SMALLINT = 981, - TEXT = "TEXT", - TIME = '03:04:00', - TIMESTAMP = "2024-07-24 09:05:28", - YEAR = 2024, - ENUM = "a", - SET = "a,b,c", - TINYBLOB = b"TINYBLOB", - TINYINT = 99, - TINYTEXT = "TINYTEXT", - CHAR = text("NULL"), - VARBINARY = b"VARBINARY", + LONGTEXT=text("repeat('g', 3)"), + MEDIUMBLOB=text("NULL"), + MEDIUMINT=999999, + MEDIUMTEXT=text("NULL"), + NUMERIC=10.9, + REAL=1092.892, + SMALLINT=981, + TEXT="TEXT", + TIME="03:04:00", + TIMESTAMP="2024-07-24 09:05:28", + YEAR=2024, + ENUM="a", + SET="a,b,c", + TINYBLOB=b"TINYBLOB", + TINYINT=99, + TINYTEXT="TINYTEXT", + CHAR=text("NULL"), + VARBINARY=b"VARBINARY", ) session.add(test_data) session.add(test_data2) diff --git a/devtools/deploy_mysqld.py b/devtools/deploy_mysqld.py index 2e019de..2fa5d03 100644 --- a/devtools/deploy_mysqld.py +++ b/devtools/deploy_mysqld.py @@ -9,15 +9,18 @@ from testcontainers.mysql import MySqlContainer from docker.models.containers import Container + @click.group() def main(): pass + @main.command() def list(): data = load_deploy() pprint(data) + @main.command() 
@click.option("--version", type=click.STRING) def clean(version): @@ -38,6 +41,7 @@ def clean(version): with open(".deploy_mysqld", "w") as f: dump_deploy(data, f) + @dataclass class Instance: url: str @@ -45,6 +49,7 @@ class Instance: cmd: str datadir: str + def load_deploy(): if os.path.exists(".deploy_mysqld"): with open(".deploy_mysqld", "r") as f: @@ -59,19 +64,22 @@ def load_deploy(): return {} return {} + def dump_deploy(data, f): for k in data: data[k] = asdict(data[k]) json.dump(data, f) + def mDeploy(version): deploy_container = load_deploy() if version in deploy_container: - print(f"a container of mysqld[{version}] has been deploy at {deploy_container[version]}") + print( + f"a container of mysqld[{version}] has been deploy at {deploy_container[version]}" + ) return - os.environ["TESTCONTAINERS_RYUK_DISABLED"] = "true" mContainer = MySqlContainer(f"mysql:{version}") datadir = os.getcwd() + f"/datadir/{version}" @@ -81,10 +89,10 @@ def mDeploy(version): mysql = mContainer.start() with open(".deploy_mysqld", "w") as f: deploy_container[version] = Instance( - url=mysql.get_connection_url(), - container_id=f"{mysql._container.short_id}", - cmd=f"mysql -h 127.0.0.1 -P{mysql.get_exposed_port(mysql.port)} -u{mysql.username} -p{mysql.password}", - datadir=datadir, + url=mysql.get_connection_url(), + container_id=f"{mysql._container.short_id}", + cmd=f"mysql -h 127.0.0.1 -P{mysql.get_exposed_port(mysql.port)} -u{mysql.username} -p{mysql.password}", + datadir=datadir, ) dump_deploy(deploy_container, f) @@ -105,7 +113,6 @@ def connect(version): os.system(deploy_container.get(version).cmd) + if __name__ == "__main__": main() - - diff --git a/pyproject.toml b/pyproject.toml index d0a729f..32043e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dev-dependencies = [ "pymysql>=1.1.1", "requests==2.31.0", "cryptography>=43.0.0", + "python-lsp-server>=1.12.0", ] [tool.hatch.metadata] diff --git a/src/pyinnodb/cli/frm.py b/src/pyinnodb/cli/frm.py index b1ed703..917ce3d 100644 --- a/src/pyinnodb/cli/frm.py +++ b/src/pyinnodb/cli/frm.py @@ -5,6 +5,7 @@ from pyinnodb.sdi import table from pyinnodb.disk_struct.index import MIndexPage + @main.command() @click.pass_context @click.argument("frmfile") @@ -17,8 +18,7 @@ def frm(ctx, frmfile): for i, col in enumerate(frm_header.cols): cols.append(col.to_dd_column(col.name, i, frm_header.column_labels)) - - #print(frm_header.keys[0][0].key_parts) + # print(frm_header.keys[0][0].key_parts) keys, key_name, key_comment = frm_header.keys[0] idx = keys.to_dd_index(key_name.decode(), frm_header.cols) diff --git a/src/pyinnodb/cli/iter_record.py b/src/pyinnodb/cli/iter_record.py index 6839e0c..e7cb154 100644 --- a/src/pyinnodb/cli/iter_record.py +++ b/src/pyinnodb/cli/iter_record.py @@ -24,6 +24,7 @@ def list_first_page(ctx, pageno): f.seek(first_entry.seek_loc()) # print(MIndexEntryNode.parse_stream(f)) + @main.command() @click.pass_context @click.option("--primary-key", type=click.STRING, default="") @@ -32,9 +33,9 @@ def list_first_page(ctx, pageno): @click.option("--with-hist/--no-with-hist", type=click.BOOL, default=False) @click.option("--datadir", type=click.Path(exists=False), default=None) def search(ctx, primary_key, pageno, hidden_col, with_hist, datadir): - ''' search the primary-key(int support only now) ''' + """search the primary-key(int support only now)""" f = ctx.obj["fn"] - #print("search start cost:", time.time() - ctx.obj["start_time"]) + # print("search start cost:", time.time() - ctx.obj["start_time"]) fsp_page: MFspPage = 
ctx.obj["fsp_page"] f.seek(fsp_page.sdi_page_no * const.PAGE_SIZE) sdi_page = MSDIPage.parse_stream(f) @@ -72,7 +73,9 @@ def search(ctx, primary_key, pageno, hidden_col, with_hist, datadir): history = [] while rptr is not None: hist, rptr = rptr.last_version( - undo_map, primary_key_col, disk_data_layout, + undo_map, + primary_key_col, + disk_data_layout, ) history.append(hist) for h in history: @@ -80,16 +83,23 @@ def search(ctx, primary_key, pageno, hidden_col, with_hist, datadir): return + def primary_key_only(key_len: int): def value_parser(rh: MRecordHeader, f): print(rh, f.read(key_len)) + return value_parser @main.command() @click.pass_context @click.option("--garbage/--no-garbage", default=False, help="include garbage mark data") -@click.option("--hidden-col/--no-hidden-col", type=click.BOOL, default=False, help="show the DB_ROLL_PTR and DB_TRX_ID") +@click.option( + "--hidden-col/--no-hidden-col", + type=click.BOOL, + default=False, + help="show the DB_ROLL_PTR and DB_TRX_ID", +) @click.option("--pageno", default=None, type=click.INT, help="iterate on pageno only") # @click.option("--primary-key-len", type=click.INT, help="primary key only if not 0", default=0) @click.option("--sdi-idx", type=click.INT, default=0, help="idx of sdi") @@ -98,7 +108,7 @@ def iter_record(ctx, garbage, hidden_col, pageno, sdi_idx): by default, iter_record will iterate from the first leaf page output every record as a namedtuple whose filed is all the column - name of the ibd file. + name of the ibd file. """ f = ctx.obj["fn"] @@ -110,4 +120,3 @@ def iter_record(ctx, garbage, hidden_col, pageno, sdi_idx): dd_object.iter_record(f, hidden_col=hidden_col, garbage=garbage) return - diff --git a/src/pyinnodb/cli/main.py b/src/pyinnodb/cli/main.py index bfba0eb..a33528e 100644 --- a/src/pyinnodb/cli/main.py +++ b/src/pyinnodb/cli/main.py @@ -18,12 +18,10 @@ @click.option( "--log-level", type=click.Choice(["DEBUG", "ERROR", "INFO"]), default="ERROR" ) -@click.option( - "--validate-first/--no-validate-first", type=click.BOOL, default=False -) +@click.option("--validate-first/--no-validate-first", type=click.BOOL, default=False) @click.pass_context def main(ctx, fn, log_level, validate_first): - '''A ibd file parser for MySQL 8.0 above, help you to know innodb better. + """A ibd file parser for MySQL 8.0 above, help you to know innodb better. It offer several function bellow: a) validate the checksum of your ibd file; @@ -34,7 +32,7 @@ def main(ctx, fn, log_level, validate_first): many other function to explore your ibd file - ''' + """ # pid = os.getpid() # start_time = os.stat(f"/proc/{pid}").st_ctime # print("cost to startup:", time.time() - start_time) @@ -56,10 +54,11 @@ def main(ctx, fn, log_level, validate_first): continue checksum = const.page_checksum_crc32c(page_data) if checksum != fil.checksum: - print(f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]") + print( + f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]" + ) print("use validate to get a more detail output of the validation") sys.exit(1) except Exception as e: print(e) print("the file parse faile") - diff --git a/src/pyinnodb/cli/parse.py b/src/pyinnodb/cli/parse.py index 48ff666..ef30642 100644 --- a/src/pyinnodb/cli/parse.py +++ b/src/pyinnodb/cli/parse.py @@ -1,15 +1,20 @@ from . 
import * from pyinnodb import disk_struct + @main.command() @click.pass_context @click.option("--pageno", type=click.INT, help="pageno number to parse") -@click.option("--type", multiple=True, type=click.STRING, help="struct name to parse in sequence") +@click.option( + "--type", multiple=True, type=click.STRING, help="struct name to parse in sequence" +) @click.option("--offset", type=click.INT, default=0, help="page offset to start parse") -@click.option("--remain", type=click.INT, default=0, help="the bytes number to read after parse") +@click.option( + "--remain", type=click.INT, default=0, help="the bytes number to read after parse" +) @click.option("--func", type=click.STRING, default="") def parse(ctx, pageno, type, offset, remain, func): - ''' explore the ibd file flexibly ''' + """explore the ibd file flexibly""" f = ctx.obj["fn"] f.seek(pageno * const.PAGE_SIZE) if offset != 0: diff --git a/src/pyinnodb/cli/sdi.py b/src/pyinnodb/cli/sdi.py index 4127403..2c66067 100644 --- a/src/pyinnodb/cli/sdi.py +++ b/src/pyinnodb/cli/sdi.py @@ -8,6 +8,7 @@ import zlib import json + @main.command() @click.pass_context @click.option("--pageno", type=click.INT) diff --git a/src/pyinnodb/cli/sql.py b/src/pyinnodb/cli/sql.py index 4c6f966..3587d2f 100644 --- a/src/pyinnodb/cli/sql.py +++ b/src/pyinnodb/cli/sql.py @@ -16,14 +16,13 @@ @click.option("--mode", type=click.Choice(["sdi", "ddl", "dump"]), default="ddl") @click.option("--sdi-idx", type=click.INT, default=0) def tosql(ctx, mode, sdi_idx): - ''' dump the ddl/dml/sdi of the ibd table + """dump the ddl/dml/sdi of the ibd table - ddl) output the create table ddl; - dump) output the dml of ibd file; + ddl) output the create table ddl; + dump) output the dml of ibd file; sdi) output the dd_object stored in the SDIPage as json format - ''' + """ - f = ctx.obj["fn"] fsp_page = ctx.obj["fsp_page"] logger.debug("fsp header is %s", fsp_page.fsp_header) @@ -61,7 +60,9 @@ def tosql(ctx, mode, sdi_idx): parts = table_object.gen_sql_for_partition() desc = f"ENGINE={table_object.engine} DEFAULT CHARSET={table_collation.CHARACTER_SET_NAME} COLLATE={table_collation.COLLATION_NAME}" comment = ( - "\nCOMMENT '" + table_object.comment + "'" if table_object.comment else "" + "\nCOMMENT '" + table_object.comment + "'" + if table_object.comment + else "" ) print( f"CREATE TABLE {table_name} ({columns_dec}) {desc} {chr(10)+parts if parts else ''}{comment}" @@ -71,11 +72,14 @@ def tosql(ctx, mode, sdi_idx): root_page_no = int(table_object.indexes[0].private_data.get("root", 4)) f.seek(root_page_no * const.PAGE_SIZE) root_index_page = MIndexPage.parse_stream(f) - first_leaf_page_no = root_index_page.get_first_leaf_page(f, table_object.get_primary_key_col()) + first_leaf_page_no = root_index_page.get_first_leaf_page( + f, table_object.get_primary_key_col() + ) if first_leaf_page_no is None: print("no data") return values = [] + def transfter(nd): vs = [] for field in nd: @@ -83,16 +87,22 @@ def transfter(nd): vs.append(repr(json.dumps(field))) elif field is None: vs.append("NULL") - elif isinstance(field, date) or isinstance(field, timedelta) or isinstance(field, datetime): + elif ( + isinstance(field, date) + or isinstance(field, timedelta) + or isinstance(field, datetime) + ): vs.append(f"'{str(field)}'") elif isinstance(field, MGeo): - d = field.build().hex()# .zfill(50) - vs.append('0x'+d) + d = field.build().hex() # .zfill(50) + vs.append("0x" + d) else: vs.append(repr(field)) values.append(f"({','.join(vs)})") - default_value_parser = 
MIndexPage.default_value_parser(table_object, transfter) + default_value_parser = MIndexPage.default_value_parser( + table_object, transfter + ) while first_leaf_page_no != 4294967295: f.seek(first_leaf_page_no * const.PAGE_SIZE) index_page = MIndexPage.parse_stream(f) @@ -100,7 +110,9 @@ def transfter(nd): first_leaf_page_no = index_page.fil.next_page table_name = f"`{table_object.schema_ref}`.`{table_object.name}`" - print(f"INSERT INTO {table_name}({','.join(table_object.DataClass._fields)}) values {', '.join(values)}") + print( + f"INSERT INTO {table_name}({','.join(table_object.DataClass._fields)}) values {', '.join(values)}" + ) return diff --git a/src/pyinnodb/cli/static_usage.py b/src/pyinnodb/cli/static_usage.py index 441255c..8430d84 100644 --- a/src/pyinnodb/cli/static_usage.py +++ b/src/pyinnodb/cli/static_usage.py @@ -14,7 +14,7 @@ @click.pass_context @click.option("--kind", type=click.Choice(["type", "lsn", "ratio"]), default="type") def list_page(ctx, kind): - ''' show page type of every page ''' + """show page type of every page""" f = ctx.obj["fn"] fsp_page = ctx.obj["fsp_page"] lsns = [] @@ -38,12 +38,15 @@ def list_page(ctx, kind): if len(lsns) > 0: color.heatmap_matrix_width_high(lsns, 64, int((len(lsns) / 64) + 1), "Page NO.") if len(ratios) > 0: - color.ratio_matrix_width_high(ratios, 64, int((len(ratios)/64) + 1), "Page NO.") + color.ratio_matrix_width_high( + ratios, 64, int((len(ratios) / 64) + 1), "Page NO." + ) + @main.command() @click.pass_context def static_page_usage(ctx): - ''' show the page usage of every inode ''' + """show the page usage of every inode""" f = ctx.obj["fn"] f.seek(0) fsp_page = MFspPage.parse_stream(f) diff --git a/src/pyinnodb/cli/systab.py b/src/pyinnodb/cli/systab.py index a620144..7a518f4 100644 --- a/src/pyinnodb/cli/systab.py +++ b/src/pyinnodb/cli/systab.py @@ -9,6 +9,7 @@ from pyinnodb.const import util from pyinnodb import const + @main.command() @click.pass_context def sys_tablespace(ctx): @@ -32,7 +33,7 @@ def sys_tablespace(ctx): if sdi["dd_object_type"] == "Table": dd_object = Table(**sdi["dd_object"]) if dd_object.name in ["tablespace_files"]: - records = dd_object.iter_record(f, transfter=lambda x:x) + records = dd_object.iter_record(f, transfter=lambda x: x) for r in records: if r.se_private_data is not None: private_data = const.line_to_dict(r.se_private_data, ";", "=") diff --git a/src/pyinnodb/cli/undo.py b/src/pyinnodb/cli/undo.py index 8569d49..ab48470 100644 --- a/src/pyinnodb/cli/undo.py +++ b/src/pyinnodb/cli/undo.py @@ -7,6 +7,7 @@ from pyinnodb.disk_struct.index import MSDIPage from pyinnodb.sdi.table import Column, Table + @main.command() @click.pass_context @click.option("--datadir", type=click.Path(exists=True)) @@ -18,7 +19,7 @@ def undo_tablespaces(ctx, datadir): @click.pass_context @click.option("--pageno", type=click.INT, default=9) def undo_list(ctx, pageno): - ''' dump the undo page ''' + """dump the undo page""" f = ctx.obj["fn"] fsp_page = ctx.obj["fsp_page"] f.seek(pageno * const.PAGE_SIZE) @@ -31,10 +32,12 @@ def undo_list(ctx, pageno): @click.pass_context @click.option("--pageno", type=click.INT, help="page number the pointer stored at") @click.option("--offset", type=click.INT, help="page offset the pointer stored at") -@click.option("--insert", type=click.INT, help="insert flag: 1: pointer is an insert undo type") +@click.option( + "--insert", type=click.INT, help="insert flag: 1: pointer is an insert undo type" +) @click.option("--rsegid", type=click.INT, help="undo tablespace id of the pointer") 
def undo_record(ctx, pageno, offset, insert, rsegid): - ''' show the history version of an RollbackPointer ''' + """show the history version of an RollbackPointer""" f = ctx.obj["fn"] fsp_page: MFspPage = ctx.obj["fsp_page"] f.seek(fsp_page.sdi_page_no * const.PAGE_SIZE) @@ -74,7 +77,7 @@ def undo_record(ctx, pageno, offset, insert, rsegid): @click.pass_context @click.option("--pageno", type=click.INT) def rseg_array(ctx, pageno): - ''' show the RSEGArrayPage ''' + """show the RSEGArrayPage""" f = ctx.obj["fn"] f.seek(pageno * const.PAGE_SIZE) page = MRSEGArrayPage.parse_stream(f) diff --git a/src/pyinnodb/cli/validate.py b/src/pyinnodb/cli/validate.py index 308c11e..83c3d35 100644 --- a/src/pyinnodb/cli/validate.py +++ b/src/pyinnodb/cli/validate.py @@ -7,7 +7,7 @@ @main.command() @click.pass_context def validate(ctx): - ''' output the calculated checksum and the checksum stored in page ''' + """output the calculated checksum and the checksum stored in page""" f = ctx.obj["fn"] fsp_page = ctx.obj["fsp_page"] f.seek(0) diff --git a/src/pyinnodb/color.py b/src/pyinnodb/color.py index 42f52c4..37d057d 100644 --- a/src/pyinnodb/color.py +++ b/src/pyinnodb/color.py @@ -54,7 +54,8 @@ def ansi_color(color, text): block_char_v = ["░", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"] block_char_h = ["░", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█"] -char = ["╭", "╮","│","╰","╯","─"] +char = ["╭", "╮", "│", "╰", "╯", "─"] + def ratio_matrix_width_high(data, w, h, prefix=""): idxs = [0 if v == -1 else int(v * 8) for v in data] @@ -67,10 +68,11 @@ def ratio_matrix_width_high(data, w, h, prefix=""): for i in range(h): l = f"{char[2]}{''.join([next(ic) for i in range(w)])}{char[2]}" if prefix != "": - l = str(i*w).rjust(len(prefix)) + l + l = str(i * w).rjust(len(prefix)) + l print(l) print(bottom) + def heatmap_matrix_width_high(data, w, h, prefix=""): max_data, min_data = max(data), min(data) step = (max_data - min_data) / len(ansi_heatmap_color) @@ -82,7 +84,7 @@ def f(v): colors = [ansi_color(ansi_heatmap_color[f(v)], block_char_h[-1]) for v in data] ic = itertools.chain(iter(colors), itertools.repeat(" ")) - line = w*char[-1] + line = w * char[-1] # top_legend = " " * 2 + "0" + " " * (w - 1) + str(w) # print(top_legend) top = prefix + f"{char[0]}{line}{char[1]}" @@ -91,14 +93,14 @@ def f(v): for i in range(h): l = f"{char[2]}{''.join([next(ic) for i in range(w)])}{char[2]}" if prefix != "": - l = str(i*w).rjust(len(prefix)) + l + l = str(i * w).rjust(len(prefix)) + l print(l) print(bottom) def heatmap_matrix_data(lines): high, width = len(lines), len(lines[0]) - line = width*char[-1] + line = width * char[-1] top = f"{char[0]}{line}{char[1]}" bottom = f"{char[3]}{line}{char[4]}" print(top) @@ -106,9 +108,10 @@ def heatmap_matrix_data(lines): print(f"{char[2]}{''.join(l)}{char[2]}") print(bottom) -if __name__ == "__main__": +if __name__ == "__main__": import itertools + data = [] gen = itertools.cycle(ansi_heatmap_color) for i in range(14): diff --git a/src/pyinnodb/const/__init__.py b/src/pyinnodb/const/__init__.py index 4480fbb..409047a 100644 --- a/src/pyinnodb/const/__init__.py +++ b/src/pyinnodb/const/__init__.py @@ -150,6 +150,7 @@ def show_seq_page_list(page_list): COLUMN_IDX_TYPE_UNIQ = 3 COLUMN_IDX_TYPE_MULT = 4 + # mach_read_next_compressed def read_compressed_mysql_int(stream): b0 = stream.read(1) @@ -171,6 +172,7 @@ def read_compressed_mysql_int(stream): else: return int.from_bytes(stream.read(3), "big") | 0xFF000000 + def mach_u64_read_next_compressed(stream): data = 
read_compressed_mysql_int(stream) return (data << 32) | int.from_bytes(stream.read(4), "big") diff --git a/src/pyinnodb/const/dd_column_type.py b/src/pyinnodb/const/dd_column_type.py index 0ac097a..499e209 100644 --- a/src/pyinnodb/const/dd_column_type.py +++ b/src/pyinnodb/const/dd_column_type.py @@ -84,7 +84,7 @@ def is_big(cls, t): DDColumnType.MEDIUM_BLOB, DDColumnType.LONG_BLOB, DDColumnType.BLOB, - #DDColumnType.STRING, + # DDColumnType.STRING, DDColumnType.JSON, DDColumnType.TINY_BLOB, DDColumnType.GEOMETRY, diff --git a/src/pyinnodb/const/define.py b/src/pyinnodb/const/define.py index 9c16d2b..8d548c8 100644 --- a/src/pyinnodb/const/define.py +++ b/src/pyinnodb/const/define.py @@ -56,14 +56,14 @@ FIL_PAGE_TYPE_ZBLOB2: "SUBSEQUENT FRESHLY ALLOCATED PAGE", FIL_PAGE_TYPE_UNKNOWN: "UNKNOWN TYPE PAGE", FIL_PAGE_COMPRESSED: "Compressed page", - FIL_PAGE_ENCRYPTED :"Encrypted page", - FIL_PAGE_COMPRESSED_AND_ENCRYPTED :"Compressed and Encrypted page", - FIL_PAGE_ENCRYPTED_RTREE :"Encrypted R-tree page", - FIL_PAGE_SDI_BLOB :"Uncompressed SDI BLOB page", - FIL_PAGE_SDI_ZBLOB :"Commpressed SDI BLOB page", - FIL_PAGE_TYPE_UNUSED :"Available for future use", - FIL_PAGE_TYPE_RSEG_ARRAY :"Rollback Segment Array page", - FIL_PAGE_TYPE_LOB_INDEX :"Index pages of uncompressed LOB", + FIL_PAGE_ENCRYPTED: "Encrypted page", + FIL_PAGE_COMPRESSED_AND_ENCRYPTED: "Compressed and Encrypted page", + FIL_PAGE_ENCRYPTED_RTREE: "Encrypted R-tree page", + FIL_PAGE_SDI_BLOB: "Uncompressed SDI BLOB page", + FIL_PAGE_SDI_ZBLOB: "Commpressed SDI BLOB page", + FIL_PAGE_TYPE_UNUSED: "Available for future use", + FIL_PAGE_TYPE_RSEG_ARRAY: "Rollback Segment Array page", + FIL_PAGE_TYPE_LOB_INDEX: "Index pages of uncompressed LOB", FIL_PAGE_TYPE_LOB_FIRST: "FIRST PAGE OF UNCOMPRESSED BLOB PAGE", FIL_PAGE_TYPE_LOB_INDEX: "INDEX PAGE OF UNCOMPRESSED BLOB PAGE", FIL_PAGE_TYPE_LOB_DATA: "DATA PAGE OF UNCOMPRESSED BLOB PAGE", diff --git a/src/pyinnodb/const/undo.py b/src/pyinnodb/const/undo.py index ad3d194..cd97447 100644 --- a/src/pyinnodb/const/undo.py +++ b/src/pyinnodb/const/undo.py @@ -1,5 +1,6 @@ from enum import Enum + class UndoPageType(Enum): UNDO_INSERT = 1 UNOD_UPDATE = 2 diff --git a/src/pyinnodb/const/util.py b/src/pyinnodb/const/util.py index 336a52a..295a09c 100644 --- a/src/pyinnodb/const/util.py +++ b/src/pyinnodb/const/util.py @@ -5,9 +5,11 @@ from os import path + def is_id_undo_tablespace(id): return id >= 4244167280 and id <= 4294967279 + def get_undo_tablespacefile(fn: str): result = {} with open(fn, "rb") as f: @@ -25,13 +27,13 @@ def get_undo_tablespacefile(fn: str): if dd_object.name != "tablespace_files": continue - for r in dd_object.iter_record(f, transfter=lambda x:x): + for r in dd_object.iter_record(f, transfter=lambda x: x): if r.se_private_data is None: continue private_data = const.line_to_dict(r.se_private_data, ";", "=") space_id = int(private_data.get("id", 0)) if is_id_undo_tablespace(space_id): fname = path.dirname(fn) + "/" + path.basename(r.file_name) - result[0xfffffff0-space_id] = open(fname, "rb") + result[0xFFFFFFF0 - space_id] = open(fname, "rb") return result diff --git a/src/pyinnodb/disk_struct/cfg.py b/src/pyinnodb/disk_struct/cfg.py index a7ff791..6d6e040 100644 --- a/src/pyinnodb/disk_struct/cfg.py +++ b/src/pyinnodb/disk_struct/cfg.py @@ -1,5 +1,6 @@ from ..mconstruct import * + class MCfgCol(CC): prtype: int = cfield(cs.Int32ub) mtype: int = cfield(cs.Int32ub) @@ -9,6 +10,7 @@ class MCfgCol(CC): ord_part: int = cfield(cs.Int32ub) max_prefix: int = cfield(cs.Int32ub) 
name: str = cfield(CLenString(4)) + def _post_parsed(self, stream, context, path): version = context.get("version", None) if version is None: @@ -50,6 +52,7 @@ def _post_build(self, obj, stream, context, path): class MCfgIndexField(CC): prefix_len: int = cfield(cs.Int32ub) fixed_len: int = cfield(cs.Int32ub) + def _post_parsed(self, stream, context, path): version = context.get("version", None) if version is None: @@ -114,11 +117,13 @@ def _post_parsed(self, stream, context, path): if self.version >= 2: self.space_flags = cs.Int32ub.parse_stream(stream) if self.version >= 6: - self.compression_type = cs.Int8ub.parse_stream(stream) # 0: NONE, 1: ZLIB, 2: LZ4 + self.compression_type = cs.Int8ub.parse_stream( + stream + ) # 0: NONE, 1: ZLIB, 2: LZ4 self.cols = [] for i in range(self.n_cols): - col = MCfgCol.parse_stream(stream, version = self.version) + col = MCfgCol.parse_stream(stream, version=self.version) self.cols.append(col) self.n_indexes = cs.Int32ub.parse_stream(stream) @@ -126,7 +131,6 @@ def _post_parsed(self, stream, context, path): for i in range(self.n_indexes): self.indexes.append(MCfgIndex.parse_stream(stream, version=self.version)) - def _post_build(self, obj, stream, context, path): if obj.version >= 5: stream.write(cs.Int32ub.build(obj.n_instant_nullable)) @@ -148,4 +152,3 @@ def _post_build(self, obj, stream, context, path): stream.write(cs.Int32ub.build(self.n_indexes)) for idx in self.indexes: stream.write(idx.build(version=obj.version)) - diff --git a/src/pyinnodb/disk_struct/data.py b/src/pyinnodb/disk_struct/data.py index 3d3a7e4..c77d7f1 100644 --- a/src/pyinnodb/disk_struct/data.py +++ b/src/pyinnodb/disk_struct/data.py @@ -1,9 +1,11 @@ from ..mconstruct import * from datetime import timedelta, datetime, date + try: from datetime import UTC except: from datetime import timezone + UTC = timezone.utc import time @@ -74,55 +76,56 @@ def to_time(self) -> datetime: UTC, ) + class MPoint(CC): x: float = cfield(cs.Float64l) y: float = cfield(cs.Float64l) + class MGPoint(CC): byteorder: int = cfield(cs.Int8ul) point_type: int = cfield(cs.Int32ul) def _post_parsed(self, stream, context, path): - if self.point_type == 1: # POINT - self.x = cs.Float64l.parse_stream(stream) - self.y = cs.Float64l.parse_stream(stream) - elif self.point_type == 2: # LINESTRING - self.points = [] - self.size = cs.Int32ul.parse_stream(stream) + if self.point_type == 1: # POINT + self._set_show_field("x", cs.Float64l.parse_stream(stream)) + self._set_show_field("y", cs.Float64l.parse_stream(stream)) + elif self.point_type == 2: # LINESTRING + self._set_show_field("size", cs.Int32ul.parse_stream(stream)) + self._set_show_field("points", []) for i in range(self.size): self.points.append(MPoint.parse_stream(stream)) - elif self.point_type == 3: # POLYGON - self.polygon = [] - self.psize = cs.Int32ul.parse_stream(stream) + elif self.point_type == 3: # POLYGON + self._set_show_field("psize", cs.Int32ul.parse_stream(stream)) + self._set_show_field("polygon", []) for i in range(self.psize): size = cs.Int32ul.parse_stream(stream) points = [] for j in range(size): points.append(MPoint.parse_stream(stream)) self.polygon.append(points) - elif self.point_type == 4: # MULTIPOINT - self.size = cs.Int32ul.parse_stream(stream) - self.points = [] + elif self.point_type == 4: # MULTIPOINT + self._set_show_field("size", cs.Int32ul.parse_stream(stream)) + self._set_show_field("points", []) for i in range(self.size): self.points.append(MGPoint.parse_stream(stream)) elif self.point_type == 5: - self.size = 
cs.Int32ul.parse_stream(stream) - self.lines = [] + self._set_show_field("size", cs.Int32ul.parse_stream(stream)) + self._set_show_field("lines", []) for i in range(self.size): self.lines.append(MGPoint.parse_stream(stream)) elif self.point_type == 6: - self.size = cs.Int32ul.parse_stream(stream) - self.polygons = [] + self._set_show_field("size", cs.Int32ul.parse_stream(stream)) + self._set_show_field("polygons", []) for i in range(self.size): self.polygons.append(MGPoint.parse_stream(stream)) elif self.point_type == 7: - self.size = cs.Int32ul.parse_stream(stream) - self.geos = [] + self._set_show_field("size", cs.Int32ul.parse_stream(stream)) + self._set_show_field("geos", []) for i in range(self.size): self.geos.append(MGPoint.parse_stream(stream)) - def _post_build(self, obj, stream, context, path): if self.point_type == 1: stream.write(cs.Float64l.build(obj.x)) @@ -159,6 +162,7 @@ def _post_build(self, obj, stream, context, path): for l in self.geos: stream.write(l.build()) + class MGeo(CC): SRID: int = cfield(cs.Int32ub) GP: MGPoint = cfield(MGPoint) diff --git a/src/pyinnodb/disk_struct/first_page.py b/src/pyinnodb/disk_struct/first_page.py index cd36b96..67e4eb1 100644 --- a/src/pyinnodb/disk_struct/first_page.py +++ b/src/pyinnodb/disk_struct/first_page.py @@ -7,19 +7,21 @@ blob_hdr_size = 8 -class MBlobHdr(CC): # only used in MySQL 5 + +class MBlobHdr(CC): # only used in MySQL 5 part_len: int = cfield(cs.Int32ub) - next_page_no: int = cfield(cs.Int32ub) # FIL_NULL if none 0xffffffff + next_page_no: int = cfield(cs.Int32ub) # FIL_NULL if none 0xffffffff def get_data(self, stream): data = stream.read(self.part_len) - if self.next_page_no == 0xffffffff: + if self.next_page_no == 0xFFFFFFFF: return data else: stream.seek(self.next_page_no * const.PAGE_SIZE + 38) blob_header = MBlobHdr.parse_stream(stream) return data + blob_header.get_data(stream) + def btr_blob_get_next_page_no(): return 0 @@ -73,8 +75,6 @@ def get_data(self, stream): return data - - stream.seek(self.index_entry[0].page_no * const.PAGE_SIZE + self.sizeof()) first_page_data = stream.read(self.data_len) for i in range(1, self.index_list.length): diff --git a/src/pyinnodb/disk_struct/fsp.py b/src/pyinnodb/disk_struct/fsp.py index 286470d..79a4a58 100644 --- a/src/pyinnodb/disk_struct/fsp.py +++ b/src/pyinnodb/disk_struct/fsp.py @@ -11,6 +11,7 @@ FSP_FLAGS_POS_SDI = 8 FSP_FLAGS_MASK_SDI = 0x4000 + class MFspHeader(CC): space_id: int = cfield(cs.Int32ub) unused: int = cfield(cs.Int32ub) @@ -31,7 +32,6 @@ def has_sdi_page(self): return ((self.flags & FSP_FLAGS_MASK_SDI) >> FSP_FLAGS_POS_SDI) > 0 - class MFspPage(CC): fil: MFil = cfield(MFil) fsp_header: MFspHeader = cfield(MFspHeader) diff --git a/src/pyinnodb/disk_struct/index.py b/src/pyinnodb/disk_struct/index.py index acc7adc..9f539fe 100644 --- a/src/pyinnodb/disk_struct/index.py +++ b/src/pyinnodb/disk_struct/index.py @@ -62,6 +62,7 @@ class MFsegHeader(CC): # inode_entry = MInodeEntry.parse_stream(f) # return inode_entry.first_page() + class MSystemRecord(CC): info_flags: int = cfield(cs.BitsInteger(4)) record_owned_num: int = cfield(cs.BitsInteger(4)) @@ -81,8 +82,9 @@ class MIndexSystemRecord(CC): infimum: MSystemRecord = cfield(MSystemRecord) supremum: MSystemRecord = cfield(MSystemRecord) + def get_rec_insert_state(rh: MRecordHeader, dd_object: Table): - if rh.instant_version != 0: ## is_versioned + if rh.instant_version != 0: ## is_versioned # if version == 0: # return INSERTED_AFTER_UPGRADE_BEFORE_INSTANT_ADD_NEW_IMPLEMENTATION # else: @@ -107,11 +109,18 @@ 
class MIndexPage(CC): system_records: MIndexSystemRecord = cfield(MIndexSystemRecord) @classmethod - def default_value_parser(cls, dd_object: Table, transfter = None, hidden_col=False): + def default_value_parser(cls, dd_object: Table, transfter=None, hidden_col=False): primary_data_layout_col = dd_object.get_disk_data_layout() + def value_parser(rh: MRecordHeader, f): cur = f.tell() - logger.debug("-------start parse-----------rh: %s, @cur: %d/(%d, %d)", rh, cur, int(cur / const.PAGE_SIZE), cur % const.PAGE_SIZE) + logger.debug( + "-------start parse-----------rh: %s, @cur: %d/(%d, %d)", + rh, + cur, + int(cur / const.PAGE_SIZE), + cur % const.PAGE_SIZE, + ) if const.RecordType(rh.record_type) == const.RecordType.NodePointer: next_page_no = const.parse_mysql_int(f.read(4)) return @@ -123,28 +132,51 @@ def value_parser(rh: MRecordHeader, f): f.seek(-1, 1) data_schema_version = int.from_bytes(f.read(1), "big") - logger.debug("record header is instant, with data version: %d", data_schema_version) - - cols_disk_layout = [d for d in primary_data_layout_col if d[0].version_valid(data_schema_version)] - logger.debug("primary data layout is %s", ",".join(f"{c[0].name}({c[0].ordinal_position})" for c in primary_data_layout_col)) + logger.debug( + "record header is instant, with data version: %d", + data_schema_version, + ) + cols_disk_layout = [ + d + for d in primary_data_layout_col + if d[0].version_valid(data_schema_version) + ] + logger.debug( + "primary data layout is %s", + ",".join( + f"{c[0].name}({c[0].ordinal_position})" + for c in primary_data_layout_col + ), + ) if rh.instant == 1: f.seek(-1, 1) extra_byte = int.from_bytes(f.read(1), "big") cols_disk_layout = cols_disk_layout[:extra_byte] - logger.debug("instant col extra byte is %s, &0x80 is %s, len(cols) is %d", hex(extra_byte), extra_byte & 0x80, - len(cols_disk_layout)) - - nullable_cols = [d[0] for d in cols_disk_layout if d[1] == 4294967295 and d[0].is_nullable] + logger.debug( + "instant col extra byte is %s, &0x80 is %s, len(cols) is %d", + hex(extra_byte), + extra_byte & 0x80, + len(cols_disk_layout), + ) + + nullable_cols = [ + d[0] + for d in cols_disk_layout + if d[1] == 4294967295 and d[0].is_nullable + ] - logger.debug("cols_disk_layout is %s", ",".join(c[0].name for c in cols_disk_layout)) + logger.debug( + "cols_disk_layout is %s", ",".join(c[0].name for c in cols_disk_layout) + ) logger.debug("nullable_cols is %s", ",".join(c.name for c in nullable_cols)) - if rh.instant == 0 and rh.instant_version == 0: nullable_cols = [c for c in nullable_cols if not c.is_instant_col_80017] - cols_disk_layout = [d for d in cols_disk_layout if not d[0].is_instant_col_80017] + cols_disk_layout = [ + d for d in cols_disk_layout if not d[0].is_instant_col_80017 + ] nullcol_bitmask_size = int((len(nullable_cols) + 7) / 8) f.seek(-nullcol_bitmask_size - rh.instant_version - rh.instant, 1) @@ -154,14 +186,26 @@ def value_parser(rh: MRecordHeader, f): for i, c in enumerate(nullable_cols): if null_mask & (1 << i): null_col_data[c.ordinal_position] = 1 - logger.debug("null_col_data is %s, null_col size is %s, null_mask is %s", null_col_data, len(nullable_cols), null_bitmask) + logger.debug( + "null_col_data is %s, null_col size is %s, null_mask is %s", + null_col_data, + len(nullable_cols), + null_bitmask, + ) may_var_col = [ (i, c[0]) for i, c in enumerate(cols_disk_layout) - if DDColumnType.is_big(c[0].type) or DDColumnType.is_var(c[0].type, mysqld_version=dd_object.mysql_version_id) + if DDColumnType.is_big(c[0].type) + or 
DDColumnType.is_var( + c[0].type, mysqld_version=dd_object.mysql_version_id + ) ] - logger.debug("may_var_col is %s", ",".join(f"({i})({c.ordinal_position}){c.name}" for i, c in may_var_col)) - + logger.debug( + "may_var_col is %s", + ",".join( + f"({i})({c.ordinal_position}){c.name}" for i, c in may_var_col + ), + ) ## read var f.seek(-nullcol_bitmask_size, 1) @@ -196,17 +240,26 @@ def value_parser(rh: MRecordHeader, f): if len(p_value) > 100: p_value = p_value[:10] + "..." + p_value[-10:] - logger.debug("read_data: col[%s], col.type[%s], value[%s], i[%d], op[%d], vs[%s], from[%s],to[%s]", - col.name, col.type, p_value, i, col.ordinal_position, vs, cur_before%const.PAGE_SIZE, f.tell()%const.PAGE_SIZE) + logger.debug( + "read_data: col[%s], col.type[%s], value[%s], i[%d], op[%d], vs[%s], from[%s],to[%s]", + col.name, + col.type, + p_value, + i, + col.ordinal_position, + vs, + cur_before % const.PAGE_SIZE, + f.tell() % const.PAGE_SIZE, + ) if col.generation_expression_utf8 != "": continue disk_data_parsed[col.name] = col_value for col in dd_object.columns: if ( - (col.name in ["DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"] and not hidden_col) - or col.private_data.get("version_dropped", 0) != 0 - ): + col.name in ["DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"] + and not hidden_col + ) or col.private_data.get("version_dropped", 0) != 0: if col.name in disk_data_parsed: disk_data_parsed.pop(col.name) continue @@ -221,6 +274,7 @@ def value_parser(rh: MRecordHeader, f): else: return transfter(klass(**disk_data_parsed)) return + return value_parser def _post_parsed(self, stream, context, path): @@ -232,7 +286,11 @@ def _post_parsed(self, stream, context, path): def get_first_leaf_page(self, stream, primary_cols): infimum_offset = self.system_records.infimum.get_current_offset() next_page = self.fil.offset - stream.seek(next_page * const.PAGE_SIZE + infimum_offset + self.system_records.infimum.next_record_offset) + stream.seek( + next_page * const.PAGE_SIZE + + infimum_offset + + self.system_records.infimum.next_record_offset + ) stream.seek(-MRecordHeader.sizeof(), 1) rh = MRecordHeader.parse_stream(stream) rht = const.RecordType(rh.record_type) @@ -241,7 +299,7 @@ def get_first_leaf_page(self, stream, primary_cols): elif rht == const.RecordType.NodePointer: for c in primary_cols: c.read_data(stream) - + next_page = int.from_bytes(stream.read(4), "big") stream.seek(next_page * const.PAGE_SIZE) next_index_page = MIndexPage.parse_stream(stream) @@ -282,12 +340,25 @@ def binary_search_with_page_directory(self, key, stream): key_len = len(key) low, high = 0, len(self.page_directory) - 1 cnt = 0 - logger.debug("page dir is %s", ",".join(map(str, self.page_directory))) + logger.debug( + "page dir is %s, low: %d, high: %d", + ",".join(map(str, self.page_directory)), + low, + high, + ) while high > low + 1: - target = int((high + low)/2) + target = int((high + low) / 2) stream.seek(cur_post + self.page_directory[target]) record_key = stream.read(key_len) - logger.debug("low: %d, high: %d, target: %d, record_key: %s, key: %s, dir: %s", low, high, target, record_key, key, self.page_directory[target]) + logger.debug( + "low: %d, high: %d, target: %d, record_key: %s, key: %s, dir: %s", + low, + high, + target, + record_key, + key, + self.page_directory[target], + ) if record_key == key: return target, True elif key > record_key: @@ -317,7 +388,7 @@ def _post_parsed(self, stream, context, path): size = self.sizeof() stream.seek(const.PAGE_SIZE - size - 8, 1) self.fil_tailer = MFilTrailer.parse_stream(stream) - #self.ddl 
= next(self.iterate_sdi_record(stream)) + # self.ddl = next(self.iterate_sdi_record(stream)) def ddl(self, stream, idx): return list(self.iterate_sdi_record(stream))[idx] @@ -333,17 +404,20 @@ def iterate_sdi_record(self, stream): break fseg_header = MFsegHeader.parse_stream(stream) infimum = MSystemRecord.parse_stream(stream) - stream.seek(-8+infimum.next_record_offset+12, 1) + stream.seek(-8 + infimum.next_record_offset + 12, 1) cur_page_num = int.from_bytes(stream.read(4), byteorder="big") while cur_page_num != 4294967295: stream.seek(cur_page_num * const.PAGE_SIZE) sdi_page = MSDIPage.parse_stream(stream) - stream.seek(cur_page_num * const.PAGE_SIZE + sdi_page.system_records.infimum.get_current_offset()) + stream.seek( + cur_page_num * const.PAGE_SIZE + + sdi_page.system_records.infimum.get_current_offset() + ) next_offset = sdi_page.system_records.infimum.next_record_offset while next_offset != 0: - stream.seek(next_offset-MRecordHeader.sizeof(), 1) + stream.seek(next_offset - MRecordHeader.sizeof(), 1) rh = MRecordHeader.parse_stream(stream) if const.RecordType(rh.record_type) == const.RecordType.Supremum: break diff --git a/src/pyinnodb/disk_struct/json.py b/src/pyinnodb/disk_struct/json.py index ab04c6d..4137080 100644 --- a/src/pyinnodb/disk_struct/json.py +++ b/src/pyinnodb/disk_struct/json.py @@ -19,7 +19,7 @@ def _post_parsed(self, stream, context, path): cur = stream.tell() # seek loc after type, offset in the key entry or value entry is relative to cur if self.type == 0x04: data = int.from_bytes(stream.read(1), "big") - return {1: True, 2: False, 0: 'null'}.get(data, None) + return {1: True, 2: False, 0: "null"}.get(data, None) elif self.type == 0x05: return cs.Int16sl.parse_stream(stream) elif self.type == 0x06: diff --git a/src/pyinnodb/disk_struct/list.py b/src/pyinnodb/disk_struct/list.py index 1524f64..84d2555 100644 --- a/src/pyinnodb/disk_struct/list.py +++ b/src/pyinnodb/disk_struct/list.py @@ -3,6 +3,7 @@ from .. 
import const import sys + if sys.version_info.minor >= 9: from functools import cache else: diff --git a/src/pyinnodb/disk_struct/rollback.py b/src/pyinnodb/disk_struct/rollback.py index d8ad70e..c45cf7e 100644 --- a/src/pyinnodb/disk_struct/rollback.py +++ b/src/pyinnodb/disk_struct/rollback.py @@ -2,13 +2,16 @@ from ..mconstruct import * from .undo_log import MUndoRecordInsert, TRX_UNDO_DEL_MARK_REC -UNIV_SQL_NULL = ~0 +UNIV_SQL_NULL = ~0 UNIV_PAGE_SIZE_SHIFT_DEF = 14 UNIV_PAGE_SIZE_DEF = 1 << UNIV_PAGE_SIZE_SHIFT_DEF -UNIV_EXTERN_STORAGE_FIELD = int.from_bytes((UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF).to_bytes(4, "big", signed=True), "big") +UNIV_EXTERN_STORAGE_FIELD = int.from_bytes( + (UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF).to_bytes(4, "big", signed=True), "big" +) SPATIAL_STATUS_SHIFT = 12 SPATIAL_STATUS_MASK = 3 << SPATIAL_STATUS_SHIFT + class HistoryVersion: def __init__(self, trx_id, rollptr, upd): self.trx_id = trx_id @@ -27,6 +30,7 @@ def __str__(self): __repr__ = __str__ + class MRollbackPointer(CC): insert_flag: int = cfield(cs.BitsInteger(1)) rollback_seg_id: int = cfield(cs.BitsInteger(7)) @@ -50,10 +54,12 @@ def last_version(self, undo_map, primary_col, disk_data_layout): for c in primary_col: col_len = const.read_compressed_mysql_int(f) col_data_skip = f.read(col_len) - logger.debug(f"skip primary key: {c.name}, len: {col_len}, data: {col_data_skip}") + logger.debug( + f"skip primary key: {c.name}, len: {col_len}, data: {col_data_skip}" + ) # trx_undo_update_rec_get_update - type = undo_record_header.flag & 0xf + type = undo_record_header.flag & 0xF if type != TRX_UNDO_DEL_MARK_REC: n_fields = const.read_compressed_mysql_int(f) else: @@ -62,10 +68,10 @@ def last_version(self, undo_map, primary_col, disk_data_layout): for i in range(n_fields): col_no = const.read_compressed_mysql_int(f) len = const.read_compressed_mysql_int(f) - #if len > UNIV_EXTERN_STORAGE_FIELD: + # if len > UNIV_EXTERN_STORAGE_FIELD: # len = ((len - UNIV_EXTERN_STORAGE_FIELD) & (~SPATIAL_STATUS_MASK)) col = disk_data_layout[col_no][0] - if len == 4294967295: # UNIV_SQL_NULL & 0xffffffff + if len == 4294967295: # UNIV_SQL_NULL & 0xffffffff orig_data = None else: orig_data = col.read_data(f, len) @@ -74,4 +80,3 @@ def last_version(self, undo_map, primary_col, disk_data_layout): return hist, ptr else: return HistoryVersion(None, None, 0), None - diff --git a/src/pyinnodb/disk_struct/trx.py b/src/pyinnodb/disk_struct/trx.py index 2674493..9231130 100644 --- a/src/pyinnodb/disk_struct/trx.py +++ b/src/pyinnodb/disk_struct/trx.py @@ -3,10 +3,12 @@ from .list import MPointer import typing + class MRSegEntry(CC): space: int = cfield(cs.Int32ub) page: int = cfield(cs.Int32ub) + class MTrxSysPage(CC): fil: MFil = cfield(MFil) transaction_id: int = cfield(cs.Int64ub) diff --git a/src/pyinnodb/disk_struct/undo_log.py b/src/pyinnodb/disk_struct/undo_log.py index 96dd821..753bcae 100644 --- a/src/pyinnodb/disk_struct/undo_log.py +++ b/src/pyinnodb/disk_struct/undo_log.py @@ -6,12 +6,14 @@ # trx0undo.h + class MUndoHeader(CC): undo_page_type: int = cfield(cs.Int16ub) last_log_record_offset: int = cfield(cs.Int16ub) free_space_offset: int = cfield(cs.Int16ub) undo_page_list_node: MListNode = cfield(MListNode) + class MUndoSegmentHeader(CC): state: int = cfield(cs.Int16ub) last_log_offset: int = cfield(cs.Int16ub) @@ -19,14 +21,15 @@ class MUndoSegmentHeader(CC): pointer: MPointer = cfield(MPointer) undo_page_list_node_base: MListBaseNode = cfield(MListBaseNode) + class MUndoLogHeader(CC): - transaction_id : int = 
cfield(cs.Int64ub) + transaction_id: int = cfield(cs.Int64ub) transaction_number: int = cfield(cs.Int64ub) delete_mask_flag: int = cfield(cs.Int16ub) log_start_offset: int = cfield(cs.Int16ub) xid_flag: int = cfield(cs.Int8ub) ddl_transaction_flag: int = cfield(cs.Int8ub) - table_id : int = cfield(cs.Int64ub) # if ddl_transaction + table_id: int = cfield(cs.Int64ub) # if ddl_transaction next_undo_log_offset: int = cfield(cs.Int16ub) prev_undo_log_offset: int = cfield(cs.Int16ub) hist_list_node: MListNode = cfield(MListNode) @@ -35,16 +38,19 @@ class MUndoLogHeader(CC): trid_length: int = cfield(cs.Int32ub) bqual_length: int = cfield(cs.Int32ub) xid_data: str = cfield(cs.Bytes(128)) - + + class MUndoLogForInsert(CC): next_record_offset: int = cfield(cs.Int16ub) flag: int = cfield(cs.Int8ub) + class MUndoPage(CC): fil: MFil = cfield(MFil) page_header: MUndoHeader = cfield(MUndoHeader) seg_header: MUndoSegmentHeader = cfield(MUndoSegmentHeader) - #test_undo_log: MUndoLogForInsert = cfield(MUndoLogForInsert) + # test_undo_log: MUndoLogForInsert = cfield(MUndoLogForInsert) + class MRSEGHeader(CC): max_size: int = cfield(cs.Int32ub) @@ -53,28 +59,33 @@ class MRSEGHeader(CC): spaceid: int = cfield(cs.Int32ub) pointer: MPointer = cfield(MPointer) + class MRSEGArrayHeader(CC): - marker: bytes = cfield(cs.Bytes(4)) #'RSEH' + marker: bytes = cfield(cs.Bytes(4)) #'RSEH' array_size: int = cfield(cs.Int32ub) spaceid: int = cfield(cs.Int32ub) pointer: MPointer = cfield(MPointer) pagenos: typing.List[int] = cfield(carray(128, cs.Int32ub)) # reserve_space: bytes = cfield(cs.Bytes(200)) ## at the end + class MRSEGArrayPage(CC): fil: MFil = cfield(MFil) header: MRSEGArrayHeader = cfield(MRSEGArrayHeader) + class MRSEGPage(CC): fil: MFil = cfield(MFil) header: MRSEGHeader = cfield(MRSEGHeader) slots: typing.List[int] = cfield(carray(1024, cs.Int32ub)) + TRX_UNDO_MODIFY_BLOB = 64 TRX_UNDO_UPD_EXIST_REC = 12 TRX_UNDO_UPD_DEL_REC = 13 TRX_UNDO_DEL_MARK_REC = 14 + class MUndoRecordInsert(CC): # prev_record_offset: int = cfield(cs.Int16ub) next_record_offset: int = cfield(cs.Int16ub) @@ -85,7 +96,7 @@ class MUndoRecordInsert(CC): def _post_parsed(self, stream, context, path): # trx_undo_rec_get_undo_no if self.flag & TRX_UNDO_MODIFY_BLOB > 0: - assert stream.read(1) == b'\x00' + assert stream.read(1) == b"\x00" self.undo_number = const.read_compressed_mysql_int(stream) self.table_id = const.read_compressed_mysql_int(stream) diff --git a/src/pyinnodb/frm/frm.py b/src/pyinnodb/frm/frm.py index d45a9c4..5c6a723 100644 --- a/src/pyinnodb/frm/frm.py +++ b/src/pyinnodb/frm/frm.py @@ -13,16 +13,17 @@ class MFrmKeyParts(CC): - nr: int = cfield(cs.Int16ul) # -1: column idx of primary + nr: int = cfield(cs.Int16ul) # -1: column idx of primary offset: int = cfield(cs.Int16ul) flags: int = cfield(cs.Int8ul) key_type: int = cfield(cs.Int16ul) - length: int = cfield(cs.Int16ul) # primary key length + length: int = cfield(cs.Int16ul) # primary key length def _post_parsed(self, stream, context, path): - self.nr &= 0x3fff + self.nr &= 0x3FFF self.offset -= 1 + class FieldFlag(enum.Enum): DECIMAL = 1 BINARY = 1 @@ -44,12 +45,12 @@ class FieldFlag(enum.Enum): # defined, but not used in modern MySQL # SUM = 32768 MAYBE_NULL = 32768 - HEX_ESCAPE = 0X10000 + HEX_ESCAPE = 0x10000 PACK_SHIFT = 3 DEC_SHIFT = 8 MAX_DEC = 31 - NUM_SCREEN_TYPE = 0X7F01 - ALFA_SCREEN_TYPE = 0X7800 + NUM_SCREEN_TYPE = 0x7F01 + ALFA_SCREEN_TYPE = 0x7800 class MFrmColumn(CC): # 17 @@ -64,7 +65,9 @@ class MFrmColumn(CC): # 17 charset_id2: int = cfield(cs.Int8ul) 
comment_length: str = cfield(cs.Bytes(2)) - def to_dd_column(self, name: str, pos: int, labels: typing.List[typing.List[str]]) -> Column: + def to_dd_column( + self, name: str, pos: int, labels: typing.List[typing.List[str]] + ) -> Column: c = Column() c.hidden = const.column_hidden_type.ColumnHiddenType.HT_VISIBLE.value c.ordinal_position = pos + 1 @@ -82,21 +85,23 @@ def to_dd_column(self, name: str, pos: int, labels: typing.List[typing.List[str] c.numeric_precision -= 1 elif c.type == const.dd_column_type.DDColumnType.BIT.value: c.numeric_precision = self.length - elif c.type in [const.dd_column_type.DDColumnType.ENUM.value, - const.dd_column_type.DDColumnType.SET.value]: + elif c.type in [ + const.dd_column_type.DDColumnType.ENUM.value, + const.dd_column_type.DDColumnType.SET.value, + ]: if self.label_id <= len(labels): - for i, name in enumerate(labels[self.label_id-1]): - c.elements.append(ColumnElement(name=b64encode(name), index=i+1)) + for i, name in enumerate(labels[self.label_id - 1]): + c.elements.append(ColumnElement(name=b64encode(name), index=i + 1)) elif c.type == const.dd_column_type.DDColumnType.STRING.value: c.column_type_utf8 = f"char({self.length})" - c.is_nullable = bool(self.flags & FieldFlag.MAYBE_NULL.value) return c + class MFrmKey(CC): - flags: int = cfield(cs.Int16ul) # & 1 => uniq + flags: int = cfield(cs.Int16ul) # & 1 => uniq length: int = cfield(cs.Int16ul) parts_count: int = cfield(cs.Int8ul) algorithm: int = cfield(cs.Int8ul) @@ -115,7 +120,7 @@ def _post_parsed(self, stream, context, path): for i in range(self.parts_count): self.key_parts.append(MFrmKeyParts.parse_stream(stream)) - def to_dd_index(self, name:str, cols: typing.List[MFrmColumn]) -> Index: + def to_dd_index(self, name: str, cols: typing.List[MFrmColumn]) -> Index: idx = Index() idx.name = name idx.type = self.get_index_type() @@ -129,21 +134,25 @@ def to_dd_index(self, name:str, cols: typing.List[MFrmColumn]) -> Index: idx.elements.append(ie) if len(idx.elements) == 0: - pass # TODO: ROW_ID - - idx.elements.append(IndexElement( - length = 4294967295, - column_opx = len(cols), - hidden = True, - ordinal_position = len(cols) + 1, - )) - - idx.elements.append(IndexElement( - length = 4294967295, - column_opx = len(cols) + 1, - hidden = True, - ordinal_position = len(cols) + 2, - )) + pass # TODO: ROW_ID + + idx.elements.append( + IndexElement( + length=4294967295, + column_opx=len(cols), + hidden=True, + ordinal_position=len(cols) + 1, + ) + ) + + idx.elements.append( + IndexElement( + length=4294967295, + column_opx=len(cols) + 1, + hidden=True, + ordinal_position=len(cols) + 2, + ) + ) for i, c in enumerate(cols): if i in kp: @@ -189,11 +198,9 @@ def _post_parsed(self, stream, context, path): # table comment stream.seek(forminfo_offset + 46) table_comment_length = cs.Int8ub.parse_stream(stream) - if table_comment_length != 0xff: + if table_comment_length != 0xFF: table_comment = stream.read(table_comment_length) - - # other stream.seek(forminfo_offset + 258) column_count = cs.Int16ul.parse_stream(stream) @@ -215,7 +222,6 @@ def _post_parsed(self, stream, context, path): labels = stream.read(labels_length) comment = stream.read(comment_length) - if self.keyinfo_length == 0xFFFF: stream.seek(0x02F) self.keyinfo_length = cs.Int32ul.parse_stream(stream) @@ -239,8 +245,6 @@ def _post_parsed(self, stream, context, path): for i in range(key_count): keys.append(MFrmKey.parse_stream(key_stream)) - - key_extra = key_stream.read(key_extra_length) key_name, key_comment = key_extra.split(b"\x00", 1) @@ 
-251,7 +255,9 @@ def _post_parsed(self, stream, context, path): key_comments = [] for key in keys: if key.flags ^ 4096: - key_comments.append(CLenString(2, "little").parse_stream(comment_stream)) + key_comments.append( + CLenString(2, "little").parse_stream(comment_stream) + ) else: key_comments.append("") @@ -275,18 +281,15 @@ def _post_parsed(self, stream, context, path): column_names = names[1:-2].split(b"\xff") self.column_labels = [ - [v for v in g.split(b'\xff') if v] - for g in labels.split(b'\x00') if g + [v for v in g.split(b"\xff") if v] for g in labels.split(b"\x00") if g ] - _null_bytes = (null_fields + 1) // 8 for i, c_name in enumerate(column_names): cols[i].name = c_name.decode() - # 0: + orepr = orepr[:-1] + ", " + ", ".join(other) + ")" + return orepr + + klass.__repr__ = __repr__ + + def _set_show_field(self, k, v): + if getattr(self, SHOW_FIELDS, None) is None: + setattr(self, SHOW_FIELDS, []) + + setattr(self, k, v) + if k not in self._show_fields: + self._show_fields.append(k) + + klass._set_show_field = _set_show_field + return klass @@ -59,7 +84,9 @@ def __init__(self, cls, subcon): def _parse(self, stream, context, path): obj = super()._parse(stream, context, path) if getattr(obj, "_post_parsed", None) is not None: - obj._post_parsed(stream, context, path) + v = obj._post_parsed(stream, context, path) + if v is not None: + setattr(obj, "_post_value", v) return obj def _build(self, obj, stream, context, path): @@ -195,6 +222,7 @@ def cfield(subcon, default=None): ) return csfield(subcon) + class CLenString(cs.Construct): def __init__(self, len_size, byte_order="big"): super().__init__() @@ -206,11 +234,13 @@ def _parse(self, stream, context, path): data_size = int.from_bytes(len_bytes, self._byte_order) data = stream.read(data_size) return data + def _build(self, obj, stream, context, path): data_size = len(obj) stream.write(int.to_bytes(data_size, self._len_size, self._byte_order)) stream.write(obj) + class IntFromBytes(cs.Construct): def __init__(self, length, byte_order="big"): super().__init__() @@ -230,6 +260,13 @@ def _sizeof(self, context, path): if __name__ == "__main__": + @dataclasses.dataclass + class D: + age: int + name: str + + D.__str__ + class DD(CC): f: int = cfield(cs.Int16ub) diff --git a/src/pyinnodb/sdi/table.py b/src/pyinnodb/sdi/table.py index ade5f1b..469d062 100644 --- a/src/pyinnodb/sdi/table.py +++ b/src/pyinnodb/sdi/table.py @@ -6,6 +6,7 @@ import re import sys + if sys.version_info.minor >= 9: from functools import cache else: @@ -31,6 +32,7 @@ column_type_size = re.compile("[^(]*[(]([^)]*)[)]") + class Lob: def __init__(self, data, off_page): self.data = data @@ -147,7 +149,7 @@ def index_prefix(self, ie: IndexElement): return 0, False @cache - def version_valid(self, data_schema_version) -> bool : + def version_valid(self, data_schema_version) -> bool: va = int(self.private_data.get("version_added", 0)) vd = int(self.private_data.get("version_dropped", 0)) if data_schema_version < va: @@ -156,7 +158,6 @@ def version_valid(self, data_schema_version) -> bool : return False return True - @property @cache def is_hidden_from_user(self): @@ -168,7 +169,10 @@ def is_hidden_from_user(self): @property @cache def is_instant_col(self): - return "version_added" in self.private_data or "version_dropped" in self.private_data + return ( + "version_added" in self.private_data + or "version_dropped" in self.private_data + ) @property @cache @@ -246,7 +250,7 @@ def size(self): elif dtype == DDColumnType.SET: # bit mask return int((len(self.elements) + 7) 
-        elif dtype == DDColumnType.STRING: # if column don't have varsize
+        elif dtype == DDColumnType.STRING:  # if column don't have varsize
             sizes = column_type_size.findall(self.column_type_utf8)
             if len(sizes) == 0:
                 return 0
@@ -414,13 +418,17 @@ def read_data(self, stream, size=None):
         elif dtype == DDColumnType.DECIMAL or dtype == DDColumnType.NEWDECIMAL:
             return self._read_new_decimal(stream)
         elif dtype == DDColumnType.STRING:
-            return self._read_varchar(stream, dsize).decode(errors='replace').strip()
+            return self._read_varchar(stream, dsize).decode(errors="replace").strip()
         elif dtype == DDColumnType.VARCHAR:
-            return self._read_varchar(stream, dsize).decode(errors='replace')
-        elif dtype in [DDColumnType.LONG_BLOB, DDColumnType.MEDIUM_BLOB, DDColumnType.BLOB]:
-            return self._read_varchar(stream, dsize).decode(errors='replace')
+            return self._read_varchar(stream, dsize).decode(errors="replace")
+        elif dtype in [
+            DDColumnType.LONG_BLOB,
+            DDColumnType.MEDIUM_BLOB,
+            DDColumnType.BLOB,
+        ]:
+            return self._read_varchar(stream, dsize).decode(errors="replace")
         elif dtype == DDColumnType.TINY_BLOB:
-            return self._read_varchar(stream, dsize).decode(errors='replace')
+            return self._read_varchar(stream, dsize).decode(errors="replace")
         elif dtype == DDColumnType.TIME2:
             time_data = MTime2.parse_stream(stream)
             time_data.parse_fsp(stream, dsize - 3)  # 3 = MTime2.sizeof()
@@ -452,9 +460,9 @@ def read_data(self, stream, size=None):
             mask = self._read_int(stream, dsize, False)
             r = []
             for m, v in self.element_map.items():
-                if mask & (1 << (m-1)):
-                    r.append(b64decode(v).decode(errors='replace'))
-            return ','.join(r)
+                if mask & (1 << (m - 1)):
+                    r.append(b64decode(v).decode(errors="replace"))
+            return ",".join(r)
         elif dtype == DDColumnType.JSON:
             # data = stream.read(dsize)
             data = self._read_varchar(stream, dsize)
@@ -462,7 +470,7 @@ def read_data(self, stream, size=None):
             data = data.data
             try:
                 if len(data) == 0:
-                    return 'null'
+                    return "null"
                 v = MJson.parse_stream(io.BufferedReader(io.BytesIO(data)))
                 return v.get_json()
             except Exception as e:
@@ -568,7 +576,9 @@ class ForeignKeys:
     elements: typing.List[ForeignElement] = None

     def __post_init__(self):
-        elements: typing.List[ForeignElement] = [ForeignElement(**c) for c in self.elements]
+        elements: typing.List[ForeignElement] = [
+            ForeignElement(**c) for c in self.elements
+        ]
         self.elements = elements

     def gen(self, column_name: typing.List[str]):
@@ -656,7 +666,6 @@ class Table:
     collation_id: int = 0
     # tablespace_ref: ?
-
     @property
     @cache
     def private_data(self):
@@ -761,7 +770,7 @@ def get_disk_data_layout(self):
             data_layout_col = []
             for i, c in enumerate(self.columns):
                 data_layout_col.append((c, c_l.get(i, 4294967295)))
-            data_layout_col.sort(key = lambda c: c[0].private_data.get("physical_pos", 0))
+            data_layout_col.sort(key=lambda c: c[0].private_data.get("physical_pos", 0))
             return data_layout_col

         data_layout_col = []
@@ -774,7 +783,7 @@ def get_disk_data_layout(self):
             for ie in idx.elements:
                 col = self.columns[ie.column_opx]
                 prekey_len, ok = col.index_prefix(ie)
-                if ok: # prefix
+                if ok:  # prefix
                     data_layout_col.append((col, prekey_len))
                 else:
                     data_layout_col.append((col, ie.length))
@@ -784,7 +793,7 @@ def get_disk_data_layout(self):
             for ie in idx.elements:
                 col = self.columns[ie.column_opx]
                 prekey_len, ok = col.index_prefix(ie)
-                if ok: # prefix
+                if ok:  # prefix
                     data_layout_col.append((col, prekey_len))
                 else:
                     data_layout_col.append((col, ie.length))
@@ -794,7 +803,7 @@ def get_disk_data_layout(self):

     def build_primary_key_bytes(self, values) -> bytes:
         cols = self.get_disk_data_layout()
-        cols = [c for c in cols if c[1] != ~0&0xffffffff]
+        cols = [c for c in cols if c[1] != ~0 & 0xFFFFFFFF]
         buf = io.BytesIO()
         for i, c in enumerate(cols):
             if DDColumnType(c[0].type).is_int_number():
@@ -829,40 +838,70 @@ def search(self, f, primary_key, hidden_col):
         root_page_no = int(self.indexes[0].private_data.get("root", 4))
         f.seek(root_page_no * const.PAGE_SIZE)
         root_index_page = MIndexPage.parse_stream(f)
-        first_leaf_page = root_index_page.get_first_leaf_page(f, self.get_primary_key_col())
+        first_leaf_page = root_index_page.get_first_leaf_page(
+            f, self.get_primary_key_col()
+        )
         if isinstance(primary_key, tuple):
-            primary_key = (self.build_primary_key_bytes(primary_key))
+            primary_key = self.build_primary_key_bytes(primary_key)
         else:
-            primary_key = (self.build_primary_key_bytes((primary_key,)))
-        value_parser = MIndexPage.default_value_parser(self, hidden_col=hidden_col, transfter=lambda id: id)
+            primary_key = self.build_primary_key_bytes((primary_key,))
+        value_parser = MIndexPage.default_value_parser(
+            self, hidden_col=hidden_col, transfter=lambda id: id
+        )

         while first_leaf_page != 4294967295:
             f.seek(first_leaf_page * const.PAGE_SIZE)
             index_page = MIndexPage.parse_stream(f)
             f.seek(first_leaf_page * const.PAGE_SIZE)
-            page_dir_idx, match = index_page.binary_search_with_page_directory(primary_key, f)
-            f.seek(first_leaf_page * const.PAGE_SIZE + index_page.page_directory[page_dir_idx] - 5)
+            page_dir_idx, match = index_page.binary_search_with_page_directory(
+                primary_key, f
+            )
+            f.seek(
+                first_leaf_page * const.PAGE_SIZE
+                + index_page.page_directory[page_dir_idx]
+                - 5
+            )
             end_rh = MRecordHeader.parse_stream(f)
-            if match and const.RecordType(end_rh.record_type) == const.RecordType.Conventional: # the key
+            logging.debug("end_rh is %s", end_rh)
+            if (
+                match
+                and const.RecordType(end_rh.record_type)
+                == const.RecordType.Conventional
+            ):  # the key
                 return value_parser(end_rh, f)
-            elif match and const.RecordType(end_rh.record_type) == const.RecordType.NodePointer:
+            elif (
+                match
+                and const.RecordType(end_rh.record_type) == const.RecordType.NodePointer
+            ):
                 record_key = f.read(len(primary_key))
                 page_num = f.read(4)
                 first_leaf_page = int.from_bytes(page_num, "big")
             else:
-                f.seek(first_leaf_page * const.PAGE_SIZE + index_page.page_directory[page_dir_idx+1] - 5)
+                f.seek(
+                    first_leaf_page * const.PAGE_SIZE
+                    + index_page.page_directory[page_dir_idx + 1]
+                    - 5
+                )
                 start_rh = MRecordHeader.parse_stream(f)
                 owned = end_rh.num_record_owned
-                first_leaf_page = 4294967295 # no match if cur page is leaf then break loop
-                for i in range(owned+1):
+                first_leaf_page = (
+                    4294967295  # no match if cur page is leaf then break loop
+                )
+                for i in range(owned + 1):
                     cur = f.tell()
-                    if const.RecordType(start_rh.record_type) == const.RecordType.Conventional:
+                    if (
+                        const.RecordType(start_rh.record_type)
+                        == const.RecordType.Conventional
+                    ):
                         record_primary_key = f.read(len(primary_key))
                         if record_primary_key == primary_key:
                             f.seek(-len(primary_key), 1)
                             v = value_parser(start_rh, f)
                             return v
-                    elif const.RecordType(start_rh.record_type) == const.RecordType.NodePointer:
+                    elif (
+                        const.RecordType(start_rh.record_type)
+                        == const.RecordType.NodePointer
+                    ):
                         record_key = f.read(len(primary_key))
                         if record_key > primary_key:
                             if i == 1:
@@ -878,28 +917,33 @@ def search(self, f, primary_key, hidden_col):
                     f.seek(start_rh.next_record_offset - 5, 1)
                     start_rh = MRecordHeader.parse_stream(f)

-
     def iter_record(self, f, hidden_col=False, garbage=False, transfter=None):
         root_page_no = int(self.indexes[0].private_data.get("root", 4))
         f.seek(root_page_no * const.PAGE_SIZE)
         root_index_page = MIndexPage.parse_stream(f)
-        first_leaf_page = root_index_page.get_first_leaf_page(f, self.get_primary_key_col())
+        first_leaf_page = root_index_page.get_first_leaf_page(
+            f, self.get_primary_key_col()
+        )
         if first_leaf_page is None:
             return

-        default_value_parser = MIndexPage.default_value_parser(self, hidden_col=hidden_col, transfter=transfter)
+        default_value_parser = MIndexPage.default_value_parser(
+            self, hidden_col=hidden_col, transfter=transfter
+        )

         result = []
         while first_leaf_page != 4294967295:
             f.seek(first_leaf_page * const.PAGE_SIZE)
             index_page = MIndexPage.parse_stream(f)
-            result.extend(index_page.iterate_record_header(f, value_parser = default_value_parser, garbage=garbage))
+            result.extend(
+                index_page.iterate_record_header(
+                    f, value_parser=default_value_parser, garbage=garbage
+                )
+            )
             first_leaf_page = index_page.fil.next_page
-
         return result

-
     def __post_init__(self):
         cols: typing.List[Column] = [Column(**c) for c in self.columns]
         self.columns = cols
@@ -1064,36 +1108,40 @@ def should_ext():

 column_spec_size = {"DB_ROW_ID": 6, "DB_TRX_ID": 6, "DB_ROLL_PTR": 7}

+
 def get_sys_col(name, pos):
     if name == "DB_TRX_ID":
         return Column(
-            name = "DB_TRX_ID",
-            type = 10,
-            hidden = 2,
-            ordinal_position = pos,
-            char_length = 6,
-            has_no_default = False,
-            default_value = "",
-            default_value_utf8_null = True,
-            collation_id = 63,
-            is_explicit_collation = False)
+            name="DB_TRX_ID",
+            type=10,
+            hidden=2,
+            ordinal_position=pos,
+            char_length=6,
+            has_no_default=False,
+            default_value="",
+            default_value_utf8_null=True,
+            collation_id=63,
+            is_explicit_collation=False,
+        )
     elif name == "DB_ROLL_PTR":
         return Column(
-            name = name,
-            type = 9,
-            hidden = 2,
-            ordinal_position = pos,
-            char_length = 6,
-            has_no_default = False,
-            default_value = "",
-            default_value_utf8_null = True,
-            collation_id = 63,
-            is_explicit_collation = False)
+            name=name,
+            type=9,
+            hidden=2,
+            ordinal_position=pos,
+            char_length=6,
+            has_no_default=False,
+            default_value="",
+            default_value_utf8_null=True,
+            collation_id=63,
+            is_explicit_collation=False,
+        )
         pass
     elif name == "DB_ROW_ID":
         pass
     return

+
 if __name__ == "__main__":
     import json
diff --git a/tests/containerOp.py b/tests/containerOp.py
index a8906ed..1bd8869 100644
--- a/tests/containerOp.py
+++ b/tests/containerOp.py
@@ -10,7 +10,6 @@
 from .context import *

-
 class ContainerOp(object):
     def __init__(
         self,
@@ -54,7 +53,9 @@ def containerOp():

     try:
         mContainer = MySqlContainer("mysql:8.0.35")
-        mContainer.with_volume_mapping(os.getcwd() + "/" + "datadir_test", "/var/lib/mysql", "rw")
+        mContainer.with_volume_mapping(
+            os.getcwd() + "/" + "datadir_test", "/var/lib/mysql", "rw"
+        )
         mysql = mContainer.__enter__()
         # with MySqlContainer("mysql:8.0.35") as mysql:
         engine = sqlalchemy.create_engine(mysql.get_connection_url())
diff --git a/tests/context.py b/tests/context.py
index 5a1b3d3..807a938 100644
--- a/tests/context.py
+++ b/tests/context.py
@@ -31,15 +31,18 @@

 MysqlFile = namedtuple("MysqlFile", "mysql8ibd mysql5ibd mysql5frm mysql8instantibd")

+
 def download_test_file():
     import requests
     import tarfile
+
     testfile = "https://github.com/user-attachments/files/16426987/mysql_test.tgz"
     resp = requests.get(testfile)
     fileobj = BytesIO(resp.content)
     tar = tarfile.open(fileobj=fileobj)
     tar.extractall(test_mysql8_ibd.parent.parent)

+
 @pytest.fixture
 def mysqlfile():
     if not test_mysql8_ibd.exists():
@@ -56,5 +59,3 @@ def mysqlfile():
         mysql5frm=ff5,
         mysql8instantibd=f8ins,
     )
-
-
diff --git a/tests/test_instant.py b/tests/test_instant.py
index faaef89..feddd99 100644
--- a/tests/test_instant.py
+++ b/tests/test_instant.py
@@ -3,6 +3,7 @@
 from pyinnodb.disk_struct.index import MSDIPage
 from pyinnodb.disk_struct.fsp import MFspPage

+
 def test_parse_mysql8_instant(mysqlfile: MysqlFile):
     f = mysqlfile.mysql8instantibd
     fsp_page = MFspPage.parse_stream(f)
@@ -13,8 +14,22 @@ def test_parse_mysql8_instant(mysqlfile: MysqlFile):
     result = dd_object.iter_record(f, transfter=lambda x: x)
     cls = dd_object.DataClass

-    r1 = cls(id=1, name='original record', drop2='original drop 2 column', add1=None, add2='add2 default', drop1=99)
-    r2 = cls(id=2, name='insert after alter', drop2='drop2 after alter', add1=None, add2='add2', drop1=99)
+    r1 = cls(
+        id=1,
+        name="original record",
+        drop2="original drop 2 column",
+        add1=None,
+        add2="add2 default",
+        drop1=99,
+    )
+    r2 = cls(
+        id=2,
+        name="insert after alter",
+        drop2="drop2 after alter",
+        add1=None,
+        add2="add2",
+        drop1=99,
+    )
     assert len(result) == 2
     assert r1 == result[0]
     assert r2 == result[1]
diff --git a/tests/test_parse.py b/tests/test_parse.py
index db3d459..96111bf 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -7,33 +7,47 @@

 timeformat = "%Y-%m-%d %H:%M:%S"

+
 def test_parse_mysql8(mysqlfile: MysqlFile):
     fsp_page = MFspPage.parse_stream(mysqlfile.mysql8ibd)
     mysqlfile.mysql8ibd.seek(fsp_page.sdi_page_no * const.PAGE_SIZE)
     sdi_page = MSDIPage.parse_stream(mysqlfile.mysql8ibd)
     dd_object = Table(**sdi_page.ddl(mysqlfile.mysql8ibd, 0)["dd_object"])
-
+
     result = dd_object.iter_record(mysqlfile.mysql8ibd, transfter=lambda x: x)
     cls = dd_object.DataClass
-    test_data = cls(id=1, BIGINT=98283201,
-                    BIT = 1, DATETIME=datetime.datetime.strptime('2024-01-01 09:00:01',timeformat),
-                    DOUBLE = 3.1415926, FLOAT = 6.189000129699707 ,
-                    INTEGER = 8621, DECIMAL=Decimal('910.79'),
-                    LONGBLOB= 'x' * 100,
-                    LONGTEXT = 'g' * 3,
-                    MEDIUMBLOB = None,
-                    MEDIUMINT = 999999,
-                    MEDIUMTEXT = None,
-                    NUMERIC = Decimal('11'), REAL = 1092.892,
-                    SMALLINT = 981, TEXT = "TEXT",
-                    TIME = datetime.timedelta(seconds=11040),
-                    TIMESTAMP = datetime.datetime.strptime("2024-07-24 09:05:28", timeformat).replace(tzinfo=datetime.timezone.utc),
-                    YEAR = 2024, ENUM = b"a", SET = "a,b,c",
-                    TINYBLOB = "TINYBLOB",
-                    TINYINT = 99, TINYTEXT = "TINYTEXT",
-                    CHAR = "09283012", VARBINARY = "VARBINARY",
-                    int_def_col = 42, str_def_col='world',
+    test_data = cls(
+        id=1,
+        BIGINT=98283201,
+        BIT=1,
+        DATETIME=datetime.datetime.strptime("2024-01-01 09:00:01", timeformat),
+        DOUBLE=3.1415926,
+        FLOAT=6.189000129699707,
+        INTEGER=8621,
+        DECIMAL=Decimal("910.79"),
+        LONGBLOB="x" * 100,
+        LONGTEXT="g" * 3,
+        MEDIUMBLOB=None,
+        MEDIUMINT=999999,
+        MEDIUMTEXT=None,
+        NUMERIC=Decimal("11"),
+        REAL=1092.892,
+        SMALLINT=981,
+        TEXT="TEXT",
+        TIME=datetime.timedelta(seconds=11040),
+        TIMESTAMP=datetime.datetime.strptime("2024-07-24 09:05:28", timeformat).replace(
+            tzinfo=datetime.timezone.utc
+        ),
+        YEAR=2024,
+        ENUM=b"a",
+        SET="a,b,c",
+        TINYBLOB="TINYBLOB",
+        TINYINT=99,
+        TINYTEXT="TINYTEXT",
+        CHAR="09283012",
+        VARBINARY="VARBINARY",
+        int_def_col=42,
+        str_def_col="world",
     )
     assert len(result) == 2
     assert test_data == result[0]
-