From cdc23e74689d16da76c7ad498357308efee5edb1 Mon Sep 17 00:00:00 2001 From: BodgeMaster <> Date: Sat, 15 Oct 2022 23:05:26 +0200 Subject: [PATCH] lib/nbt: Implement parser --- src/lib/nbt.cpp | 365 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib/nbt.hpp | 1 + 2 files changed, 366 insertions(+) diff --git a/src/lib/nbt.cpp b/src/lib/nbt.cpp index 6b1e540..5c09a60 100644 --- a/src/lib/nbt.cpp +++ b/src/lib/nbt.cpp @@ -1168,6 +1168,371 @@ namespace NBT { } + // the same comment about blindly passing up error codes applies to this function + // FIXME: memory leak when returning errors + ErrorOr> deserializeRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) { + std::vector contents; + // get contained data length by reading it manually because + // the function that does it normally can't deal with + // headerless tags + // + // add one byte to position to skip the type byte + ErrorOr elementCount = Helper::readInt32(data, dataSize, initialPosition+1); + if (elementCount.isError) { + return ErrorOr>(true, elementCount.errorCode); + } + + uint8_t contentType = data[initialPosition]; + // contained type byte + 4 length bytes = 5 + *processedDataSize = 5; + switch (contentType) { + case TagType::END: { + // everything except content has been touched at this point + // and a list of end tags has no content that could be read + for (int32_t i=0; i nextInt = Helper::readInt8(data, dataSize, initialPosition+*processedDataSize); + if (nextInt.isError) { + return ErrorOr>(true, nextInt.errorCode); + } + contents.push_back(new Tag::Int8("", nextInt.value)); + // The below code would produce a warning on GCC and Clang + // about the computed value not being used. While this does + // apply inside this function, it is ultimately not true + // as the pointer is used both inside and outside of the + // function. + *processedDataSize += 1; + } + break; + } + case TagType::INT16: { + for (int32_t i=0; i nextInt = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize); + if (nextInt.isError) { + return ErrorOr>(true, nextInt.errorCode); + } + contents.push_back(new Tag::Int16("", nextInt.value)); + *processedDataSize += 2; + } + break; + } + case TagType::INT32: { + for (int32_t i=0; i nextInt = Helper::readInt32(data, dataSize, initialPosition+*processedDataSize); + if (nextInt.isError) { + return ErrorOr>(true, nextInt.errorCode); + } + contents.push_back(new Tag::Int32("", nextInt.value)); + *processedDataSize += 4; + } + break; + } + case TagType::FLOAT: { + for (int32_t i=0; i nextFloat = Helper::readFloat(data, dataSize, initialPosition+*processedDataSize); + if (nextFloat.isError) { + return ErrorOr>(true, nextFloat.errorCode); + } + contents.push_back(new Tag::Float("", nextFloat.value)); + *processedDataSize += 4; + } + break; + } + case TagType::INT64: { + for (int32_t i=0; i nextInt = Helper::readInt64(data, dataSize, initialPosition+*processedDataSize); + if (nextInt.isError) { + return ErrorOr>(true, nextInt.errorCode); + } + contents.push_back(new Tag::Int64("", nextInt.value)); + *processedDataSize += 8; + } + break; + } + case TagType::DOUBLE: { + for (int32_t i=0; i nextDouble = Helper::readDouble(data, dataSize, initialPosition+*processedDataSize); + if (nextDouble.isError) { + return ErrorOr>(true, nextDouble.errorCode); + } + contents.push_back(new Tag::Double("", nextDouble.value)); + *processedDataSize += 8; + } + break; + } + case TagType::INT8_ARRAY: { + for (int32_t i=0; i> nextArray = Helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize); + if (nextArray.isError) { + return ErrorOr>(true, nextArray.errorCode); + } + contents.push_back(new Tag::Int8Array("", nextArray.value)); + *processedDataSize += (uint64_t) nextArray.value.size(); + } + break; + } + case TagType::STRING: { + for (int32_t i=0; i nextString = Helper::readString(data, dataSize, initialPosition+*processedDataSize); + if (nextString.isError) { + return ErrorOr>(true, nextString.errorCode); + } + contents.push_back(new Tag::String("", nextString.value)); + // this cannot be an error because it just got read + int16_t nextStringSize = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value; + *processedDataSize += (uint64_t) nextStringSize + 2; + } + break; + } + case TagType::LIST: { + uint64_t* containedDataSize = new uint64_t; + for (int32_t i=0; i> nextListContents = deserializeRawListContents(data, dataSize, initialPosition+*processedDataSize, containedDataSize); + if (nextListContents.isError) { + delete containedDataSize; + return ErrorOr>(true, nextListContents.errorCode); + } + contents.push_back(new Tag::List("", nextListContents.value)); + *processedDataSize += *containedDataSize; + } + delete containedDataSize; + break; + } + case TagType::COMPOUND: { + uint64_t* containedDataSize = new uint64_t; + for (int32_t i=0; i> nextCompoundData = deserialize(data, dataSize, initialPosition+*processedDataSize, containedDataSize); + if (nextCompoundData.isError) { + delete containedDataSize; + return ErrorOr>(true, nextCompoundData.errorCode); + } + contents.push_back(new Tag::Compound("", nextCompoundData.value)); + *processedDataSize += *containedDataSize; + } + delete containedDataSize; + break; + } + case TagType::INT32_ARRAY: { + for (int32_t i=0; i> nextArray = Helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize); + if (nextArray.isError) { + return ErrorOr>(true, nextArray.errorCode); + } + contents.push_back(new Tag::Int32Array("", nextArray.value)); + *processedDataSize += (uint64_t) nextArray.value.size() * 4; + } + break; + } + case TagType::INT64_ARRAY: { + for (int32_t i=0; i> nextArray = Helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize); + if (nextArray.isError) { + return ErrorOr>(true, nextArray.errorCode); + } + contents.push_back(new Tag::Int64Array("", nextArray.value)); + *processedDataSize += (uint64_t) nextArray.value.size() * 8; + } + break; + } + default: + return ErrorOr>(true, ErrorCodes::INVALID_TYPE); + } + return ErrorOr>(contents); + } + + // comment about blindly passing up error codes applies here + // + // The return value of this function is a vector of tags + // instead of a compound tag due to a spec extension that allows + // for any bare tag to be valid NBT data without a containing + // compound tag. This also just makes the implementation easier. + ErrorOr> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){ + if (initialPosition >= dataSize) { + if (processedDataSize!=nullptr) *processedDataSize=0; + return ErrorOr>(true, ErrorCodes::OUT_OF_RANGE); + + // An interesting question at this point is whether we should + // consider empty input valid or invalid NBT data. + // + // The original spec says that the top-most tag is always a + // compound (or in more recent times, the Microsoft-commercialized + // in-game-purchase-enabling version also allows list tags) + // which automatically means that no data is invalid data... + // I don't see a reason why having a different tag as the top-most + // tag shouldn't be valid NBT in which case we have to face the + // question whether no data is invalid or just empty NBT data. + // + // This seems like a reasonable extension to the spec to me and + // it should be backwards compatible AFAIK. + // + // - BodgeMaster + } + + std::vector tags = std::vector(); + ErrorOr> returnValue; + uint64_t currentPosition = initialPosition; + while (currentPosition nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition); + if (nextTagSize.isError) { + if (nextTagSize.errorCode == ErrorCodes::NOT_YET_KNOWN) { + ErrorOr tagName = Helper::readString(data, dataSize, currentPosition+1); + if (tagName.isError) { + returnValue = ErrorOr>(true, tagName.errorCode); + goto returnNow; + } + + // used seek to the start of the list's/compound’s contents + // + // there is no way this is an error bc it gets + // checked while trying to parse the string above + int16_t nameSize = Helper::readInt16(data, dataSize, currentPosition+1).value; + + uint64_t* processedTagSize = new uint64_t; + *processedTagSize = 0; + + if (data[currentPosition]==TagType::LIST) { + // type byte + two name size bytes = 3 + ErrorOr> listData = deserializeRawListContents(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize); + if (listData.isError) { + delete processedTagSize; + returnValue = ErrorOr>(true, listData.errorCode); + goto returnNow; + } + tags.push_back(new Tag::List(tagName.value, listData.value)); + *processedTagSize += (uint64_t) nameSize + 3; + } + if (data[currentPosition]==TagType::COMPOUND) { + // type byte + two name size bytes = 3 + ErrorOr> compoundData = deserialize(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize); + if (compoundData.isError) { + delete processedTagSize; + returnValue = ErrorOr>(true, compoundData.errorCode); + goto returnNow; + } + tags.push_back(new Tag::Compound(tagName.value, compoundData.value)); + *processedTagSize += (uint64_t) nameSize + 3; + } + currentPosition += *processedTagSize; + + delete processedTagSize; + continue; + } + returnValue = ErrorOr>(true, nextTagSize.errorCode); + goto returnNow; + } + + if (currentPosition + nextTagSize.value > dataSize) { + returnValue = ErrorOr>(true, ErrorCodes::OVERRUN); + goto returnNow; + } + + // recursion abort condition + if (data[currentPosition]==TagType::END) { + // not appending an end tag as it is built into + // the compound anyway + currentPosition++; + returnValue = ErrorOr>(tags); + goto returnNow; + } + + // nameSize cannot be an error here bc it got checked in + // nextTagSize() already + int16_t nameSize = Helper::readInt16(data, dataSize, currentPosition+1).value; + + ErrorOr name = Helper::readString(data, dataSize, currentPosition+1); + if (name.isError) { + returnValue = ErrorOr>(true, name.errorCode); + goto returnNow; + } + + // Overrun / out of range errors have already been ruled out by + // checking the tag size against the total amount of data. + switch (data[currentPosition]) { + case TagType::INT8: { + int8_t content = Helper::readInt8(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int8(name.value, content)); + break; + } + case TagType::INT16: { + int16_t content = Helper::readInt16(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int16(name.value, content)); + break; + } + case TagType::INT32: { + int32_t content = Helper::readInt32(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int32(name.value, content)); + break; + } + case TagType::INT64: { + int64_t content = Helper::readInt64(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int64(name.value, content)); + break; + } + case TagType::FLOAT: { + float content = Helper::readFloat(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Float(name.value, content)); + break; + } + case TagType::DOUBLE: { + double content = Helper::readDouble(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Double(name.value, content)); + break; + } + case TagType::INT8_ARRAY: { + std::vector content = Helper::readInt8Array(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int8Array(name.value, content)); + break; + } + case TagType::STRING: { + ErrorOr content = Helper::readString(data, dataSize, currentPosition+nameSize+3); + if (content.isError) { + returnValue = ErrorOr>(true, content.errorCode); + goto returnNow; + } + tags.push_back(new Tag::String(name.value, content.value)); + break; + } + case TagType::INT32_ARRAY: { + std::vector content = Helper::readInt32Array(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int32Array(name.value, content)); + break; + } + case TagType::INT64_ARRAY: { + std::vector content = Helper::readInt64Array(data, dataSize, currentPosition+nameSize+3).value; + tags.push_back(new Tag::Int64Array(name.value, content)); + break; + } + default: { + returnValue = ErrorOr>(true, ErrorCodes::UNKNOWN); + goto returnNow; + } + } + + currentPosition += nextTagSize.value; + } + returnValue = ErrorOr>(tags); + goto returnNow; + + returnNow: + if (processedDataSize!=nullptr) { + *processedDataSize = currentPosition-initialPosition; + } + if (returnValue.isError) { + for (uint64_t i=0; i> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr); bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr); }