From a86259037022208a72ab6265195c441707b4a10c Mon Sep 17 00:00:00 2001 From: BodgeMaster <> Date: Mon, 15 Aug 2022 05:15:17 +0200 Subject: [PATCH] lib/nbt: Start implementing the NBT validator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In theory, this is it. It’s just missing the portion that deals with lists and unit tests. Both will each likely require similar effort to this. --- src/lib/nbt.cpp | 122 ++++++++++++++++++++++++++++++++++++++++++++---- src/lib/nbt.hpp | 2 +- 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/src/lib/nbt.cpp b/src/lib/nbt.cpp index c83afa4..671c46f 100644 --- a/src/lib/nbt.cpp +++ b/src/lib/nbt.cpp @@ -385,9 +385,9 @@ namespace NBT { // be determined easily by looking at the contained data type and // size information but cases like string lists or compound lists // are significantly more difficult to deal with. Parsing their - // contents requires special attention anyway due the tag headers - // of contained tags being absent so they may as well get their - // own function for this as well. + // contents requires special attention anyway due to the tag headers + // of contained tags being absent so they may as well get treated + // separately for this as well. 
ErrorOr totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) { uint8_t nextTag; if (dataSize <= currentPosition) {
@@ -569,10 +569,154 @@ namespace NBT {
         }
     }
 
-    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
-        //TODO: find out the size of the next tag
-        //TODO: consume tag
-        //TODO: recurse if tag compound and return if tag end
-        return false;
+    // Checks whether the bytes of data starting at initialPosition are valid
+    // NBT. Tags are consumed one after another until an end tag is found or
+    // the data runs out; compounds are validated recursively.
+    //
+    // data:              the raw NBT bytes
+    // dataSize:          number of bytes in data
+    // initialPosition:   offset of the first tag to validate
+    // processedDataSize: optional; if not nullptr, it receives the number of
+    //                    bytes consumed by this call, including the end tag
+    //                    that stopped it (if there was one)
+    //
+    // Returns true if everything that got consumed is valid NBT.
+    //
+    //TODO: handle list (list tags get rejected for now)
+    //TODO: Running out of data before an end tag is found counts as valid, so
+    //      a nested compound that is missing its end tag goes unnoticed.
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
+        uint64_t currentPosition = initialPosition;
+
+        // Every exit goes through this helper so that processedDataSize gets
+        // filled in. Redefining `return` as a macro does not work for that:
+        // the macro expands to two statements, so in
+        //     if (condition) return false;
+        // only the bookkeeping is conditional and the function always returns.
+        auto finish = [&](bool isValid) {
+            if (processedDataSize!=nullptr) *processedDataSize = currentPosition-initialPosition;
+            return isValid;
+        };
+
+        if (initialPosition >= dataSize) {
+            // Yes, this *could* return an instance of ErrorOr with
+            // ErrorCodes::OVERRUN but we only care to know if what is
+            // at that position is valid NBT which it clearly isn't according
+            // to the original spec.
+            //
+            // An interesting question at this point is whether we should
+            // consider empty input valid or invalid NBT data.
+            //
+            // The original spec says that the top-most tag is always a
+            // compound (or in more recent times, the Microsoft-commercialized
+            // in-game-purchase-enabling version also allows list tags)
+            // which automatically means that no data is invalid data...
+            // I don't see a reason why having a different tag as the top-most
+            // tag shouldn't be valid NBT in which case we have to face the
+            // question whether no data is invalid or just empty NBT data.
+            //
+            // This seems like a reasonable extension to the spec to me and
+            // it should be backwards compatible AFAIK.
+            //
+            // - BodgeMaster
+            return finish(false);
+        }
+
+        while (currentPosition<dataSize) {
+            const uint8_t tagType = data[currentPosition];
+
+            auto nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
+            if (nextTagSize.isError) {
+                // NOT_YET_KNOWN is the only error that gets special treatment;
+                // every other error means the data is invalid.
+                if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) return finish(false);
+
+                // Only compounds are handled so far. Anything else has to be
+                // rejected: skipping it would advance the position by zero
+                // bytes and the loop would never terminate.
+                if (tagType != TagType::COMPOUND) return finish(false);
+
+                // attempt parsing the name
+                auto tagName = helper::readString(data, dataSize, currentPosition+1);
+                if (tagName.isError) return finish(false);
+
+                // there is no way this is an error bc it gets
+                // checked while trying to parse the string above
+                //
+                // The name size is an unsigned 16 bit value. Widening it as
+                // a signed value would turn long names into huge offsets.
+                const uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
+                // type byte + two name size bytes = 3
+                const uint64_t headerSize = nameSize+3;
+
+                // validate the compound's contents, they start right after
+                // the header
+                uint64_t processedTagSize = 0;
+                if (!validateRawNBTData(data, dataSize, currentPosition+headerSize, &processedTagSize)) return finish(false);
+
+                currentPosition += headerSize+processedTagSize;
+                continue;
+            }
+
+            // currentPosition < dataSize holds here so this cannot wrap around
+            if (nextTagSize.value > dataSize-currentPosition) return finish(false);
+
+            // recursion abort condition
+            //
+            // The end tag is a single byte. It counts as consumed so that the
+            // caller carries on behind it instead of stopping at it as well.
+            if (tagType==TagType::END) {
+                currentPosition += 1;
+                return finish(true);
+            }
+
+            // attempt parsing the name
+            //
+            // This shouldn't matter too much here as the only error condition
+            // the parser function deals with rn is an overrun which is already
+            // being guarded against with the size check above.
+            // It might, however, turn out to be a useful check in the future.
+            auto name = helper::readString(data, dataSize, currentPosition+1);
+            if (name.isError) return finish(false);
+
+            // nameSize cannot be an error here bc it got checked in
+            // totalTagSize() already
+            const uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
+
+            // look at the current tag, not at the first byte of the data
+            switch (tagType) {
+                case TagType::INT8:
+                case TagType::INT16:
+                case TagType::INT32:
+                case TagType::INT64:
+                case TagType::FLOAT:
+                case TagType::DOUBLE:
+                case TagType::INT8_ARRAY:
+                case TagType::INT32_ARRAY:
+                case TagType::INT64_ARRAY:
+                    // nothing to check beyond the size
+                    break;
+                case TagType::STRING: {
+                    // attempt parsing the content
+                    //
+                    // This shouldn't matter too much here as the only
+                    // error condition the parser function deals with rn is
+                    // an overrun which is already being guarded against
+                    // with the size check above.
+                    // It might, however, turn out to be a useful check
+                    // in the future.
+                    //
+                    // type byte + two name size bytes = 3
+                    auto content = helper::readString(data, dataSize, currentPosition+nameSize+3);
+                    if (content.isError) return finish(false);
+                    break;
+                }
+                default:
+                    return finish(false);
+            }
+
+            currentPosition += nextTagSize.value;
+        }
+        return finish(true);
     }
 }
diff --git a/src/lib/nbt.hpp b/src/lib/nbt.hpp
index 3da58c9..81a735e 100644
--- a/src/lib/nbt.hpp
+++ b/src/lib/nbt.hpp
@@ -113,5 +113,5 @@ namespace NBT {
         bool validate(uint8_t data[]);
     };
 
-    bool validateRawNBTData(uint8_t data[], int length, uint64_t initialPosition=0);
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
 }