lib/nbt: Start implementing the NBT validator

In theory, this is it. It’s just missing the portion that deals with lists, and the unit tests. Each of those will likely require about as much effort as this change did.
2022-08-15 05:15:17 +02:00 · 2022-08-15 05:15:17 +02:00 · a862590370
parent 3995e97f03
commit a862590370
2 changed files with 115 additions and 9 deletions
--- a/src/lib/nbt.cpp
+++ b/src/lib/nbt.cpp
@ -385,9 +385,9 @@ namespace NBT {
        // be determined easily by looking at the contained data type and
        // size information but cases like string lists or compound lists
        // are significantly more difficult to deal with. Parsing their
-        // contents requires special attention anyway due the tag headers
-        // of contained tags being absent so they may as well get their
-        // own function for this as well.
+        // contents requires special attention anyway due to the tag headers
+        // of contained tags being absent so they may as well get treated
+        // separately for this as well.
        ErrorOr<uint64_t> totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            uint8_t nextTag;
            if (dataSize <= currentPosition) {
@ -569,10 +569,116 @@ namespace NBT {
        }
    }

-    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
-        //TODO: find out the size of the next tag
-        //TODO: consume tag
-        //TODO: recurse if tag compound and return if tag end
-        return false;
+    // Checks whether the bytes starting at initialPosition are valid NBT data.
+    //
+    // Parameters:
+    //   data              - buffer holding the (supposed) NBT data
+    //   dataSize          - number of bytes in data
+    //   initialPosition   - offset of the first tag to validate
+    //   processedDataSize - optional out-parameter; when not nullptr it
+    //                       receives the number of bytes consumed, counted
+    //                       from initialPosition, whenever the function
+    //                       returns
+    //
+    // Returns true when every tag up to the end of the data, or up to the
+    // first end tag, could be validated. When validation stops at an end
+    // tag, that end tag is NOT included in processedDataSize; whoever
+    // opened the compound is responsible for consuming it.
+    //
+    // Lists whose size cannot be determined up front are not supported yet
+    // and are reported as invalid.
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
+        // An interesting question at this point is whether we should
+        // consider empty input valid or invalid NBT data.
+        //
+        // The original spec says that the top-most tag is always a
+        // compound (or in more recent times, the Microsoft-commercialized
+        // in-game-purchase-enabling version also allows list tags)
+        // which automatically means that no data is invalid data...
+        // I don't see a reason why having a different tag as the top-most
+        // tag shouldn't be valid NBT in which case we have to face the
+        // question whether no data is invalid or just empty NBT data.
+        //
+        // This seems like a reasonable extension to the spec to me and
+        // it should be backwards compatible AFAIK.
+        //
+        // - BodgeMaster
+        if (initialPosition >= dataSize) {
+            // Yes, this *could* return an instance of ErrorOr with
+            // ErrorCodes::OVERRUN but we only care to know if what is
+            // at that position is valid NBT which it clearly isn't according
+            // to the original spec.
+            if (processedDataSize!=nullptr) *processedDataSize=0;
+            return false;
+        }
+
+        uint64_t currentPosition = initialPosition;
+
+        // Every exit below has to report how far validation got. This used
+        // to be done by redefining `return` as a macro, but that expanded
+        // into two statements, so `if (x) return false;` returned
+        // unconditionally. A small helper keeps it to one expression.
+        auto finish = [&](bool result) -> bool {
+            if (processedDataSize!=nullptr) *processedDataSize = currentPosition-initialPosition;
+            return result;
+        };
+
+        while (currentPosition<dataSize) {
+            ErrorOr<uint64_t> nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
+            if (nextTagSize.isError) {
+                if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) return finish(false);
+
+                // attempt parsing the name
+                ErrorOr<tiny_utf8::string> tagName = helper::readString(data, dataSize, currentPosition+1);
+                if (tagName.isError) return finish(false);
+
+                if (data[currentPosition]!=TagType::COMPOUND) {
+                    //TODO: handle list
+                    //
+                    // Until lists are implemented, nothing is known about
+                    // how many bytes to skip. Reporting the data as invalid
+                    // is better than continuing without advancing, which
+                    // would loop forever.
+                    return finish(false);
+                }
+
+                // seek to the start of the compound's contents
+                //
+                // there is no way this is an error bc it gets
+                // checked while trying to parse the string above
+                //
+                // The length is reinterpreted as unsigned so that a value
+                // with the high bit set cannot turn into a huge offset.
+                // NOTE(review): assumes readString() treats the length
+                // prefix the same way - confirm.
+                uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
+
+                // type byte + two name size bytes = 3
+                uint64_t contentPosition = currentPosition + nameSize + 3;
+                uint64_t processedTagSize = 0;
+                if (!validateRawNBTData(data, dataSize, contentPosition, &processedTagSize)) return finish(false);
+
+                // The nested call stops either at the compound's end tag
+                // or at the end of the data. Only the former is a properly
+                // terminated compound.
+                uint64_t endPosition = contentPosition + processedTagSize;
+                if (endPosition >= dataSize || data[endPosition]!=TagType::END) return finish(false);
+
+                // continue behind the end tag so that it does not get
+                // mistaken for the end of the enclosing compound
+                currentPosition = endPosition + 1;
+                continue;
+            }
+
+            // A tag of size zero would keep the loop from advancing.
+            if (nextTagSize.value == 0) return finish(false);
+
+            // Written as a subtraction so that the check cannot wrap
+            // around; currentPosition < dataSize is guaranteed by the loop.
+            if (nextTagSize.value > dataSize - currentPosition) return finish(false);
+
+            // recursion abort condition
+            if (data[currentPosition]==TagType::END) return finish(true);
+
+            // nameSize cannot be an error here bc it got checked in
+            // totalTagSize() already
+            uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
+
+            // attempt parsing the name
+            //
+            // This shouldn't matter too much here as the only error condition
+            // the parser function deals with rn is an overrun which is already
+            // being guarded against by the size check above.
+            // It might, however, turn out to be a useful check in the future.
+            ErrorOr<tiny_utf8::string> name = helper::readString(data, dataSize, currentPosition+1);
+            if (name.isError) return finish(false);
+
+            // look at the type of the tag currently being validated
+            switch (data[currentPosition]) {
+                case TagType::INT8:
+                case TagType::INT16:
+                case TagType::INT32:
+                case TagType::INT64:
+                case TagType::FLOAT:
+                case TagType::DOUBLE:
+                case TagType::INT8_ARRAY:
+                case TagType::INT32_ARRAY:
+                case TagType::INT64_ARRAY:
+                    // nothing to check beyond the size
+                    break;
+                case TagType::STRING: {
+                    // attempt parsing the content
+                    //
+                    // Same reasoning as for the name above: currently
+                    // redundant with the size check but possibly useful
+                    // in the future.
+                    //
+                    // type byte + two name size bytes = 3
+                    ErrorOr<tiny_utf8::string> content = helper::readString(data, dataSize, currentPosition+nameSize+3);
+                    if (content.isError) return finish(false);
+                    break;
+                }
+                default:
+                    return finish(false);
+            }
+
+            currentPosition += nextTagSize.value;
+        }
+        return finish(true);
+    }
 }
--- a/src/lib/nbt.hpp
+++ b/src/lib/nbt.hpp
@ -113,5 +113,5 @@ namespace NBT {
        bool validate(uint8_t data[]);
    };

-    bool validateRawNBTData(uint8_t data[], int length, uint64_t initialPosition=0);
+    // Checks whether data (dataSize bytes long) contains valid NBT starting
+    // at initialPosition. If processedDataSize is not nullptr, it receives
+    // the number of bytes consumed. The parameter types match the
+    // definition in nbt.cpp so that both refer to the same function.
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
 }