lib/nbt: Validator: Fix bytes not being added up correctly in multiple places

lib/nbt: NBT validator: Fix wrong function declaration in the header, fix not using the currentPosition variable when accessing data
lib/nbt: fix a bug in NBT::helper::readString() which caused it to assume that dataSize is the size of the string
2022-08-15 10:51:50 +02:00 · 2022-08-15 09:53:06 +02:00 · 2022-08-15 09:51:46 +02:00 · 2022-08-15 08:50:07 +02:00 · 2022-08-15 05:20:05 +02:00 · 2022-08-15 02:07:00 +02:00
3 changed files with 255 additions and 12 deletions
--- a/src/lib/javacompat.cpp
+++ b/src/lib/javacompat.cpp
@ -75,6 +75,7 @@ namespace JavaCompat {
            output.push_back(stdString[i]);
        }

+        //FIXME: endian-dependent implementation
        #ifdef FOSSVG_BIG_ENDIAN
            output[0] = *sizeBytes;
            output[1] = *(sizeBytes+1);
@ -83,7 +84,7 @@ namespace JavaCompat {
                output[0] = *(sizeBytes+1);
                output[1] = *sizeBytes;
            #else
-                #error "NBT::helper::writeInt16: An implementation for your endianness is unavailable."
+                #error "JavaCompat::exportJavaString: An implementation for your endianness is unavailable."
            #endif
        #endif

--- a/src/lib/nbt.cpp
+++ b/src/lib/nbt.cpp
@ -146,11 +146,16 @@ namespace NBT {
        }

        ErrorOr<tiny_utf8::string> readString(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
-            if(dataSize > 0xFFFF){
+            if(currentPosition > dataSize){
                return ErrorOr<tiny_utf8::string>(true, ErrorCodes::OVERRUN);
            }

-            ErrorOr<tiny_utf8::string> output = JavaCompat::importJavaString(data+currentPosition, (uint16_t) dataSize);
+            ErrorOr<int16_t> stringSize = readInt16(data, dataSize, currentPosition);
+            if (stringSize.isError) {
+                return ErrorOr<tiny_utf8::string>(true, stringSize.errorCode);
+            }
+
+            ErrorOr<tiny_utf8::string> output = JavaCompat::importJavaString(data+currentPosition, stringSize.value);
            if(output.isError){
                return ErrorOr<tiny_utf8::string>(true, output.errorCode);
            }
@ -385,9 +390,9 @@ namespace NBT {
        // be determined easily by looking at the contained data type and
        // size information but cases like string lists or compound lists
        // are significantly more difficult to deal with. Parsing their
-        // contents requires special attention anyway due the tag headers
-        // of contained tags being absent so they may as well get their
-        // own function for this as well.
+        // contents requires special attention anyway due to the tag headers
+        // of contained tags being absent so they may as well get treated
+        // separately for this as well.
        ErrorOr<uint64_t> totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            uint8_t nextTag;
            if (dataSize <= currentPosition) {
@ -569,10 +574,247 @@ namespace NBT {
        }
    }

-    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
-        //TODO: find out the size of the next tag
-        //TODO: consume tag
-        //TODO: recurse if tag compound and return if tag end
-        return false;
+    // Validates the list tag whose type byte is at initialPosition
+    // (type byte, name, contained type byte, element count, payload).
+    //
+    // data, dataSize:     raw NBT buffer and its size in bytes
+    // initialPosition:    index of the list tag's type byte
+    // processedDataSize:  output, must not be null (it is dereferenced
+    //                     unconditionally); once the header has been read
+    //                     it holds the number of bytes accounted for so
+    //                     far, counted from initialPosition
+    //
+    // Returns true if the list could be walked without leaving the buffer
+    // or hitting an unknown contained type, false otherwise.
+    bool validateRawList(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
+        ErrorOr<int32_t> elementCount = helper::containedDataLength(data, dataSize, initialPosition);
+        if (elementCount.isError) {
+            return false;
+        }
+        // there is no way this is an error bc it gets checked while trying
+        // to get the element count
+        int16_t nameSize = helper::readInt16(data, dataSize, initialPosition+1).value;
+        // type byte + two name size bytes = 3
+        uint8_t contentType = data[initialPosition + nameSize + 3];
+        // type byte + two name size bytes + contained type byte + 4 length bytes = 8
+        *processedDataSize = (uint64_t) nameSize + 8;
+        switch (contentType) {
+            case TagType::END:
+                // everything except content has been touched at this point
+                // and a list of end tags has no content
+                return true;
+            // Fixed-width payloads: the size follows from the element count.
+            // A list that ends exactly at the end of the buffer is still in
+            // bounds, hence "<=" (initialPosition + *processedDataSize is
+            // the index one past the last byte of the list).
+            case TagType::INT8:
+                *processedDataSize += (uint64_t) elementCount.value;
+                return initialPosition + *processedDataSize <= dataSize;
+            case TagType::INT16:
+                *processedDataSize += (uint64_t) elementCount.value * 2;
+                return initialPosition + *processedDataSize <= dataSize;
+            case TagType::INT32:
+            case TagType::FLOAT:
+                *processedDataSize += (uint64_t) elementCount.value * 4;
+                return initialPosition + *processedDataSize <= dataSize;
+            case TagType::INT64:
+            case TagType::DOUBLE:
+                *processedDataSize += (uint64_t) elementCount.value * 8;
+                return initialPosition + *processedDataSize <= dataSize;
+            // NOTE(review): the three array cases below only count the
+            // element bytes. If the array helpers read a 4-byte length
+            // prefix at the given position (the way readString reads its
+            // 2-byte prefix), those 4 bytes per array are missing from the
+            // sum - confirm against the helpers.
+            case TagType::INT8_ARRAY: {
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    ErrorOr<std::vector<int8_t>> nextArray = helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
+                    if (nextArray.isError) {
+                        return false;
+                    }
+                    *processedDataSize += (uint64_t) nextArray.value.size();
+                }
+                return true;
+            }
+            case TagType::STRING: {
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    ErrorOr<tiny_utf8::string> nextString = helper::readString(data, dataSize, initialPosition+*processedDataSize);
+                    if (nextString.isError) {
+                        return false;
+                    }
+                    // this cannot be an error because it just got checked
+                    int16_t nextStringSize = helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value;
+                    // string bytes + two size bytes
+                    *processedDataSize += (uint64_t) nextStringSize + 2;
+                }
+                return true;
+            }
+            case TagType::LIST: {
+                // NOTE(review): this function reads a type byte and a name
+                // at the position it is given, but tags contained in a list
+                // carry no tag header - nested lists presumably need
+                // header-less handling; confirm.
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    uint64_t containedDataSize = 0;
+                    if (!validateRawList(data, dataSize, initialPosition+*processedDataSize, &containedDataSize)) {
+                        return false;
+                    }
+                    *processedDataSize += containedDataSize;
+                }
+                return true;
+            }
+            case TagType::COMPOUND: {
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    uint64_t containedDataSize = 0;
+                    // Each compound starts where the previous element
+                    // ended. Validating at initialPosition would hand the
+                    // list tag itself back to validateRawNBTData, which
+                    // would call this function again at the same position
+                    // and never terminate.
+                    if (!validateRawNBTData(data, dataSize, initialPosition+*processedDataSize, &containedDataSize)) {
+                        return false;
+                    }
+                    *processedDataSize += containedDataSize;
+                }
+                return true;
+            }
+            case TagType::INT32_ARRAY: {
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    ErrorOr<std::vector<int32_t>> nextArray = helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
+                    if (nextArray.isError) {
+                        return false;
+                    }
+                    *processedDataSize += (uint64_t) nextArray.value.size() * 4;
+                }
+                return true;
+            }
+            case TagType::INT64_ARRAY: {
+                for (int32_t i=0; i<elementCount.value; i++) {
+                    ErrorOr<std::vector<int64_t>> nextArray = helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
+                    if (nextArray.isError) {
+                        return false;
+                    }
+                    *processedDataSize += (uint64_t) nextArray.value.size() * 8;
+                }
+                return true;
+            }
+            default:
+                return false;
+        }
+    }
+
+    // Checks whether the bytes starting at initialPosition form valid NBT
+    // data, walking tag by tag until an end tag or the end of the buffer.
+    //
+    // data, dataSize:     raw NBT buffer and its size in bytes
+    // initialPosition:    index of the first tag's type byte
+    // processedDataSize:  optional output (may be nullptr); receives the
+    //                     number of bytes walked, counted from
+    //                     initialPosition, on every exit path
+    //
+    // Returns true if an end tag was reached or the buffer ended on a tag
+    // boundary, false on any parse error or unknown tag type.
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
+        if (initialPosition >= dataSize) {
+            // Yes, this *could* return an instance of ErrorOr with
+            // ErrorCodes::OVERRUN but we only care to know if what is
+            // at that position is valid NBT which it clearly isn't according
+            // to the original spec.
+            if (processedDataSize!=nullptr) *processedDataSize=0;
+            return false;
+
+            // An interesting question at this point is whether we should
+            // consider empty input valid or invalid NBT data.
+            //
+            // The original spec says that the top-most tag is always a
+            // compound (or in more recent times, the Microsoft-commercialized
+            // in-game-purchase-enabling version also allows list tags)
+            // which automatically means that no data is invalid data...
+            // I don't see a reason why having a different tag as the top-most
+            // tag shouldn't be valid NBT in which case we have to face the
+            // question whether no data is invalid or just empty NBT data.
+            //
+            // This seems like a reasonable extension to the spec to me and
+            // it should be backwards compatible AFAIK.
+            //
+            // - BodgeMaster
+        }
+
+        uint64_t currentPosition = initialPosition;
+        // Every exit below has to report how far validation got. This
+        // helper replaces the former `#define return ...` trick: keywords
+        // must not be redefined as macros, and the macro silently broke
+        // any brace-less `if (...) return ...;`.
+        auto finish = [&](bool result) -> bool {
+            if (processedDataSize!=nullptr) {
+                *processedDataSize = currentPosition-initialPosition;
+            }
+            return result;
+        };
+
+        while (currentPosition<dataSize) {
+            ErrorOr<uint64_t> nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
+            if (nextTagSize.isError) {
+                if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) {
+                    return finish(false);
+                }
+
+                // The size of this tag cannot be determined up front, so
+                // it has to be walked.
+                //
+                // attempt parsing the name
+                ErrorOr<tiny_utf8::string> tagName = helper::readString(data, dataSize, currentPosition+1);
+                if (tagName.isError) {
+                    return finish(false);
+                }
+
+                uint64_t processedTagSize = 0;
+
+                if (data[currentPosition]==TagType::LIST) {
+                    if (!validateRawList(data, dataSize, currentPosition, &processedTagSize)) {
+                        return finish(false);
+                    }
+                } else if (data[currentPosition]==TagType::COMPOUND) {
+                    // seek to the start of the compound's contents
+                    //
+                    // there is no way this is an error bc it gets
+                    // checked while trying to parse the string above
+                    int16_t nameSize = helper::readInt16(data, dataSize, currentPosition+1).value;
+
+                    // type byte + two name size bytes = 3
+                    if (!validateRawNBTData(data, dataSize, currentPosition + (uint64_t) nameSize + 3, &processedTagSize)) {
+                        return finish(false);
+                    }
+                    processedTagSize += (uint64_t) nameSize + 3;
+                } else {
+                    // No other tag type is handled here. Continuing would
+                    // leave currentPosition unchanged and loop forever,
+                    // so treat it as invalid.
+                    return finish(false);
+                }
+
+                currentPosition += processedTagSize;
+                continue;
+            }
+
+            if (currentPosition + nextTagSize.value > dataSize) {
+                return finish(false);
+            }
+
+            // recursion abort condition
+            if (data[currentPosition]==TagType::END) {
+                // count the end tag itself as processed
+                currentPosition++;
+                return finish(true);
+            }
+
+            // nameSize cannot be an error here bc it got checked in
+            // nextTagSize() already
+            int16_t nameSize = helper::readInt16(data, dataSize, currentPosition+1).value;
+
+            // attempt parsing the name
+            //
+            // This shouldn't matter too much here as the only error condition
+            // the parser function deals with rn is an overrun which is already
+            // being guarded against with
+            // if (currentPosition + nextTagSize.value > dataSize) return false;
+            // It might, however, turn out to be a useful check in the future.
+            ErrorOr<tiny_utf8::string> name = helper::readString(data, dataSize, currentPosition+1);
+            if (name.isError) {
+                return finish(false);
+            }
+
+            switch (data[currentPosition]) {
+                case TagType::INT8:
+                case TagType::INT16:
+                case TagType::INT32:
+                case TagType::INT64:
+                case TagType::FLOAT:
+                case TagType::DOUBLE:
+                case TagType::INT8_ARRAY:
+                    break;
+                case TagType::STRING: {
+                    // attempt parsing the content
+                    //
+                    // This shouldn't matter too much here as the only
+                    // error condition the parser function deals with rn is
+                    // an overrun which is already being guarded against with
+                    // if (currentPosition + nextTagSize.value > dataSize) return false;
+                    // It might, however, turn out to be a useful check
+                    // in the future.
+                    //
+                    // type byte + two name size bytes = 3
+                    ErrorOr<tiny_utf8::string> content = helper::readString(data, dataSize, currentPosition+nameSize+3);
+                    if (content.isError) {
+                        return finish(false);
+                    }
+                    break;
+                }
+                case TagType::INT32_ARRAY:
+                case TagType::INT64_ARRAY:
+                    break;
+                default:
+                    return finish(false);
+            }
+
+            currentPosition += nextTagSize.value;
+        }
+        return finish(true);
    }
 }
--- a/src/lib/nbt.hpp
+++ b/src/lib/nbt.hpp
@ -113,5 +113,5 @@ namespace NBT {
        bool validate(uint8_t data[]);
    };

-    bool validateRawNBTData(uint8_t data[], int length, uint64_t initialPosition=0);
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
 }
Author	SHA1	Message	Date
BodgeMaster	25bec4c587	lib/nbt: Validator: Fix bytes not being added up correctly in multiple places	2022-08-15 10:51:50 +02:00
BodgeMaster	589cf1ddaf	lib/nbt: NBT validator: Fix wrong function declaration in the header, fix not using the currentPosition variable when accessing data	2022-08-15 09:53:06 +02:00
BodgeMaster	884a5239c6	lib/nbt: fix a bug in NBT::helper::readString() which caused it to assume that dataSize is the size of the string	2022-08-15 09:51:46 +02:00
BodgeMaster	9190cad80d	lib/nbt: finish implementation of validateRawNBTData() and fix a critical macro-induced bug I did a `#define return` and then tried to `if () return;` everywhere...	2022-08-15 08:50:07 +02:00
BodgeMaster	a862590370	lib/nbt: Start implementing the NBT validator In theory, this is it. It’s just missing the portion that deals with lists and unit tests. Both will each likely require similar effort to this.	2022-08-15 05:20:05 +02:00
BodgeMaster	3995e97f03	lib/javacompat: Make the endianness error message refer to the correct function	2022-08-15 02:07:00 +02:00