From a86259037022208a72ab6265195c441707b4a10c Mon Sep 17 00:00:00 2001
From: BodgeMaster <>
Date: Mon, 15 Aug 2022 05:15:17 +0200
Subject: [PATCH] lib/nbt: Start implementing the NBT validator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In theory, this is it. It’s just missing the portion that deals with lists
and unit tests. Each of those will likely require a similar amount of effort.
---
 src/lib/nbt.cpp | 122 ++++++++++++++++++++++++++++++++++++++++++++----
 src/lib/nbt.hpp |   2 +-
 2 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/src/lib/nbt.cpp b/src/lib/nbt.cpp
index c83afa4..671c46f 100644
--- a/src/lib/nbt.cpp
+++ b/src/lib/nbt.cpp
@@ -385,9 +385,9 @@ namespace NBT {
         // be determined easily by looking at the contained data type and
         // size information but cases like string lists or compound lists
         // are significantly more difficult to deal with. Parsing their
-        // contents requires special attention anyway due the tag headers
-        // of contained tags being absent so they may as well get their
-        // own function for this as well.
+        // contents requires special attention anyway due to the tag headers
+        // of contained tags being absent so they may as well get treated
+        // separately for this as well.
         ErrorOr<uint64_t> totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
             uint8_t nextTag;
             if (dataSize <= currentPosition) {
@@ -569,10 +569,116 @@ namespace NBT {
         }
     }
 
-    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
-        //TODO: find out the size of the next tag
-        //TODO: consume tag
-        //TODO: recurse if tag compound and return if tag end
-        return false;
+    // Checks whether the bytes of data starting at initialPosition are valid
+    // NBT. Returns true if an end tag or the end of the input is reached
+    // without running into invalid data. Compounds are validated recursively,
+    // lists are not handled yet and get reported as invalid.
+    //
+    // If processedDataSize is not nullptr, it receives the number of bytes
+    // consumed relative to initialPosition, whatever the return value is.
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
+        if (initialPosition >= dataSize) {
+            // Yes, this *could* return an instance of ErrorOr with
+            // ErrorCodes::OVERRUN but we only care to know if what is
+            // at that position is valid NBT which it clearly isn't according
+            // to the original spec.
+            //
+            // An interesting question at this point is whether we should
+            // consider empty input valid or invalid NBT data.
+            //
+            // The original spec says that the top-most tag is always a
+            // compound (or in more recent times, the Microsoft-commercialized
+            // in-game-purchase-enabling version also allows list tags)
+            // which automatically means that no data is invalid data...
+            // I don't see a reason why having a different tag as the top-most
+            // tag shouldn't be valid NBT in which case we have to face the
+            // question whether no data is invalid or just empty NBT data.
+            //
+            // This seems like a reasonable extension to the spec to me and
+            // it should be backwards compatible AFAIK.
+            //
+            // - BodgeMaster
+            if (processedDataSize!=nullptr) *processedDataSize=0;
+            return false;
+        }
+
+        uint64_t currentPosition = initialPosition;
+        // every exit from here on reports how far validation got
+        auto finish = [&](bool isValid) {
+            if (processedDataSize!=nullptr) *processedDataSize = currentPosition-initialPosition;
+            return isValid;
+        };
+
+        while (currentPosition<dataSize) {
+            const uint8_t tagType = data[currentPosition];
+            ErrorOr<uint64_t> nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
+            if (nextTagSize.isError) {
+                if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) return finish(false);
+                // attempt parsing the name
+                ErrorOr<tiny_utf8::string> tagName = helper::readString(data, dataSize, currentPosition+1);
+                if (tagName.isError) return finish(false);
+                //TODO: handle list
+                // Until then, only compounds are dealt with. Anything else has
+                // no size to advance by and would be looked at forever.
+                if (tagType != TagType::COMPOUND) return finish(false);
+                // seek to the start of the compound's contents
+                //
+                // Reading the name size cannot fail here bc that got checked
+                // while parsing the string above. It is reinterpreted as
+                // unsigned so sizes above 32767 cannot move the position back.
+                //
+                // type byte + two name size bytes = 3
+                uint64_t headerSize = (uint64_t) static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value) + 3;
+                uint64_t contentSize = 0;
+                if (!validateRawNBTData(data, dataSize, currentPosition+headerSize, &contentSize)) return finish(false);
+                currentPosition += headerSize + contentSize;
+                continue;
+            }
+
+            // currentPosition < dataSize is guaranteed here, no wrap-around
+            if (nextTagSize.value > dataSize-currentPosition) return finish(false);
+
+            // recursion abort condition; the end tag's type byte counts as
+            // consumed so the caller carries on behind it
+            if (tagType==TagType::END) {
+                currentPosition += 1;
+                return finish(true);
+            }
+
+            // nameSize cannot be an error here bc it got checked in
+            // totalTagSize() already, reinterpreted as unsigned like above
+            uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
+
+            // attempt parsing the name
+            //
+            // The only error the parser function deals with rn is an overrun
+            // which the size check above already guards against. It might,
+            // however, turn out to be a useful check in the future.
+            ErrorOr<tiny_utf8::string> name = helper::readString(data, dataSize, currentPosition+1);
+            if (name.isError) return finish(false);
+            switch (tagType) {
+                case TagType::INT8:
+                case TagType::INT16:
+                case TagType::INT32:
+                case TagType::INT64:
+                case TagType::FLOAT:
+                case TagType::DOUBLE:
+                case TagType::INT8_ARRAY:
+                case TagType::INT32_ARRAY:
+                case TagType::INT64_ARRAY:
+                    break;
+                case TagType::STRING: {
+                    // attempt parsing the content; type byte + two name size bytes = 3
+                    ErrorOr<tiny_utf8::string> content = helper::readString(data, dataSize, currentPosition+nameSize+3);
+                    if (content.isError) return finish(false);
+                    break;
+                }
+                default:
+                    return finish(false);
+            }
+
+            currentPosition += nextTagSize.value;
+        }
+        return finish(true);
     }
 }
diff --git a/src/lib/nbt.hpp b/src/lib/nbt.hpp
index 3da58c9..81a735e 100644
--- a/src/lib/nbt.hpp
+++ b/src/lib/nbt.hpp
@@ -113,5 +113,5 @@ namespace NBT {
         bool validate(uint8_t data[]);
     };
 
-    bool validateRawNBTData(uint8_t data[], int length, uint64_t initialPosition=0);
+    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
 }