lib/nbt: Start implementing NBT validator

2022-08-08 14:17:35 +02:00 · 2022-08-08 14:17:35 +02:00 · c7dd5471dd
parent 8048dc8891
commit c7dd5471dd
3 changed files with 184 additions and 7 deletions
--- a/src/lib/error.hpp
+++ b/src/lib/error.hpp
@ -78,10 +78,12 @@ namespace ErrorCodes {
    // when dealing with maps
    const uint8_t UNKNOWN_KEY = 5;
    //mismatched size in java strings
    const uint8_t MISMATCHEDSIZE = 6;
    // a size cannot be determined at this point; NBT::nextTagTotalSize reports
    // this code for compound tags and for lists of compound or list tags
    const uint8_t NOT_YET_KNOWN = 7;
    // presumably meant for functionality that has not been written yet
    // (no use is visible here) - TODO confirm
    const uint8_t UNIMPLEMENTED = 254;
    // catch-all code; used for example when a tag type is not recognised
    const uint8_t UNKNOWN = 255;
    // NOTE(review): MISMATCHEDSIZE is defined a second time below. This looks
    // like the old position of a line that was moved up - confirm that the
    // header only contains a single definition.
    //mismatched size in java strings
    const uint8_t MISMATCHEDSIZE = 6;
 }
--- a/src/lib/nbt.cpp
+++ b/src/lib/nbt.cpp
@ -424,9 +424,165 @@ namespace NBT {
        }
    }
-    bool validateRawNBTData(uint8_t data[], uint64_t dataSize){
    // nextTagType: returns the byte stored at data[currentPosition], i.e. the
    // tag type when currentPosition is the first byte of a tag.
    //
    // data            - raw bytes to look at
    // dataSize        - number of bytes in data
    // currentPosition - index of the byte to return
    //
    // Returns an error with code OVERRUN when currentPosition is not smaller
    // than dataSize, so data is never indexed outside of dataSize bytes.
    // The returned byte is not checked against the known tag types.
+    ErrorOr<uint8_t> nextTagType(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
-        //state machine?
+        if (dataSize <= currentPosition) {
-        //TODO: implement
+            return ErrorOr<uint8_t>(true, ErrorCodes::OVERRUN);
        } else {
            return ErrorOr<uint8_t>(data[currentPosition]);
        }
    }
    //FIXME: instead of blindly passing the error code upwards, choose one that
    // is applicable to the situation (for example replace OUT_OF_RANGE with
    // OVERRUN where appropriate)
    //
    // Computes how many bytes the tag starting at currentPosition occupies in
    // total: type byte + 2 byte name size + name + payload.
    //
    // data            - raw NBT bytes
    // dataSize        - number of bytes in data
    // currentPosition - index of the type byte of the tag to measure
    //
    // Returns
    //  - the total size in bytes (1 for an END tag, which has no name),
    //  - ErrorOr<uint64_t>(false, NOT_YET_KNOWN) for compound tags and for
    //    lists of compound or list tags, whose size cannot be derived from the
    //    header alone,
    //    NOTE(review): this is deliberately kept as a non-error result like in
    //    the previous version; confirm how callers are meant to detect it.
    //  - the error of the failing read when the header or a length field is
    //    not fully contained in data,
    //  - OUT_OF_RANGE for a negative array length,
    //  - UNKNOWN for unrecognised tag types and for list element types that
    //    are not handled yet.
    //
    // Only the header and length fields are read; whether the payload itself
    // fits into data is not checked here.
    ErrorOr<uint64_t> nextTagTotalSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
        ErrorOr<uint8_t> nextTag = nextTagType(data, dataSize, currentPosition);
        if (nextTag.isError) {
            return ErrorOr<uint64_t>(true, nextTag.errorCode);
        }
        // deal with compound tags separately
        if (nextTag.value == TagType::COMPOUND) return ErrorOr<uint64_t>(false, ErrorCodes::NOT_YET_KNOWN);
        // deal with end tag before trying to access the name
        if (nextTag.value == TagType::END) return ErrorOr<uint64_t>(1);

        // get name size
        ErrorOr<int16_t> rawNameSize = helper::readInt16(data, dataSize, currentPosition+1);
        if (rawNameSize.isError) {
            return ErrorOr<uint64_t>(true, rawNameSize.errorCode);
        }
        // The name size is an unsigned 16 bit value; go through uint16_t so
        // that sizes above 32767 are not sign-extended into huge numbers.
        const uint64_t nameSize = static_cast<uint16_t>(rawNameSize.value);
        // type byte + 2 name size bytes + name
        const uint64_t headerSize = 3 + nameSize;
        // first byte after the name, i.e. where the payload begins
        const uint64_t payloadPosition = currentPosition + headerSize;

        switch (nextTag.value) {
            case TagType::INT8:
                return ErrorOr<uint64_t>(headerSize + 1);
            case TagType::INT16:
                return ErrorOr<uint64_t>(headerSize + 2);
            case TagType::INT32:
            case TagType::FLOAT:
                return ErrorOr<uint64_t>(headerSize + 4);
            case TagType::INT64:
            case TagType::DOUBLE:
                return ErrorOr<uint64_t>(headerSize + 8);
            case TagType::INT8_ARRAY:
            case TagType::INT32_ARRAY:
            case TagType::INT64_ARRAY: {
                // bytes per array entry
                uint64_t elementSize = 1;
                if (nextTag.value == TagType::INT32_ARRAY) elementSize = 4;
                if (nextTag.value == TagType::INT64_ARRAY) elementSize = 8;
                // the payload starts with the 4 byte entry count
                ErrorOr<int32_t> arraySize = helper::readInt32(data, dataSize, payloadPosition);
                if (arraySize.isError) {
                    return ErrorOr<uint64_t>(true, arraySize.errorCode);
                }
                // a negative count would wrap around to a gigantic size
                if (arraySize.value < 0) {
                    return ErrorOr<uint64_t>(true, ErrorCodes::OUT_OF_RANGE);
                }
                // header + 4 size bytes + entries
                return ErrorOr<uint64_t>(headerSize + 4 + static_cast<uint64_t>(arraySize.value) * elementSize);
            }
            case TagType::STRING: {
                // the payload starts with the 2 byte (unsigned) string size
                ErrorOr<int16_t> stringSize = helper::readInt16(data, dataSize, payloadPosition);
                if (stringSize.isError) {
                    return ErrorOr<uint64_t>(true, stringSize.errorCode);
                }
                // header + 2 size bytes + string data
                return ErrorOr<uint64_t>(headerSize + 2 + static_cast<uint64_t>(static_cast<uint16_t>(stringSize.value)));
            }
            case TagType::LIST: {
                // the payload starts with the element type (1 byte) followed
                // by the 4 byte element count
                ErrorOr<uint8_t> containedType = nextTagType(data, dataSize, payloadPosition);
                if (containedType.isError) {
                    return ErrorOr<uint64_t>(true, containedType.errorCode);
                }
                ErrorOr<int32_t> listSize = helper::readInt32(data, dataSize, payloadPosition+1);
                if (listSize.isError) {
                    return ErrorOr<uint64_t>(true, listSize.errorCode);
                }
                // Can we just multiply list size with data type size?
                uint64_t elementSize = 0;
                switch (containedType.value) {
                    case TagType::END:
                        // value kept from the previous version; a list of END
                        // tags is expected to be empty - TODO confirm
                    case TagType::INT8:
                        elementSize = 1;
                        break;
                    case TagType::INT16:
                        elementSize = 2;
                        break;
                    case TagType::INT32:
                    case TagType::FLOAT:
                        elementSize = 4;
                        break;
                    case TagType::INT64:
                    case TagType::DOUBLE:
                        elementSize = 8;
                        break;
                    case TagType::COMPOUND:
                    case TagType::LIST:
                        return ErrorOr<uint64_t>(false, ErrorCodes::NOT_YET_KNOWN);
                    default:
                        //TODO: INT8_ARRAY, STRING, INT32_ARRAY, INT64_ARRAY
                        return ErrorOr<uint64_t>(true, ErrorCodes::UNKNOWN);
                }
                // The NBT format description allows a zero or negative count
                // for an empty list, so treat negative counts as "no elements".
                const uint64_t elementCount = listSize.value > 0 ? static_cast<uint64_t>(listSize.value) : 0;
                // header + type prefix + 4 size bytes + elements
                return ErrorOr<uint64_t>(headerSize + 5 + elementCount * elementSize);
            }
            // fall-through in case of unknown tag or parsing error
            default:
                return ErrorOr<uint64_t>(true, ErrorCodes::UNKNOWN);
        }
    }
    // Intended to report the size of the data carried by the tag starting at
    // currentPosition. Only the cases below are implemented so far.
    //
    // data            - raw NBT bytes
    // dataSize        - number of bytes in data
    // currentPosition - index of the type byte of the tag to measure
    //
    // Returns
    //  - 0 for an END tag,
    //  - the error reported by nextTagType when the type byte cannot be read,
    //  - an error with code NOT_YET_KNOWN for compound tags,
    //  - an error with code UNKNOWN for every other tag type (not implemented
    //    yet).
    ErrorOr<uint32_t> nextTagDataSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition){
        ErrorOr<uint8_t> nextTag = nextTagType(data, dataSize, currentPosition);
        if (nextTag.isError) {
            return ErrorOr<uint32_t>(true, nextTag.errorCode);
        }
        // deal with compound tags separately
        if (nextTag.value == TagType::COMPOUND) return ErrorOr<uint32_t>(true, ErrorCodes::NOT_YET_KNOWN);
        // deal with end tag before trying to access the name
        if (nextTag.value == TagType::END) return ErrorOr<uint32_t>(0);
        //TODO: implement for all the remaining types
        // fall-through in case of unknown tag or parsing error
        return ErrorOr<uint32_t>(true, ErrorCodes::UNKNOWN);
    }
    // Placeholder for the NBT validator: no byte of the input is inspected yet
    // and every input is reported as invalid.
    //
    // data            - raw NBT bytes (currently unused)
    // dataSize        - number of bytes in data (currently unused)
    // initialPosition - index at which validation is supposed to start
    //                   (currently unused)
    //
    // Returns false unconditionally.
    //
    // Planned steps:
    //TODO: find out the size of the next tag
    //TODO: consume tag
    //TODO: recurse if tag compound and return if tag end
    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
        // mark the parameters as intentionally unused until the steps above exist
        (void) data;
        (void) dataSize;
        (void) initialPosition;
        const bool isValid = false;
        return isValid;
    }
 }
--- a/src/lib/nbt.hpp
+++ b/src/lib/nbt.hpp
@ -68,6 +68,22 @@ namespace NBT {
        void writeInt64Array(std::vector<uint8_t>* destination, int64_t data[], uint32_t dataSize);
    }
    // Numeric IDs identifying the type of an NBT tag (the first byte of a tag).
    namespace TagType {
        // marks the end of a compound; carries neither name nor data
        const uint8_t END         = 0x00;
        // fixed size values
        const uint8_t INT8        = 0x01; // 1 data byte
        const uint8_t INT16       = 0x02; // 2 data bytes
        const uint8_t INT32       = 0x03; // 4 data bytes
        const uint8_t INT64       = 0x04; // 8 data bytes
        const uint8_t FLOAT       = 0x05; // 4 data bytes
        const uint8_t DOUBLE      = 0x06; // 8 data bytes
        // variable size values, prefixed with their size
        const uint8_t INT8_ARRAY  = 0x07; // 4 size bytes, 1 byte per entry
        const uint8_t STRING      = 0x08; // 2 size bytes + string data
        const uint8_t LIST        = 0x09; // type prefix + 4 size bytes + entries
        // container whose size is not known from its header
        const uint8_t COMPOUND    = 0x0A;
        const uint8_t INT32_ARRAY = 0x0B; // 4 size bytes, 4 bytes per entry
        const uint8_t INT64_ARRAY = 0x0C; // 4 size bytes, 8 bytes per entry
    }
    //Generic parent class to make declaration easier
    template <typename T>
    class Tag{
@ -94,5 +110,8 @@ namespace NBT {
        bool validate(uint8_t data[]);
    };
-    bool validateRawNBTData(uint8_t data[], int length);
+    ErrorOr<uint8_t> nextTagType(uint8_t data[], uint64_t dataSize, uint64_t currentPosition);
    // ^ nextTagType: returns the byte at currentPosition (the type of the tag
    // starting there) or an OVERRUN error when currentPosition is not inside
    // the dataSize bytes of data.

    // Total number of bytes occupied by the tag starting at currentPosition.
    // Compound tags are reported with the code NOT_YET_KNOWN instead of a size.
    ErrorOr<uint64_t> nextTagTotalSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition);
    // Size of the data of the tag starting at currentPosition; currently only
    // END tags (size 0) yield a value, everything else yields an error.
    ErrorOr<uint32_t> nextTagDataSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition);
    // Checks whether data holds valid NBT, starting at initialPosition.
    //
    // data            - raw NBT bytes
    // dataSize        - number of bytes in data
    // initialPosition - index at which validation starts (defaults to 0)
    //
    // The parameter types match the definition in nbt.cpp
    // (uint8_t[], uint64_t, uint64_t); declaring the size as int would declare
    // a different overload that has no definition.
    // NOTE(review): the definition visible in nbt.cpp is still a stub that
    // always returns false.
    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0);
 }