FOSS-VG/src/lib/nbt.cpp

// Copyright 2022, FOSS-VG Developers and Contributers
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// version 3 along with this program.
// If not, see https://www.gnu.org/licenses/agpl-3.0.en.html

#include <bit>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>
#include <tinyutf8/tinyutf8.h>

#include "nbt.hpp"
#include "error.hpp"
#include "javacompat.hpp"


#include "../../.endianness"
#ifdef FOSSVG_ENDIAN_BIG_WORD
    #error "Honeywell-316-style endianness is not supported. If you feel like it should, feel free to participate in the project to maintain it."
#endif
#ifdef FOSSVG_ENDIAN_LITTLE_WORD
    #error "PDP-11-style endianness is not supported. If you feel like it should, feel free to participate in the project to maintain it."
#endif
#ifdef FOSSVG_ENDIAN_UNKNOWN
    #error "The endianness of your system could not be determined. Please set it manually. FOSS-VG is currently implemented using some endian-specific functions."
#endif

namespace NBT {
    namespace helper {
        // Reads one signed byte from data at currentPosition.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the byte to read
        //
        // Returns the byte reinterpreted as int8_t or ErrorCodes::OUT_OF_RANGE
        // if currentPosition does not lie inside the buffer.
        ErrorOr<int8_t> readInt8(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // Compare the position directly instead of computing
            // currentPosition+1 which wraps around to 0 for the largest
            // uint64_t value and would let the read through.
            if (currentPosition >= dataSize) return ErrorOr<int8_t>(true, ErrorCodes::OUT_OF_RANGE);
            return ErrorOr<int8_t>(static_cast<int8_t>(data[currentPosition]));
        }

        // Reads a 16-bit signed integer stored most significant byte first.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the first (most significant) byte
        //
        // Returns the decoded value or ErrorCodes::OUT_OF_RANGE if the two
        // bytes do not fit inside the buffer.
        ErrorOr<int16_t> readInt16(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // Written as a subtraction so that a huge currentPosition cannot
            // wrap around and pass the check.
            if (dataSize < 2 || currentPosition > dataSize-2) return ErrorOr<int16_t>(true, ErrorCodes::OUT_OF_RANGE);
            // Assemble the bit pattern as unsigned and convert once at the end.
            const uint16_t bits = static_cast<uint16_t>(
                (static_cast<uint16_t>(data[currentPosition]) << 8) |
                 static_cast<uint16_t>(data[currentPosition+1])
            );
            return ErrorOr<int16_t>(static_cast<int16_t>(bits));
        }

        // Reads a 32-bit signed integer stored most significant byte first.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the first (most significant) byte
        //
        // Returns the decoded value or ErrorCodes::OUT_OF_RANGE if the four
        // bytes do not fit inside the buffer.
        ErrorOr<int32_t> readInt32(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // Written as a subtraction so that a huge currentPosition cannot
            // wrap around and pass the check.
            if (dataSize < 4 || currentPosition > dataSize-4) return ErrorOr<int32_t>(true, ErrorCodes::OUT_OF_RANGE);
            // Assemble the bit pattern as unsigned to avoid shifting a set bit
            // into the sign bit of a signed integer.
            uint32_t bits = 0;
            for (uint64_t offset = 0; offset < 4; offset++) {
                bits = (bits << 8) | data[currentPosition+offset];
            }
            return ErrorOr<int32_t>(static_cast<int32_t>(bits));
        }

        // Reads a 64-bit signed integer stored most significant byte first.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the first (most significant) byte
        //
        // Returns the decoded value or ErrorCodes::OUT_OF_RANGE if the eight
        // bytes do not fit inside the buffer.
        ErrorOr<int64_t> readInt64(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // Written as a subtraction so that a huge currentPosition cannot
            // wrap around and pass the check.
            if (dataSize < 8 || currentPosition > dataSize-8) return ErrorOr<int64_t>(true, ErrorCodes::OUT_OF_RANGE);
            // Assemble the bit pattern as unsigned to avoid shifting a set bit
            // into the sign bit of a signed integer.
            uint64_t bits = 0;
            for (uint64_t offset = 0; offset < 8; offset++) {
                bits = (bits << 8) | data[currentPosition+offset];
            }
            return ErrorOr<int64_t>(static_cast<int64_t>(bits));
        }

        // Reads a 32-bit float stored most significant byte first.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the first (most significant) byte
        //
        // Returns the decoded value, ErrorCodes::OUT_OF_RANGE if
        // currentPosition lies outside the buffer or ErrorCodes::OVERRUN if
        // the value starts inside the buffer but does not fit into it.
        //
        // The bit pattern is assembled with shifts, so no endian-specific
        // code path is needed as long as float and uint32_t share the same
        // byte order on the host.
        ErrorOr<float> readFloat(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            static_assert(sizeof(float) == sizeof(uint32_t), "NBT::helper::readFloat expects a 32-bit float.");
            if (dataSize<=currentPosition) return ErrorOr<float>(true, ErrorCodes::OUT_OF_RANGE);
            // currentPosition < dataSize at this point, the subtraction cannot wrap
            if (dataSize-currentPosition < 4) return ErrorOr<float>(true, ErrorCodes::OVERRUN);

            uint32_t bits = 0;
            for (uint64_t offset = 0; offset < 4; offset++) {
                bits = (bits << 8) | data[currentPosition+offset];
            }

            // The value lives on the stack: nothing to free on any return path.
            float value;
            std::memcpy(&value, &bits, sizeof(value));
            return ErrorOr<float>(value);
        }

        // Reads a 64-bit float stored most significant byte first.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the first (most significant) byte
        //
        // Returns the decoded value, ErrorCodes::OUT_OF_RANGE if
        // currentPosition lies outside the buffer or ErrorCodes::OVERRUN if
        // the value starts inside the buffer but does not fit into it.
        //
        // The bit pattern is assembled with shifts, so no endian-specific
        // code path is needed as long as double and uint64_t share the same
        // byte order on the host.
        ErrorOr<double> readDouble(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            static_assert(sizeof(double) == sizeof(uint64_t), "NBT::helper::readDouble expects a 64-bit double.");
            if (dataSize<=currentPosition) return ErrorOr<double>(true, ErrorCodes::OUT_OF_RANGE);
            // currentPosition < dataSize at this point, the subtraction cannot wrap
            if (dataSize-currentPosition < 8) return ErrorOr<double>(true, ErrorCodes::OVERRUN);

            uint64_t bits = 0;
            for (uint64_t offset = 0; offset < 8; offset++) {
                bits = (bits << 8) | data[currentPosition+offset];
            }

            // The value lives on the stack: nothing to free on any return path.
            double value;
            std::memcpy(&value, &bits, sizeof(value));
            return ErrorOr<double>(value);
        }

        // Reads a length-prefixed array of signed bytes.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the 4-byte length prefix; the content
        //                  follows directly after it
        //
        // Returns the array content, the error reported by readInt32 if the
        // prefix cannot be read, ErrorCodes::OUT_OF_RANGE if the prefix is
        // negative or ErrorCodes::OVERRUN if the content does not fit into
        // the buffer.
        ErrorOr<std::vector<int8_t>> readInt8Array(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // get size prefix
            ErrorOr<int32_t> size = readInt32(data, dataSize, currentPosition);
            if (size.isError) return ErrorOr<std::vector<int8_t>>(true, size.errorCode);
            // A negative length would be sign-extended in the bounds check
            // below and slip through as a huge wrapped-around value.
            if (size.value < 0) return ErrorOr<std::vector<int8_t>>(true, ErrorCodes::OUT_OF_RANGE);

            // get content
            const uint64_t contentSize = static_cast<uint64_t>(size.value);
            const uint64_t contentStart = currentPosition+4;
            // subtraction instead of addition so that nothing can wrap around
            if (contentStart > dataSize || contentSize > dataSize-contentStart) {
                return ErrorOr<std::vector<int8_t>>(true, ErrorCodes::OVERRUN);
            }
            // copies the content in one go, converting each byte to int8_t
            std::vector<int8_t> result(data+contentStart, data+contentStart+contentSize);
            return ErrorOr<std::vector<int8_t>>(result);
        }

        // Reads a Java-style string starting at currentPosition by handing
        // the remaining part of the buffer to JavaCompat::importJavaString.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data; buffers larger than
        //           0xFFFF bytes are rejected because the size handed to
        //           importJavaString is a uint16_t
        // currentPosition: index at which the string starts
        //
        // Returns the imported string, ErrorCodes::OVERRUN if the buffer is
        // too large, ErrorCodes::OUT_OF_RANGE if currentPosition lies beyond
        // the buffer or whatever error importJavaString reports.
        ErrorOr<tiny_utf8::string> readString(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            if(dataSize > 0xFFFF){
                return ErrorOr<tiny_utf8::string>(true, ErrorCodes::OVERRUN);
            }
            if(currentPosition > dataSize){
                return ErrorOr<tiny_utf8::string>(true, ErrorCodes::OUT_OF_RANGE);
            }

            // Only the bytes from currentPosition onwards are available to
            // the callee. Passing the full dataSize together with an advanced
            // pointer would claim currentPosition bytes more than exist.
            const uint16_t remainingSize = static_cast<uint16_t>(dataSize-currentPosition);
            ErrorOr<tiny_utf8::string> output = JavaCompat::importJavaString(data+currentPosition, remainingSize);
            if(output.isError){
                return ErrorOr<tiny_utf8::string>(true, output.errorCode);
            }
            return output;
        }

        // Reads a length-prefixed array of 32-bit signed integers.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the 4-byte length prefix (number of
        //                  elements); the elements follow directly after it
        //
        // Returns the decoded elements, the error reported by readInt32 if
        // the prefix or an element cannot be read, ErrorCodes::OUT_OF_RANGE
        // if the prefix is negative or ErrorCodes::OVERRUN if the elements
        // do not fit into the buffer.
        ErrorOr<std::vector<int32_t>> readInt32Array(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // get size prefix
            ErrorOr<int32_t> size = readInt32(data, dataSize, currentPosition);
            if (size.isError) return ErrorOr<std::vector<int32_t>>(true, size.errorCode);
            if (size.value < 0) return ErrorOr<std::vector<int32_t>>(true, ErrorCodes::OUT_OF_RANGE);

            // get content
            // All offsets are computed in uint64_t: multiplying the int32_t
            // length by the element size overflows for large lengths.
            const uint64_t elementCount = static_cast<uint64_t>(size.value);
            const uint64_t contentStart = currentPosition+4;
            if (contentStart > dataSize || elementCount > (dataSize-contentStart)/4) {
                return ErrorOr<std::vector<int32_t>>(true, ErrorCodes::OVERRUN);
            }
            std::vector<int32_t> result = std::vector<int32_t>();
            result.reserve(elementCount);
            for (uint64_t i=0; i<elementCount; i++) {
                ErrorOr<int32_t> nextInt32 = readInt32(data, dataSize, contentStart+(i*4));
                if (nextInt32.isError) return ErrorOr<std::vector<int32_t>>(true, nextInt32.errorCode);
                result.push_back(nextInt32.value);
            }
            return ErrorOr<std::vector<int32_t>>(result);
        }

        // Reads a length-prefixed array of 64-bit signed integers.
        //
        // data: buffer to read from
        // dataSize: number of readable bytes in data
        // currentPosition: index of the 4-byte length prefix (number of
        //                  elements); the elements follow directly after it
        //
        // Returns the decoded elements, the error reported by readInt32 or
        // readInt64 if the prefix or an element cannot be read,
        // ErrorCodes::OUT_OF_RANGE if the prefix is negative or
        // ErrorCodes::OVERRUN if the elements do not fit into the buffer.
        ErrorOr<std::vector<int64_t>> readInt64Array(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            // get size prefix
            ErrorOr<int32_t> size = readInt32(data, dataSize, currentPosition);
            if (size.isError) return ErrorOr<std::vector<int64_t>>(true, size.errorCode);
            if (size.value < 0) return ErrorOr<std::vector<int64_t>>(true, ErrorCodes::OUT_OF_RANGE);

            // get content
            // All offsets are computed in uint64_t: multiplying the int32_t
            // length by the element size overflows for large lengths.
            const uint64_t elementCount = static_cast<uint64_t>(size.value);
            const uint64_t contentStart = currentPosition+4;
            if (contentStart > dataSize || elementCount > (dataSize-contentStart)/8) {
                return ErrorOr<std::vector<int64_t>>(true, ErrorCodes::OVERRUN);
            }
            std::vector<int64_t> result = std::vector<int64_t>();
            result.reserve(elementCount);
            for (uint64_t i=0; i<elementCount; i++) {
                ErrorOr<int64_t> nextInt64 = readInt64(data, dataSize, contentStart+(i*8));
                if (nextInt64.isError) return ErrorOr<std::vector<int64_t>>(true, nextInt64.errorCode);
                result.push_back(nextInt64.value);
            }
            return ErrorOr<std::vector<int64_t>>(result);
        }

        // Appends a single signed byte to destination, keeping its bit
        // pattern.
        void writeInt8(std::vector<uint8_t>* destination, int8_t data) {
            const uint8_t rawByte = static_cast<uint8_t>(data);
            destination->push_back(rawByte);
        }

        // Appends a 16-bit signed integer to destination, most significant
        // byte first.
        //
        // The bytes are extracted with shifts which yields the same output
        // on every host byte order and needs no temporary heap allocation.
        void writeInt16(std::vector<uint8_t>* destination, int16_t data) {
            const uint16_t bits = static_cast<uint16_t>(data);
            destination->push_back(static_cast<uint8_t>(bits >> 8));
            destination->push_back(static_cast<uint8_t>(bits & 0xFF));
        }

        // Appends a 32-bit signed integer to destination, most significant
        // byte first.
        //
        // The bytes are extracted with shifts which yields the same output
        // on every host byte order and needs no temporary heap allocation.
        void writeInt32(std::vector<uint8_t>* destination, int32_t data) {
            const uint32_t bits = static_cast<uint32_t>(data);
            for (int shift = 24; shift >= 0; shift -= 8) {
                destination->push_back(static_cast<uint8_t>((bits >> shift) & 0xFF));
            }
        }

        // Appends a 64-bit signed integer to destination, most significant
        // byte first.
        //
        // The bytes are extracted with shifts which yields the same output
        // on every host byte order and needs no temporary heap allocation.
        void writeInt64(std::vector<uint8_t>* destination, int64_t data) {
            const uint64_t bits = static_cast<uint64_t>(data);
            for (int shift = 56; shift >= 0; shift -= 8) {
                destination->push_back(static_cast<uint8_t>((bits >> shift) & 0xFF));
            }
        }

        // Appends a 32-bit float to destination, most significant byte first.
        //
        // The bit pattern is copied into an integer and taken apart with
        // shifts, so no endian-specific code path and no temporary heap
        // allocation is needed (as long as float and uint32_t share the same
        // byte order on the host).
        void writeFloat(std::vector<uint8_t>* destination, float data) {
            static_assert(sizeof(float) == sizeof(uint32_t), "NBT::helper::writeFloat expects a 32-bit float.");
            uint32_t bits;
            std::memcpy(&bits, &data, sizeof(bits));
            for (int shift = 24; shift >= 0; shift -= 8) {
                destination->push_back(static_cast<uint8_t>((bits >> shift) & 0xFF));
            }
        }

        // Appends a 64-bit float to destination, most significant byte first.
        //
        // The bit pattern is copied into an integer and taken apart with
        // shifts, so no endian-specific code path and no temporary heap
        // allocation is needed (as long as double and uint64_t share the
        // same byte order on the host).
        void writeDouble(std::vector<uint8_t>* destination, double data) {
            static_assert(sizeof(double) == sizeof(uint64_t), "NBT::helper::writeDouble expects a 64-bit double.");
            uint64_t bits;
            std::memcpy(&bits, &data, sizeof(bits));
            for (int shift = 56; shift >= 0; shift -= 8) {
                destination->push_back(static_cast<uint8_t>((bits >> shift) & 0xFF));
            }
        }

        // Appends a byte array to destination: first the element count
        // (written with writeInt32), then every element in order.
        void writeInt8Array(std::vector<uint8_t>* destination, std::vector<int8_t> data) {
            writeInt32(destination, data.size());
            for (std::vector<int8_t>::size_type index = 0; index < data.size(); index++) {
                destination->push_back(data[index]);
            }
        }

        // Appends a byte array given as pointer and element count to
        // destination: first dataSize (written with writeInt32), then the
        // first dataSize elements of data in order.
        void writeInt8Array(std::vector<uint8_t>* destination, int8_t data[], uint32_t dataSize) {
            writeInt32(destination, dataSize);
            uint32_t index = 0;
            while (index < dataSize) {
                destination->push_back(data[index]);
                ++index;
            }
        }

        // Appends data to destination in the representation produced by
        // JavaCompat::exportJavaString.
        //
        // destination: byte vector the exported string gets appended to
        // data: the string to write
        //
        // This function has no way to report failure to its caller: if the
        // export fails, the error code is printed to stderr and the program
        // is aborted.
        void writeString(std::vector<uint8_t>* destination, tiny_utf8::string data) {
            ErrorOr<std::vector<uint8_t>> exportedString = JavaCompat::exportJavaString(data);
            if(exportedString.isError){
                std::cerr << "NBT::helpers::writeString encountered an error: " << (int) exportedString.errorCode << std::endl;
                std::abort();
            }
            // Append like every other write helper does. Assigning to
            // *destination would throw away everything written before.
            destination->insert(destination->end(), exportedString.value.begin(), exportedString.value.end());
        }

        // Appends an array of 32-bit integers to destination: first the
        // element count, then every element, each written with writeInt32.
        void writeInt32Array(std::vector<uint8_t>* destination, std::vector<int32_t> data) {
            writeInt32(destination, data.size());
            for (std::vector<int32_t>::const_iterator current = data.begin(); current != data.end(); ++current) {
                writeInt32(destination, *current);
            }
        }

        // Appends an array of 32-bit integers given as pointer and element
        // count to destination: first dataSize, then the first dataSize
        // elements of data, each written with writeInt32.
        void writeInt32Array(std::vector<uint8_t>* destination, int32_t data[], uint32_t dataSize) {
            writeInt32(destination, dataSize);
            uint32_t index = 0;
            while (index < dataSize) {
                writeInt32(destination, data[index]);
                ++index;
            }
        }

        // Appends an array of 64-bit integers to destination: first the
        // element count (written with writeInt32), then every element
        // (written with writeInt64).
        void writeInt64Array(std::vector<uint8_t>* destination, std::vector<int64_t> data) {
            writeInt32(destination, data.size());
            for (std::vector<int64_t>::const_iterator current = data.begin(); current != data.end(); ++current) {
                writeInt64(destination, *current);
            }
        }

        // Appends an array of 64-bit integers given as pointer and element
        // count to destination: first dataSize (written with writeInt32),
        // then the first dataSize elements of data (written with writeInt64).
        void writeInt64Array(std::vector<uint8_t>* destination, int64_t data[], uint32_t dataSize) {
            writeInt32(destination, dataSize);
            uint32_t index = 0;
            while (index < dataSize) {
                writeInt64(destination, data[index]);
                ++index;
            }
        }

        //FIXME: instead of blindly passing the error code upwards, choose
        // one that is applicable to the situation (for example replace
        // OUT_OF_RANGE with OVERRUN where appropriate)
        //
        // The total size in bytes
        //
        // Does not work for compound tags and lists. This is an intended
        // feature as compound tags and lists need to be dealt with
        // separately to avoid unnecessarily long and complex code.
        //
        // Regarding lists specifically: The size of some lists can
        // be determined easily by looking at the contained data type and
        // size information but cases like string lists or compound lists
        // are significantly more difficult to deal with. Parsing their
        // contents requires special attention anyway due to the tag headers
        // of contained tags being absent so they may as well get treated
        // separately for this as well.
        // Computes the total size in bytes of the tag starting at
        // currentPosition: type byte, name size bytes, name and payload.
        //
        // data: buffer containing the tag
        // dataSize: number of readable bytes in data
        // currentPosition: index of the tag's type byte
        //
        // Returns
        // - 1 for an end tag,
        // - ErrorCodes::OVERRUN if currentPosition lies outside the buffer,
        // - ErrorCodes::NOT_YET_KNOWN for compound and list tags,
        // - ErrorCodes::OUT_OF_RANGE for an array with a negative length,
        // - ErrorCodes::UNKNOWN for an unknown tag type,
        // - the error reported by readInt16/readInt32 if the name size or a
        //   length prefix cannot be read.
        ErrorOr<uint64_t> totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
            if (dataSize <= currentPosition) {
                return ErrorOr<uint64_t>(true, ErrorCodes::OVERRUN);
            }
            const uint8_t nextTag = data[currentPosition];
            // deal with compound tags and lists separately
            if (nextTag == TagType::COMPOUND || nextTag == TagType::LIST) return ErrorOr<uint64_t>(true, ErrorCodes::NOT_YET_KNOWN);
            // deal with end tag before trying to access the name
            if (nextTag == TagType::END) return ErrorOr<uint64_t>(1);

            ErrorOr<int16_t> nameSize = helper::readInt16(data, dataSize, currentPosition+1);
            if (nameSize.isError) {
                return ErrorOr<uint64_t>(true, nameSize.errorCode);
            }
            // Tag stores the name size as uint16_t, so go through uint16_t
            // here as well: a direct cast to uint64_t would sign-extend sizes
            // above 32767 into nonsense.
            // add type byte and name size bytes
            const uint64_t prefixSize = static_cast<uint64_t>(static_cast<uint16_t>(nameSize.value)) + 3;

            // size of one array element, only set for the array tags
            uint64_t arrayElementSize = 0;
            switch (nextTag) {
                case TagType::INT8:
                    return ErrorOr<uint64_t>(prefixSize+1);
                case TagType::INT16:
                    return ErrorOr<uint64_t>(prefixSize+2);
                case TagType::INT32:
                case TagType::FLOAT:
                    return ErrorOr<uint64_t>(prefixSize+4);
                case TagType::INT64:
                case TagType::DOUBLE:
                    return ErrorOr<uint64_t>(prefixSize+8);
                case TagType::STRING: {
                    ErrorOr<int16_t> stringSize = helper::readInt16(data, dataSize, currentPosition+prefixSize);
                    if (stringSize.isError) {
                        return ErrorOr<uint64_t>(true, stringSize.errorCode);
                    }
                    // same 16-bit size prefix as the name: treat as unsigned
                    return ErrorOr<uint64_t>(static_cast<uint64_t>(static_cast<uint16_t>(stringSize.value)) + prefixSize + 2);
                }
                case TagType::INT8_ARRAY:
                    arrayElementSize = 1;
                    break;
                case TagType::INT32_ARRAY:
                    arrayElementSize = 4;
                    break;
                case TagType::INT64_ARRAY:
                    arrayElementSize = 8;
                    break;
                // unknown tag or parsing error
                default:
                    return ErrorOr<uint64_t>(true, ErrorCodes::UNKNOWN);
            }

            // all array tags: 4 length bytes followed by the elements
            ErrorOr<int32_t> arrayLength = helper::readInt32(data, dataSize, currentPosition+prefixSize);
            if (arrayLength.isError) {
                return ErrorOr<uint64_t>(true, arrayLength.errorCode);
            }
            // a negative length would wrap around to a bogus size
            if (arrayLength.value < 0) {
                return ErrorOr<uint64_t>(true, ErrorCodes::OUT_OF_RANGE);
            }
            return ErrorOr<uint64_t>(static_cast<uint64_t>(arrayLength.value)*arrayElementSize + prefixSize + 4);
        }

        //FIXME: instead of blindly passing the error code upwards, choose
        // one that is applicable to the situation (for example replace
        // OUT_OF_RANGE with OVERRUN where appropriate)
        //
        // Length is the number of stored elements, not to be confused with size
        // which is the size in bytes.
        // Determines how many elements the tag starting at currentPosition
        // holds.
        //
        // data: buffer containing the tag
        // dataSize: number of readable bytes in data
        // currentPosition: index of the tag's type byte
        //
        // Returns
        // - 0 for an end tag,
        // - 1 for the tags that only ever hold one value,
        // - the stored length prefix for strings, lists and arrays,
        // - ErrorCodes::OVERRUN if currentPosition lies outside the buffer,
        // - ErrorCodes::NOT_YET_KNOWN for compound tags,
        // - ErrorCodes::UNKNOWN for an unknown tag type,
        // - the error reported by readInt16/readInt32 if the name size or a
        //   length prefix cannot be read.
        ErrorOr<int32_t> containedDataLength(uint8_t data[], uint64_t dataSize, uint64_t currentPosition){
            if (dataSize <= currentPosition) {
                return ErrorOr<int32_t>(true, ErrorCodes::OVERRUN);
            }
            const uint8_t nextTag = data[currentPosition];

            // everything that can be answered without looking at the name
            switch (nextTag) {
                // deal with compound tags separately
                case TagType::COMPOUND:
                    return ErrorOr<int32_t>(true, ErrorCodes::NOT_YET_KNOWN);
                // deal with end tag before trying to access the name
                case TagType::END:
                    return ErrorOr<int32_t>(0);
                // tags that only ever hold one value
                case TagType::INT8:
                case TagType::INT16:
                case TagType::INT32:
                case TagType::INT64:
                case TagType::FLOAT:
                case TagType::DOUBLE:
                    return ErrorOr<int32_t>(1);
                default:
                    break;
            }

            ErrorOr<int16_t> nameSize = helper::readInt16(data, dataSize, currentPosition+1);
            if (nameSize.isError) {
                return ErrorOr<int32_t>(true, nameSize.errorCode);
            }
            // Tag stores the name size as uint16_t, so go through uint16_t
            // here as well: a direct cast to uint64_t would sign-extend sizes
            // above 32767 into nonsense.
            // add type byte and name size bytes
            const uint64_t prefixSize = static_cast<uint64_t>(static_cast<uint16_t>(nameSize.value)) + 3;
            switch (nextTag) {
                case TagType::STRING: {
                    ErrorOr<int16_t> stringSize = helper::readInt16(data, dataSize, currentPosition+prefixSize);
                    if (stringSize.isError) {
                        return ErrorOr<int32_t>(true, stringSize.errorCode);
                    }
                    // same 16-bit size prefix as the name: treat as unsigned
                    // so that long strings do not report a negative length
                    return ErrorOr<int32_t>(static_cast<int32_t>(static_cast<uint16_t>(stringSize.value)));
                }
                case TagType::LIST:
                    // add an additional byte for the contained data type
                    return helper::readInt32(data, dataSize, currentPosition+prefixSize+1);
                case TagType::INT8_ARRAY:
                case TagType::INT32_ARRAY:
                case TagType::INT64_ARRAY:
                    return helper::readInt32(data, dataSize, currentPosition+prefixSize);
                default:
                    // unknown tag or parsing error
                    return ErrorOr<int32_t>(true, ErrorCodes::UNKNOWN);
            }
        }
    }

    //Tag constructors

    // Generic tag constructor: stores every argument in the member of the
    // same name.
    //
    // tagType: numeric ID of the tag type
    // name: name of the tag
    // nameSize: size of the name as supplied by the caller
    // content: the value held by the tag
    // size: stored as-is
    //       NOTE(review): presumably the size of the content - confirm
    //       against the declaration in nbt.hpp
    template <typename T>
    Tag<T>::Tag(uint8_t tagType, tiny_utf8::string name, uint16_t nameSize, T content, uint32_t size)
        : tagType(tagType), name(name), nameSize(nameSize), content(content) ,size(size)
    {}

    // Constructs an end tag: tag type 0, empty name, name size 0, content 0
    // and size 0.
    End::End() : Tag::Tag(0, "", 0, 0, 0) {}

    // Constructs a byte tag (tag type 1, size 1) from its parts.
    //
    // name: name of the tag
    // nameSize: size of the name as supplied by the caller
    // content: the value held by the tag
    Byte::Byte(tiny_utf8::string name, uint16_t nameSize, int8_t content)
        : Tag::Tag(1, name, nameSize, content, 1)
    {}

    // Constructs a byte tag from its raw representation.
    //
    // data: raw tag data laid out as
    //       data[0]              type byte
    //       data[1], data[2]     name size, most significant byte first
    //       data[3 ...]          name (nameSize bytes)
    //       data[3+nameSize]     the contained value
    //
    // Throws the ErrorCodes value reported by the helper functions if the
    // name size or the name cannot be read.
    //
    // If validate(data) fails, nothing is parsed and no member is assigned.
    //
    // NOTE(review): the size member is not assigned by this constructor
    // while the other Byte constructor sets it to 1.
    Byte::Byte(uint8_t data[]){
        if(validate(data)){
            this->tagType = 1;

            // the two name size bytes start right after the type byte
            ErrorOr<int16_t> readIntResult = helper::readInt16(data+1, 2, 0);
            if(readIntResult.isError){
                throw readIntResult.errorCode;
            }
            this->nameSize = readIntResult.value;

            // The name is read as a size-prefixed string starting at data[1].
            // readString forwards its dataSize argument as the size of the
            // buffer, so it has to cover the two size bytes plus the name.
            // Reading directly from data avoids copying into a variable
            // length array which is not standard C++.
            ErrorOr<tiny_utf8::string> readStringResult = helper::readString(data+1, static_cast<uint64_t>(this->nameSize)+2, 0);
            if(readStringResult.isError){
                throw readStringResult.errorCode;
            }
            this->name = readStringResult.value;

            // int8 needs only one byte
            // type byte + two name size bytes + name = nameSize+3
            this->content = data[this->nameSize+3];
        }
    }

    //more conditions will be added
    // Checks whether data starts with the type byte 0x01.
    bool Byte::validate(uint8_t data[]){
        const bool startsWithByteTagType = (data[0] == 0x01);
        return startsWithByteTagType;
    }

    bool validateRawList(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
        ErrorOr<int32_t> elementCount = helper::containedDataLength(data, dataSize, initialPosition);
        if (elementCount.isError) {
            return false;
        }
        // there is no way this is an error bc it gets checked while trying
        // to get the element count
        int16_t nameSize = helper::readInt16(data, dataSize, initialPosition+1).value;
        // type byte + two name size bytes = 3
        uint8_t contentType = data[initialPosition + nameSize + 3];
        // type byte + two name size bytes + contained type byte + 4 length bytes = 8
        *processedDataSize = 8;
        switch (contentType) {
            case TagType::END:
                // everything except content has been touched at this point
                // and a list of end tags has no content
                return true;
            case TagType::INT8: {
                *processedDataSize += (uint64_t) elementCount.value;
                return initialPosition + *processedDataSize < dataSize;
            }
            case TagType::INT16: {
                *processedDataSize += (uint64_t) elementCount.value * 2;
                return initialPosition + *processedDataSize < dataSize;
            }
            case TagType::INT32:
            case TagType::FLOAT: {
                *processedDataSize += (uint64_t) elementCount.value * 4;
                return initialPosition + *processedDataSize < dataSize;
            }
            case TagType::INT64:
            case TagType::DOUBLE: {
                *processedDataSize += (uint64_t) elementCount.value * 8;
                return initialPosition + *processedDataSize < dataSize;
            }
            case TagType::INT8_ARRAY: {
                for (int32_t i=0; i<elementCount.value; i++) {
                    ErrorOr<std::vector<int8_t>> nextArray = helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
                    if (nextArray.isError) {
                        return false;
                    }
                    *processedDataSize += (uint64_t) nextArray.value.size();
                }
                return true;
            }
            case TagType::STRING: {
                for (int32_t i=0; i<elementCount.value; i++) {
                    ErrorOr<tiny_utf8::string> nextString = helper::readString(data, dataSize, initialPosition+*processedDataSize);
                    if (nextString.isError) {
                        return false;
                    }
                    // this cannot be an error because it just got checked
                    int16_t nextStringSize = helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value;
                    *processedDataSize += (uint64_t) nextStringSize + 2;
                }
                return true;
            }
            case TagType::LIST: {
                uint64_t* containedDataSize = new uint64_t;
                for (int32_t i=0; i<elementCount.value; i++) {
                    *containedDataSize = 0;
                    if (validateRawList(data, dataSize, initialPosition+*processedDataSize, containedDataSize)) {
                        *processedDataSize += *containedDataSize;
                    } else {
                        delete containedDataSize;
                        return false;
                    }
                }
                delete containedDataSize;
                return true;
            }
            case TagType::COMPOUND: {
                uint64_t* containedDataSize = new uint64_t;
                for (int32_t i=0; i<elementCount.value; i++) {
                    *containedDataSize = 0;
                    if (validateRawNBTData(data, dataSize, initialPosition, containedDataSize)) {
                        *processedDataSize += *containedDataSize;
                    } else {
                        delete containedDataSize;
                        return false;
                    }
                }
                delete containedDataSize;
                return true;
            }
            case TagType::INT32_ARRAY: {
                for (int32_t i=0; i<elementCount.value; i++) {
                    ErrorOr<std::vector<int32_t>> nextArray = helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
                    if (nextArray.isError) {
                        return false;
                    }
                    *processedDataSize += (uint64_t) nextArray.value.size() * 4;
                }
                return true;
            }
            case TagType::INT64_ARRAY: {
                for (int32_t i=0; i<elementCount.value; i++) {
                    ErrorOr<std::vector<int64_t>> nextArray = helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
                    if (nextArray.isError) {
                        return false;
                    }
                    *processedDataSize += (uint64_t) nextArray.value.size() * 8;
                }
                return true;
            }
            default:
                return false;
        }
    }

    // Checks whether the bytes starting at initialPosition look like a valid
    // sequence of NBT tags.
    //
    // Tags are validated one after another until an end tag is found (which
    // terminates the sequence successfully), the end of the data is reached
    // (also treated as success), or something invalid is encountered.
    //
    // Parameters:
    //   data              - buffer containing the raw NBT data
    //   dataSize          - number of bytes in data
    //   initialPosition   - index of the type byte of the first tag to check
    //   processedDataSize - optional (may be nullptr); receives the number of
    //                       bytes that were walked over, counted from
    //                       initialPosition. A terminating end tag is
    //                       included in that count so that a caller can
    //                       continue directly behind it. On failure, it holds
    //                       the offset of the tag that could not be validated.
    //
    // Returns true if everything that was looked at is valid, false otherwise.
    bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
        if (initialPosition >= dataSize) {
            // Yes, this *could* return an instance of ErrorOr with
            // ErrorCodes::OVERRUN but we only care to know if what is
            // at that position is valid NBT which it clearly isn't according
            // to the original spec.
            //
            // An interesting question at this point is whether we should
            // consider empty input valid or invalid NBT data.
            //
            // The original spec says that the top-most tag is always a
            // compound (or in more recent times, the Microsoft-commercialized
            // in-game-purchase-enabling version also allows list tags)
            // which automatically means that no data is invalid data...
            // I don't see a reason why having a different tag as the top-most
            // tag shouldn't be valid NBT in which case we have to face the
            // question whether no data is invalid or just empty NBT data.
            //
            // This seems like a reasonable extension to the spec to me and
            // it should be backwards compatible AFAIK.
            //
            // - BodgeMaster
            if (processedDataSize!=nullptr) *processedDataSize=0;
            return false;
        }

        uint64_t currentPosition = initialPosition;

        // Every exit below this point goes through this helper so that the
        // caller always learns how far validation got.
        auto finish = [&](bool result) -> bool {
            if (processedDataSize!=nullptr) *processedDataSize = currentPosition-initialPosition;
            return result;
        };

        while (currentPosition<dataSize) {
            ErrorOr<uint64_t> nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
            if (nextTagSize.isError) {
                // NOT_YET_KNOWN is reported for tags whose size can only be
                // determined by walking their contents; anything else is a
                // genuine failure.
                if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) {
                    return finish(false);
                }

                // attempt parsing the name
                ErrorOr<tiny_utf8::string> tagName = helper::readString(data, dataSize, currentPosition+1);
                if (tagName.isError) {
                    return finish(false);
                }

                uint64_t processedTagSize = 0;

                if (data[currentPosition]==TagType::LIST) {
                    // NOTE(review): validateRawList is handed the position of
                    // the tag's type byte and is expected to report the size
                    // of the whole tag - confirm against its implementation.
                    if (!validateRawList(data, dataSize, currentPosition, &processedTagSize)) {
                        return finish(false);
                    }
                } else if (data[currentPosition]==TagType::COMPOUND) {
                    // seek to the start of the compound's contents
                    //
                    // there is no way this is an error bc it gets
                    // checked while trying to parse the string above
                    //
                    // The length prefix is an unsigned 16 bit value, so it
                    // must not be sign-extended when widening it.
                    uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
                    // type byte + two name size bytes = 3
                    uint64_t headerSize = nameSize + 3;

                    if (!validateRawNBTData(data, dataSize, currentPosition + headerSize, &processedTagSize)) {
                        return finish(false);
                    }
                    processedTagSize += headerSize;
                }

                // Neither a list nor a compound, or nothing was consumed:
                // bail out instead of looping forever on the same position.
                if (processedTagSize == 0) {
                    return finish(false);
                }

                currentPosition += processedTagSize;
                continue;
            }

            // currentPosition < dataSize is guaranteed by the loop condition,
            // so this form of the bounds check cannot overflow.
            if (nextTagSize.value > dataSize - currentPosition) {
                return finish(false);
            }

            // recursion abort condition
            if (data[currentPosition]==TagType::END) {
                // count the end tag itself so the caller resumes behind it
                currentPosition++;
                return finish(true);
            }

            // nameSize cannot be an error here bc it got checked in
            // nextTagSize() already
            uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);

            // attempt parsing the name
            //
            // This shouldn't matter too much here as the only error condition
            // the parser function deals with rn is an overrun which is already
            // being guarded against by the bounds check above.
            // It might, however, turn out to be a useful check in the future.
            ErrorOr<tiny_utf8::string> name = helper::readString(data, dataSize, currentPosition+1);
            if (name.isError) {
                return finish(false);
            }

            switch (data[currentPosition]) {
                case TagType::INT8:
                case TagType::INT16:
                case TagType::INT32:
                case TagType::INT64:
                case TagType::FLOAT:
                case TagType::DOUBLE:
                case TagType::INT8_ARRAY:
                case TagType::INT32_ARRAY:
                case TagType::INT64_ARRAY:
                    // nothing beyond the size check above to validate
                    break;
                case TagType::STRING: {
                    // attempt parsing the content
                    //
                    // Like the name check, this is currently covered by the
                    // bounds check above but might turn out to be a useful
                    // check in the future.
                    //
                    // type byte + two name size bytes = 3
                    ErrorOr<tiny_utf8::string> content = helper::readString(data, dataSize, currentPosition+nameSize+3);
                    if (content.isError) {
                        return finish(false);
                    }
                    break;
                }
                default:
                    return finish(false);
            }

            currentPosition += nextTagSize.value;
        }
        return finish(true);
    }
}