Compare commits

...

6 Commits

Author SHA1 Message Date
BodgeMaster 25bec4c587 lib/nbt: Validator: Fix bytes not being added up correctly in multiple places 2022-08-15 10:51:50 +02:00
BodgeMaster 589cf1ddaf lib/nbt: NBT validator: Fix wrong function declaration in the header, fix not using the currentPosition variable when accessing data 2022-08-15 09:53:06 +02:00
BodgeMaster 884a5239c6 lib/nbt: fix a bug in NBT::helper::readString() which caused it to assume that dataSize is the size of the string 2022-08-15 09:51:46 +02:00
BodgeMaster 9190cad80d lib/nbt: finish implementation of validateRawNBTData() and fix a critical macro-induced bug
I did a `#define return` and then tried to `if () return;` everywhere...
2022-08-15 08:50:07 +02:00
BodgeMaster a862590370 lib/nbt: Start implementing the NBT validator
In theory, this is it. It’s just missing the portion that deals with lists
and unit tests. Both will each likely require similar effort to this.
2022-08-15 05:20:05 +02:00
BodgeMaster 3995e97f03 lib/javacompat: Make the endianness error message refer to the correct function 2022-08-15 02:07:00 +02:00
3 changed files with 255 additions and 12 deletions

View File

@ -75,6 +75,7 @@ namespace JavaCompat {
output.push_back(stdString[i]);
}
//FIXME: endian-dependent implementation
#ifdef FOSSVG_BIG_ENDIAN
output[0] = *sizeBytes;
output[1] = *(sizeBytes+1);
@ -83,7 +84,7 @@ namespace JavaCompat {
output[0] = *(sizeBytes+1);
output[1] = *sizeBytes;
#else
#error "NBT::helper::writeInt16: An implementation for your endianness is unavailable."
#error "JavaCompat::exportJavaString: An implementation for your endianness is unavailable."
#endif
#endif

View File

@ -146,11 +146,16 @@ namespace NBT {
}
ErrorOr<tiny_utf8::string> readString(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
if(dataSize > 0xFFFF){
if(currentPosition > dataSize){
return ErrorOr<tiny_utf8::string>(true, ErrorCodes::OVERRUN);
}
ErrorOr<tiny_utf8::string> output = JavaCompat::importJavaString(data+currentPosition, (uint16_t) dataSize);
ErrorOr<int16_t> stringSize = readInt16(data, dataSize, currentPosition);
if (stringSize.isError) {
return ErrorOr<tiny_utf8::string>(true, stringSize.errorCode);
}
ErrorOr<tiny_utf8::string> output = JavaCompat::importJavaString(data+currentPosition, stringSize.value);
if(output.isError){
return ErrorOr<tiny_utf8::string>(true, output.errorCode);
}
@ -385,9 +390,9 @@ namespace NBT {
// be determined easily by looking at the contained data type and
// size information but cases like string lists or compound lists
// are significantly more difficult to deal with. Parsing their
// contents requires special attention anyway due the tag headers
// of contained tags being absent so they may as well get their
// own function for this as well.
// contents requires special attention anyway due to the tag headers
// of contained tags being absent so they may as well get treated
// separately for this as well.
ErrorOr<uint64_t> totalTagSize(uint8_t data[], uint64_t dataSize, uint64_t currentPosition) {
uint8_t nextTag;
if (dataSize <= currentPosition) {
@ -569,10 +574,247 @@ namespace NBT {
}
}
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition){
//TODO: find out the size of the next tag
//TODO: consume tag
//TODO: recurse if tag compound and return if tag end
return false;
// Validates the list tag whose header starts at data[initialPosition] and
// counts how many bytes belong to it.
//
// data, dataSize:    raw NBT buffer and its size in bytes
// initialPosition:   index of the list tag's type byte
// processedDataSize: out parameter, must not be null. Receives the number of
//                    bytes counted from initialPosition onwards; if validation
//                    fails part-way it holds what had been counted until then.
//
// Returns true if the list passed validation, false otherwise.
bool validateRawList(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
    ErrorOr<int32_t> elementCount = helper::containedDataLength(data, dataSize, initialPosition);
    if (elementCount.isError) {
        return false;
    }
    // A negative count is treated as an empty list. The loops below already
    // behaved that way; the fixed-width cases used to cast the negative value
    // to uint64_t, which made the byte total wrap around.
    const uint64_t count = elementCount.value > 0 ? static_cast<uint64_t>(elementCount.value) : 0;

    // there is no way this is an error bc it gets checked while trying
    // to get the element count
    //
    // The two length bytes are reinterpreted as unsigned so that names longer
    // than 32767 bytes do not sign-extend into an index in front of the buffer.
    const uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, initialPosition+1).value);
    // type byte + two name size bytes = 3
    const uint8_t contentType = data[initialPosition + nameSize + 3];
    // type byte + two name size bytes + contained type byte + 4 length bytes = 8
    *processedDataSize = nameSize + 8;

    // Width in bytes of one element for the fixed-width payload types,
    // 0 for everything that needs individual treatment.
    uint64_t elementWidth = 0;
    switch (contentType) {
        case TagType::END:
            // everything except content has been touched at this point
            // and a list of end tags has no content
            return true;
        case TagType::INT8:
            elementWidth = 1;
            break;
        case TagType::INT16:
            elementWidth = 2;
            break;
        case TagType::INT32:
        case TagType::FLOAT:
            elementWidth = 4;
            break;
        case TagType::INT64:
        case TagType::DOUBLE:
            elementWidth = 8;
            break;
        case TagType::INT8_ARRAY: {
            for (uint64_t i=0; i<count; i++) {
                ErrorOr<std::vector<int8_t>> nextArray = helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    return false;
                }
                // NOTE(review): only the payload is counted here. If
                // readInt8Array expects a length prefix at this position,
                // those prefix bytes are missing from the total - confirm.
                *processedDataSize += (uint64_t) nextArray.value.size();
            }
            return true;
        }
        case TagType::STRING: {
            for (uint64_t i=0; i<count; i++) {
                ErrorOr<tiny_utf8::string> nextString = helper::readString(data, dataSize, initialPosition+*processedDataSize);
                if (nextString.isError) {
                    return false;
                }
                // this cannot be an error because it just got checked
                const uint64_t nextStringSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value);
                // two size bytes + string content
                *processedDataSize += nextStringSize + 2;
            }
            return true;
        }
        case TagType::LIST: {
            // NOTE(review): the nested call reads a name size at position+1,
            // i.e. it expects a full tag header, whereas the comment above
            // totalTagSize() says contained tags have no headers - confirm.
            for (uint64_t i=0; i<count; i++) {
                uint64_t containedDataSize = 0;
                if (!validateRawList(data, dataSize, initialPosition+*processedDataSize, &containedDataSize)) {
                    return false;
                }
                *processedDataSize += containedDataSize;
            }
            return true;
        }
        case TagType::COMPOUND: {
            for (uint64_t i=0; i<count; i++) {
                uint64_t containedDataSize = 0;
                // Each compound starts where the previous element ended.
                // Validating from initialPosition would hit this list's own
                // header again and recurse without ever terminating.
                if (!validateRawNBTData(data, dataSize, initialPosition+*processedDataSize, &containedDataSize)) {
                    return false;
                }
                *processedDataSize += containedDataSize;
            }
            return true;
        }
        case TagType::INT32_ARRAY: {
            for (uint64_t i=0; i<count; i++) {
                ErrorOr<std::vector<int32_t>> nextArray = helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    return false;
                }
                // NOTE(review): payload only, see INT8_ARRAY above.
                *processedDataSize += (uint64_t) nextArray.value.size() * 4;
            }
            return true;
        }
        case TagType::INT64_ARRAY: {
            for (uint64_t i=0; i<count; i++) {
                ErrorOr<std::vector<int64_t>> nextArray = helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    return false;
                }
                // NOTE(review): payload only, see INT8_ARRAY above.
                *processedDataSize += (uint64_t) nextArray.value.size() * 8;
            }
            return true;
        }
        default:
            return false;
    }

    // Fixed-width payload: the size follows directly from the element count.
    // A list that ends exactly on the last byte of the buffer is in bounds,
    // hence <= (this matches the overrun check in validateRawNBTData()).
    *processedDataSize += count * elementWidth;
    return initialPosition + *processedDataSize <= dataSize;
}
// Checks whether the bytes starting at data[initialPosition] form valid NBT.
//
// Tags are consumed one after another until an end tag is found or the
// buffer is exhausted; compounds are validated recursively and lists are
// handed to validateRawList().
//
// data, dataSize:    raw NBT buffer and its size in bytes
// initialPosition:   index of the first tag's type byte
// processedDataSize: optional out parameter (may be nullptr); receives the
//                    number of bytes consumed from initialPosition up to the
//                    point where the function returned.
//
// Returns true if everything that was consumed is valid, false otherwise.
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
    if (initialPosition >= dataSize) {
        // Yes, this *could* return an instance of ErrorOr with
        // ErrorCodes::OVERRUN but we only care to know if what is
        // at that position is valid NBT which it clearly isn't according
        // to the original spec.
        if (processedDataSize!=nullptr) *processedDataSize=0;
        return false;
        // An interesting question at this point is whether we should
        // consider empty input valid or invalid NBT data.
        //
        // The original spec says that the top-most tag is always a
        // compound (or in more recent times, the Microsoft-commercialized
        // in-game-purchase-enabling version also allows list tags)
        // which automatically means that no data is invalid data...
        // I don't see a reason why having a different tag as the top-most
        // tag shouldn't be valid NBT in which case we have to face the
        // question whether no data is invalid or just empty NBT data.
        //
        // This seems like a reasonable extension to the spec to me and
        // it should be backwards compatible AFAIK.
        //
        // - BodgeMaster
    }

    uint64_t currentPosition = initialPosition;
    // Every exit below reports how far validation got. This replaces the
    // former `#define return ...` trick: redefining a keyword is not allowed
    // and silently breaks any unbraced `if (...) return ...;`.
    auto finish = [&](bool result) {
        if (processedDataSize != nullptr) {
            *processedDataSize = currentPosition - initialPosition;
        }
        return result;
    };

    while (currentPosition < dataSize) {
        ErrorOr<uint64_t> nextTagSize = helper::totalTagSize(data, dataSize, currentPosition);
        if (nextTagSize.isError) {
            if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) {
                return finish(false);
            }
            // The size cannot be determined up front, so the tag has to be
            // walked through. Attempt parsing the name first.
            if (helper::readString(data, dataSize, currentPosition+1).isError) {
                return finish(false);
            }
            uint64_t processedTagSize = 0;
            if (data[currentPosition] == TagType::LIST) {
                if (!validateRawList(data, dataSize, currentPosition, &processedTagSize)) {
                    return finish(false);
                }
            } else if (data[currentPosition] == TagType::COMPOUND) {
                // seek to the start of the compound's contents
                //
                // there is no way this is an error bc it gets
                // checked while trying to parse the string above
                //
                // The length is reinterpreted as unsigned so that names
                // longer than 32767 bytes do not sign-extend into a bogus
                // offset.
                const uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
                // type byte + two name size bytes = 3
                if (!validateRawNBTData(data, dataSize, currentPosition + nameSize + 3, &processedTagSize)) {
                    return finish(false);
                }
                processedTagSize += nameSize + 3;
            } else {
                // No other tag type can be walked here. Nothing would be
                // consumed, so continuing would loop on this tag forever.
                return finish(false);
            }
            currentPosition += processedTagSize;
            continue;
        }

        if (currentPosition + nextTagSize.value > dataSize) {
            return finish(false);
        }
        // recursion abort condition
        if (data[currentPosition] == TagType::END) {
            currentPosition++;
            return finish(true);
        }

        // nameSize cannot be an error here bc it got checked in
        // totalTagSize() already
        const uint64_t nameSize = static_cast<uint16_t>(helper::readInt16(data, dataSize, currentPosition+1).value);
        // attempt parsing the name
        //
        // This shouldn't matter too much here as the only error condition
        // the parser function deals with rn is an overrun which is already
        // being guarded against by the size check above.
        // It might, however, turn out to be a useful check in the future.
        if (helper::readString(data, dataSize, currentPosition+1).isError) {
            return finish(false);
        }

        switch (data[currentPosition]) {
            case TagType::INT8:
            case TagType::INT16:
            case TagType::INT32:
            case TagType::INT64:
            case TagType::FLOAT:
            case TagType::DOUBLE:
            case TagType::INT8_ARRAY:
            case TagType::INT32_ARRAY:
            case TagType::INT64_ARRAY:
                // nothing beyond the size check above to verify
                break;
            case TagType::STRING:
                // attempt parsing the content
                //
                // Same reasoning as for the name: currently redundant with
                // the size check above but potentially useful in the future.
                //
                // type byte + two name size bytes = 3
                if (helper::readString(data, dataSize, currentPosition+nameSize+3).isError) {
                    return finish(false);
                }
                break;
            default:
                return finish(false);
        }
        currentPosition += nextTagSize.value;
    }
    return finish(true);
}
}

View File

@ -113,5 +113,5 @@ namespace NBT {
bool validate(uint8_t data[]);
};
bool validateRawNBTData(uint8_t data[], int length, uint64_t initialPosition=0);
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
}