Compare commits

..

3 Commits

Author SHA1 Message Date
BodgeMaster cdc23e7468 lib/nbt: Implement parser 2022-10-15 23:05:26 +02:00
BodgeMaster e9bfb6eeee resources/NBT_data: Add two files that are valid NBT by our extended spec
Since we allow loose tags to be valid NBT data, this is valid NBT by our spec.
This isn’t valid by Mojang’s spec.
2022-10-15 21:41:32 +02:00
BodgeMaster 8b62ec9c88 lib/nbt: Get rid of that ugly #define return hack
Instead of doing #define return, the boolean returnValue is set and
a goto statement is used to get to the code that does what the macro
used to do.
2022-10-15 18:55:58 +02:00
4 changed files with 401 additions and 14 deletions

Binary file not shown.

Binary file not shown.

View File

@ -1168,6 +1168,371 @@ namespace NBT {
} }
// Deserializes the payload of a list tag (everything after the list's name)
// into a vector of heap-allocated, nameless tags.
//
// The same comment about blindly passing up error codes applies to this
// function: error codes from helpers and nested calls are returned unchanged.
//
// Parameters:
//   data, dataSize     raw NBT buffer and its size in bytes
//   initialPosition    position of the list's contained-type byte, which is
//                      followed by the 4-byte element count
//   processedDataSize  receives the number of bytes consumed (type byte,
//                      element count and all elements); only meaningful on
//                      success; may be nullptr if the caller does not care
//
// Returns the contained tags (allocated with `new`, handed to the caller)
// or an error. On error, every tag allocated by this call is deleted before
// returning, so nothing is handed to the caller.
//
// NOTE(review): the element count comes straight from the input and is not
// sanity-checked against dataSize; a list of END tags in particular
// allocates one object per claimed element without consuming any input.
ErrorOr<std::vector<Tag::Generic*>> deserializeRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
    std::vector<Tag::Generic*> contents;

    // Allow callers to pass nullptr by redirecting writes to a scratch value.
    uint64_t unusedProcessedDataSize = 0;
    if (processedDataSize == nullptr) {
        processedDataSize = &unusedProcessedDataSize;
    }

    // Frees everything collected so far; used on every error path so that
    // partially parsed lists are not leaked.
    auto discardContents = [&contents]() {
        for (Tag::Generic* tag : contents) {
            delete tag;
        }
        contents.clear();
    };

    // get contained data length by reading it manually because
    // the function that does it normally can't deal with
    // headerless tags
    //
    // add one byte to position to skip the type byte
    ErrorOr<int32_t> elementCount = Helper::readInt32(data, dataSize, initialPosition+1);
    if (elementCount.isError) {
        return ErrorOr<std::vector<Tag::Generic*>>(true, elementCount.errorCode);
    }
    uint8_t contentType = data[initialPosition];
    // contained type byte + 4 length bytes = 5
    *processedDataSize = 5;

    switch (contentType) {
        case TagType::END: {
            // everything except content has been touched at this point
            // and a list of end tags has no content that could be read
            for (int32_t i=0; i<elementCount.value; i++) {
                contents.push_back(new Tag::End());
            }
            break;
        }
        case TagType::INT8: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int8_t> nextInt = Helper::readInt8(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int8("", nextInt.value));
                // The below code would produce a warning on GCC and Clang
                // about the computed value not being used. While this does
                // apply inside this function, it is ultimately not true
                // as the pointer is used both inside and outside of the
                // function.
                *processedDataSize += 1;
            }
            break;
        }
        case TagType::INT16: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int16_t> nextInt = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int16("", nextInt.value));
                *processedDataSize += 2;
            }
            break;
        }
        case TagType::INT32: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int32_t> nextInt = Helper::readInt32(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int32("", nextInt.value));
                *processedDataSize += 4;
            }
            break;
        }
        case TagType::FLOAT: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<float> nextFloat = Helper::readFloat(data, dataSize, initialPosition+*processedDataSize);
                if (nextFloat.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextFloat.errorCode);
                }
                contents.push_back(new Tag::Float("", nextFloat.value));
                *processedDataSize += 4;
            }
            break;
        }
        case TagType::INT64: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int64_t> nextInt = Helper::readInt64(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int64("", nextInt.value));
                *processedDataSize += 8;
            }
            break;
        }
        case TagType::DOUBLE: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<double> nextDouble = Helper::readDouble(data, dataSize, initialPosition+*processedDataSize);
                if (nextDouble.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextDouble.errorCode);
                }
                contents.push_back(new Tag::Double("", nextDouble.value));
                *processedDataSize += 8;
            }
            break;
        }
        case TagType::INT8_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int8_t>> nextArray = Helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int8Array("", nextArray.value));
                // An array payload starts with a 4-byte element count which
                // has to be skipped as well, otherwise the next element
                // would be read from the middle of this one.
                // (assumes the helper reads that prefix at the position it
                // is given, which is how deserialize() calls it)
                *processedDataSize += 4 + static_cast<uint64_t>(nextArray.value.size());
            }
            break;
        }
        case TagType::STRING: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<tiny_utf8::string> nextString = Helper::readString(data, dataSize, initialPosition+*processedDataSize);
                if (nextString.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextString.errorCode);
                }
                contents.push_back(new Tag::String("", nextString.value));
                // The serialized size is re-read from the length prefix
                // instead of asking the decoded string for its size.
                //
                // this cannot be an error because it just got read
                int16_t rawStringSize = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value;
                // The prefix is an unsigned 16-bit value in the NBT format;
                // go through uint16_t so that sizes >= 0x8000 don't get
                // sign-extended into a bogus offset.
                uint64_t nextStringSize = static_cast<uint64_t>(static_cast<uint16_t>(rawStringSize));
                // string bytes + two size prefix bytes
                *processedDataSize += nextStringSize + 2;
            }
            break;
        }
        case TagType::LIST: {
            for (int32_t i=0; i<elementCount.value; i++) {
                uint64_t containedDataSize = 0;
                ErrorOr<std::vector<Tag::Generic*>> nextListContents = deserializeRawListContents(data, dataSize, initialPosition+*processedDataSize, &containedDataSize);
                if (nextListContents.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextListContents.errorCode);
                }
                contents.push_back(new Tag::List("", nextListContents.value));
                *processedDataSize += containedDataSize;
            }
            break;
        }
        case TagType::COMPOUND: {
            for (int32_t i=0; i<elementCount.value; i++) {
                uint64_t containedDataSize = 0;
                ErrorOr<std::vector<Tag::Generic*>> nextCompoundData = deserialize(data, dataSize, initialPosition+*processedDataSize, &containedDataSize);
                if (nextCompoundData.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextCompoundData.errorCode);
                }
                contents.push_back(new Tag::Compound("", nextCompoundData.value));
                *processedDataSize += containedDataSize;
            }
            break;
        }
        case TagType::INT32_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int32_t>> nextArray = Helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int32Array("", nextArray.value));
                // 4-byte element count prefix + 4 bytes per element
                *processedDataSize += 4 + static_cast<uint64_t>(nextArray.value.size()) * 4;
            }
            break;
        }
        case TagType::INT64_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int64_t>> nextArray = Helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int64Array("", nextArray.value));
                // 4-byte element count prefix + 8 bytes per element
                *processedDataSize += 4 + static_cast<uint64_t>(nextArray.value.size()) * 8;
            }
            break;
        }
        default:
            // nothing has been allocated yet at this point
            return ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::INVALID_TYPE);
    }
    return ErrorOr<std::vector<Tag::Generic*>>(contents);
}
// Deserializes raw NBT data into heap-allocated tags.
//
// The comment about blindly passing up error codes applies here: error
// codes from helpers and nested calls are returned unchanged.
//
// The return value of this function is a vector of tags
// instead of a compound tag due to a spec extension that allows
// for any bare tag to be valid NBT data without a containing
// compound tag. This also just makes the implementation easier.
//
// Parameters:
//   data, dataSize     raw NBT buffer and its size in bytes
//   initialPosition    position of the first tag to parse
//   processedDataSize  if not nullptr, receives the number of bytes that
//                      were consumed starting at initialPosition
//
// Parsing stops at the end of the data or right after the first end tag,
// whichever comes first; the end tag itself is consumed but not returned.
// Compounds are parsed by calling this function recursively, lists by
// calling deserializeRawListContents().
//
// On success the returned tags are allocated with `new` and handed to the
// caller. On error all tags collected by this call are deleted.
//
// NOTE(review): nesting depth is not limited, so the recursion depth is
// controlled by the input data.
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
    if (initialPosition >= dataSize) {
        // An interesting question at this point is whether we should
        // consider empty input valid or invalid NBT data.
        //
        // The original spec says that the top-most tag is always a
        // compound (or in more recent times, the Microsoft-commercialized
        // in-game-purchase-enabling version also allows list tags)
        // which automatically means that no data is invalid data...
        // I don't see a reason why having a different tag as the top-most
        // tag shouldn't be valid NBT in which case we have to face the
        // question whether no data is invalid or just empty NBT data.
        //
        // This seems like a reasonable extension to the spec to me and
        // it should be backwards compatible AFAIK.
        //
        // - BodgeMaster
        if (processedDataSize!=nullptr) *processedDataSize=0;
        return ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::OUT_OF_RANGE);
    }

    std::vector<Tag::Generic*> tags;
    ErrorOr<std::vector<Tag::Generic*>> returnValue;
    uint64_t currentPosition = initialPosition;

    while (currentPosition<dataSize) {
        ErrorOr<uint64_t> nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition);
        if (nextTagSize.isError) {
            if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) {
                returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, nextTagSize.errorCode);
                goto returnNow;
            }

            // The size of lists and compounds is only known after parsing
            // them, so they are handled here.
            ErrorOr<tiny_utf8::string> tagName = Helper::readString(data, dataSize, currentPosition+1);
            if (tagName.isError) {
                returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, tagName.errorCode);
                goto returnNow;
            }
            // used to seek to the start of the list's/compound's contents
            //
            // there is no way this is an error bc it gets
            // checked while trying to parse the string above
            int16_t rawNameSize = Helper::readInt16(data, dataSize, currentPosition+1).value;
            // The size prefix is an unsigned 16-bit value in the NBT
            // format; go through uint16_t so that sizes >= 0x8000 don't
            // get sign-extended into a bogus offset.
            //
            // type byte + two name size bytes = 3
            const uint64_t headerSize = static_cast<uint64_t>(static_cast<uint16_t>(rawNameSize)) + 3;
            uint64_t processedTagSize = 0;

            if (data[currentPosition]==TagType::LIST) {
                ErrorOr<std::vector<Tag::Generic*>> listData = deserializeRawListContents(data, dataSize, currentPosition + headerSize, &processedTagSize);
                if (listData.isError) {
                    returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, listData.errorCode);
                    goto returnNow;
                }
                tags.push_back(new Tag::List(tagName.value, listData.value));
            } else if (data[currentPosition]==TagType::COMPOUND) {
                ErrorOr<std::vector<Tag::Generic*>> compoundData = deserialize(data, dataSize, currentPosition + headerSize, &processedTagSize);
                if (compoundData.isError) {
                    returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, compoundData.errorCode);
                    goto returnNow;
                }
                tags.push_back(new Tag::Compound(tagName.value, compoundData.value));
            } else {
                // Size reported as not yet known for something that is
                // neither a list nor a compound. Continuing here would
                // never advance currentPosition, so bail out instead of
                // looping forever.
                returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::UNKNOWN);
                goto returnNow;
            }

            currentPosition += headerSize + processedTagSize;
            continue;
        }

        if (currentPosition + nextTagSize.value > dataSize) {
            returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::OVERRUN);
            goto returnNow;
        }

        // recursion abort condition
        if (data[currentPosition]==TagType::END) {
            // not appending an end tag as it is built into
            // the compound anyway
            currentPosition++;
            returnValue = ErrorOr<std::vector<Tag::Generic*>>(tags);
            goto returnNow;
        }

        // nameSize cannot be an error here bc it got checked in
        // nextTagSize() already
        int16_t rawNameSize = Helper::readInt16(data, dataSize, currentPosition+1).value;
        ErrorOr<tiny_utf8::string> name = Helper::readString(data, dataSize, currentPosition+1);
        if (name.isError) {
            returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, name.errorCode);
            goto returnNow;
        }
        // Position of the tag's payload. The name size is treated as the
        // unsigned 16-bit value it is in the NBT format (see above).
        //
        // type byte + two name size bytes = 3
        const uint64_t contentPosition = currentPosition + static_cast<uint64_t>(static_cast<uint16_t>(rawNameSize)) + 3;

        // Overrun / out of range errors have already been ruled out by
        // checking the tag size against the total amount of data.
        switch (data[currentPosition]) {
            case TagType::INT8: {
                int8_t content = Helper::readInt8(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int8(name.value, content));
                break;
            }
            case TagType::INT16: {
                int16_t content = Helper::readInt16(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int16(name.value, content));
                break;
            }
            case TagType::INT32: {
                int32_t content = Helper::readInt32(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int32(name.value, content));
                break;
            }
            case TagType::INT64: {
                int64_t content = Helper::readInt64(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int64(name.value, content));
                break;
            }
            case TagType::FLOAT: {
                float content = Helper::readFloat(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Float(name.value, content));
                break;
            }
            case TagType::DOUBLE: {
                double content = Helper::readDouble(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Double(name.value, content));
                break;
            }
            case TagType::INT8_ARRAY: {
                std::vector<int8_t> content = Helper::readInt8Array(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int8Array(name.value, content));
                break;
            }
            case TagType::STRING: {
                // Unlike the other payloads, decoding a string is checked
                // for errors of its own.
                ErrorOr<tiny_utf8::string> content = Helper::readString(data, dataSize, contentPosition);
                if (content.isError) {
                    returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, content.errorCode);
                    goto returnNow;
                }
                tags.push_back(new Tag::String(name.value, content.value));
                break;
            }
            case TagType::INT32_ARRAY: {
                std::vector<int32_t> content = Helper::readInt32Array(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int32Array(name.value, content));
                break;
            }
            case TagType::INT64_ARRAY: {
                std::vector<int64_t> content = Helper::readInt64Array(data, dataSize, contentPosition).value;
                tags.push_back(new Tag::Int64Array(name.value, content));
                break;
            }
            default: {
                returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::UNKNOWN);
                goto returnNow;
            }
        }
        currentPosition += nextTagSize.value;
    }
    returnValue = ErrorOr<std::vector<Tag::Generic*>>(tags);

returnNow:
    // Common exit: report how far parsing got and make sure nothing is
    // leaked when an error is returned.
    if (processedDataSize!=nullptr) {
        *processedDataSize = currentPosition-initialPosition;
    }
    if (returnValue.isError) {
        for (Tag::Generic* tag : tags) {
            delete tag;
        }
    }
    return returnValue;
}
bool validateRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) { bool validateRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
// get contained data length by reading it manually because // get contained data length by reading it manually because
// the function that does it normally can't deal with // the function that does it normally can't deal with
@ -1307,8 +1672,8 @@ namespace NBT {
// - BodgeMaster // - BodgeMaster
} }
bool returnValue;
uint64_t currentPosition = initialPosition; uint64_t currentPosition = initialPosition;
#define return if (processedDataSize!=nullptr) *processedDataSize = currentPosition-initialPosition; return
while (currentPosition<dataSize) { while (currentPosition<dataSize) {
ErrorOr<uint64_t> nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition); ErrorOr<uint64_t> nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition);
if (nextTagSize.isError) { if (nextTagSize.isError) {
@ -1316,7 +1681,8 @@ namespace NBT {
// attempt parsing the name // attempt parsing the name
ErrorOr<tiny_utf8::string> tagName = Helper::readString(data, dataSize, currentPosition+1); ErrorOr<tiny_utf8::string> tagName = Helper::readString(data, dataSize, currentPosition+1);
if (tagName.isError) { if (tagName.isError) {
return false; returnValue = false;
goto returnNow;
} }
// used seek to the start of the list's/compounds contents // used seek to the start of the list's/compounds contents
@ -1332,7 +1698,8 @@ namespace NBT {
// type byte + two name size bytes = 3 // type byte + two name size bytes = 3
if (!validateRawListContents(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize)) { if (!validateRawListContents(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize)) {
delete processedTagSize; delete processedTagSize;
return false; returnValue = false;
goto returnNow;
} }
*processedTagSize += (uint64_t) nameSize + 3; *processedTagSize += (uint64_t) nameSize + 3;
} }
@ -1340,7 +1707,8 @@ namespace NBT {
// type byte + two name size bytes = 3 // type byte + two name size bytes = 3
if (!validateRawNBTData(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize)) { if (!validateRawNBTData(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize)) {
delete processedTagSize; delete processedTagSize;
return false; returnValue = false;
goto returnNow;
} }
*processedTagSize += (uint64_t) nameSize + 3; *processedTagSize += (uint64_t) nameSize + 3;
} }
@ -1349,17 +1717,20 @@ namespace NBT {
delete processedTagSize; delete processedTagSize;
continue; continue;
} }
return false; returnValue = false;
goto returnNow;
} }
if (currentPosition + nextTagSize.value > dataSize) { if (currentPosition + nextTagSize.value > dataSize) {
return false; returnValue = false;
goto returnNow;
} }
// recursion abort condition // recursion abort condition
if (data[currentPosition]==TagType::END) { if (data[currentPosition]==TagType::END) {
currentPosition++; currentPosition++;
return true; returnValue = true;
goto returnNow;
} }
// nameSize cannot be an error here bc it got checked in // nameSize cannot be an error here bc it got checked in
@ -1371,11 +1742,15 @@ namespace NBT {
// This shouldn't matter too much here as the only error condition // This shouldn't matter too much here as the only error condition
// the parser function deals with rn is an overrun which is already // the parser function deals with rn is an overrun which is already
// being guarded against with // being guarded against with
// if (currentPosition + nextTagSize.value > dataSize) return false; // if (currentPosition + nextTagSize.value > dataSize) {
// returnValue = false;
// goto returnNow;
// }
// It might, however, turn out to be a useful check in the future. // It might, however, turn out to be a useful check in the future.
ErrorOr<tiny_utf8::string> name = Helper::readString(data, dataSize, currentPosition+1); ErrorOr<tiny_utf8::string> name = Helper::readString(data, dataSize, currentPosition+1);
if (name.isError) { if (name.isError) {
return false; returnValue = false;
goto returnNow;
} }
switch (data[currentPosition]) { switch (data[currentPosition]) {
@ -1393,14 +1768,18 @@ namespace NBT {
// This shouldn't matter too much here as the only // This shouldn't matter too much here as the only
// error condition the parser function deals with rn is // error condition the parser function deals with rn is
// an overrun which is already being guarded against with // an overrun which is already being guarded against with
// if (currentPosition + nextTagSize.value > dataSize) return false; // if (currentPosition + nextTagSize.value > dataSize) {
// returnValue = false;
// goto returnNow;
// }
// It might, however, turn out to be a useful check // It might, however, turn out to be a useful check
// in the future. // in the future.
// //
// type byte + two name size bytes = 3 // type byte + two name size bytes = 3
ErrorOr<tiny_utf8::string> content = Helper::readString(data, dataSize, currentPosition+nameSize+3); ErrorOr<tiny_utf8::string> content = Helper::readString(data, dataSize, currentPosition+nameSize+3);
if (content.isError) { if (content.isError) {
return false; returnValue = false;
goto returnNow;
} }
break; break;
} }
@ -1408,12 +1787,19 @@ namespace NBT {
case TagType::INT64_ARRAY: case TagType::INT64_ARRAY:
break; break;
default: default:
return false; returnValue = false;
goto returnNow;
} }
currentPosition += nextTagSize.value; currentPosition += nextTagSize.value;
} }
return true; returnValue = true;
#undef return goto returnNow;
returnNow:
if (processedDataSize!=nullptr) {
*processedDataSize = currentPosition-initialPosition;
}
return returnValue;
} }
} }

View File

@ -301,5 +301,6 @@ namespace NBT {
}; };
} }
// Parses the NBT data in `data` (of `dataSize` bytes), starting at
// `initialPosition`, into a vector of heap-allocated tags.
// Returns an OUT_OF_RANGE error if `initialPosition` is not inside the data.
// If `processedDataSize` is not nullptr, it receives the number of bytes
// consumed starting at `initialPosition`.
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr); bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
} }