lib/nbt: Implement parser
parent
e9bfb6eeee
commit
cdc23e7468
365
src/lib/nbt.cpp
365
src/lib/nbt.cpp
|
@ -1168,6 +1168,371 @@ namespace NBT {
|
|||
|
||||
}
|
||||
|
||||
// the same comment about blindly passing up error codes applies to this function
|
||||
// FIXME: memory leak when returning errors
|
||||
ErrorOr<std::vector<Tag::Generic*>> deserializeRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
|
||||
std::vector<Tag::Generic*> contents;
|
||||
// get contained data length by reading it manually because
|
||||
// the function that does it normally can't deal with
|
||||
// headerless tags
|
||||
//
|
||||
// add one byte to position to skip the type byte
|
||||
ErrorOr<int32_t> elementCount = Helper::readInt32(data, dataSize, initialPosition+1);
|
||||
if (elementCount.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, elementCount.errorCode);
|
||||
}
|
||||
|
||||
uint8_t contentType = data[initialPosition];
|
||||
// contained type byte + 4 length bytes = 5
|
||||
*processedDataSize = 5;
|
||||
switch (contentType) {
|
||||
case TagType::END: {
|
||||
// everything except content has been touched at this point
|
||||
// and a list of end tags has no content that could be read
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
contents.push_back(new Tag::End());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT8: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<int8_t> nextInt = Helper::readInt8(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextInt.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int8("", nextInt.value));
|
||||
// The below code would produce a warning on GCC and Clang
|
||||
// about the computed value not being used. While this does
|
||||
// apply inside this function, it is ultimately not true
|
||||
// as the pointer is used both inside and outside of the
|
||||
// function.
|
||||
*processedDataSize += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT16: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<int16_t> nextInt = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextInt.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int16("", nextInt.value));
|
||||
*processedDataSize += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT32: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<int32_t> nextInt = Helper::readInt32(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextInt.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int32("", nextInt.value));
|
||||
*processedDataSize += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::FLOAT: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<float> nextFloat = Helper::readFloat(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextFloat.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextFloat.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Float("", nextFloat.value));
|
||||
*processedDataSize += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT64: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<int64_t> nextInt = Helper::readInt64(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextInt.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextInt.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int64("", nextInt.value));
|
||||
*processedDataSize += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::DOUBLE: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<double> nextDouble = Helper::readDouble(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextDouble.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextDouble.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Double("", nextDouble.value));
|
||||
*processedDataSize += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT8_ARRAY: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<std::vector<int8_t>> nextArray = Helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextArray.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int8Array("", nextArray.value));
|
||||
*processedDataSize += (uint64_t) nextArray.value.size();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::STRING: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<tiny_utf8::string> nextString = Helper::readString(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextString.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextString.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::String("", nextString.value));
|
||||
// this cannot be an error because it just got read
|
||||
int16_t nextStringSize = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value;
|
||||
*processedDataSize += (uint64_t) nextStringSize + 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::LIST: {
|
||||
uint64_t* containedDataSize = new uint64_t;
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
*containedDataSize = 0;
|
||||
|
||||
ErrorOr<std::vector<Tag::Generic*>> nextListContents = deserializeRawListContents(data, dataSize, initialPosition+*processedDataSize, containedDataSize);
|
||||
if (nextListContents.isError) {
|
||||
delete containedDataSize;
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextListContents.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::List("", nextListContents.value));
|
||||
*processedDataSize += *containedDataSize;
|
||||
}
|
||||
delete containedDataSize;
|
||||
break;
|
||||
}
|
||||
case TagType::COMPOUND: {
|
||||
uint64_t* containedDataSize = new uint64_t;
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
*containedDataSize = 0;
|
||||
ErrorOr<std::vector<Tag::Generic*>> nextCompoundData = deserialize(data, dataSize, initialPosition+*processedDataSize, containedDataSize);
|
||||
if (nextCompoundData.isError) {
|
||||
delete containedDataSize;
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextCompoundData.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Compound("", nextCompoundData.value));
|
||||
*processedDataSize += *containedDataSize;
|
||||
}
|
||||
delete containedDataSize;
|
||||
break;
|
||||
}
|
||||
case TagType::INT32_ARRAY: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<std::vector<int32_t>> nextArray = Helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextArray.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int32Array("", nextArray.value));
|
||||
*processedDataSize += (uint64_t) nextArray.value.size() * 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TagType::INT64_ARRAY: {
|
||||
for (int32_t i=0; i<elementCount.value; i++) {
|
||||
ErrorOr<std::vector<int64_t>> nextArray = Helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
|
||||
if (nextArray.isError) {
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, nextArray.errorCode);
|
||||
}
|
||||
contents.push_back(new Tag::Int64Array("", nextArray.value));
|
||||
*processedDataSize += (uint64_t) nextArray.value.size() * 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::INVALID_TYPE);
|
||||
}
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(contents);
|
||||
}
|
||||
|
||||
// comment about blindly passing up error codes applies here
|
||||
//
|
||||
// The return value of this function is a vector of tags
|
||||
// instead of a compound tag due to a spec extension that allows
|
||||
// for any bare tag to be valid NBT data without a containing
|
||||
// compound tag. This also just makes the implementation easier.
|
||||
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
|
||||
if (initialPosition >= dataSize) {
|
||||
if (processedDataSize!=nullptr) *processedDataSize=0;
|
||||
return ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::OUT_OF_RANGE);
|
||||
|
||||
// An interesting question at this point is whether we should
|
||||
// consider empty input valid or invalid NBT data.
|
||||
//
|
||||
// The original spec says that the top-most tag is always a
|
||||
// compound (or in more recent times, the Microsoft-commercialized
|
||||
// in-game-purchase-enabling version also allows list tags)
|
||||
// which automatically means that no data is invalid data...
|
||||
// I don't see a reason why having a different tag as the top-most
|
||||
// tag shouldn't be valid NBT in which case we have to face the
|
||||
// question whether no data is invalid or just empty NBT data.
|
||||
//
|
||||
// This seems like a reasonable extension to the spec to me and
|
||||
// it should be backwards compatible AFAIK.
|
||||
//
|
||||
// - BodgeMaster
|
||||
}
|
||||
|
||||
std::vector<Tag::Generic*> tags = std::vector<Tag::Generic*>();
|
||||
ErrorOr<std::vector<Tag::Generic*>> returnValue;
|
||||
uint64_t currentPosition = initialPosition;
|
||||
while (currentPosition<dataSize) {
|
||||
ErrorOr<uint64_t> nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition);
|
||||
if (nextTagSize.isError) {
|
||||
if (nextTagSize.errorCode == ErrorCodes::NOT_YET_KNOWN) {
|
||||
ErrorOr<tiny_utf8::string> tagName = Helper::readString(data, dataSize, currentPosition+1);
|
||||
if (tagName.isError) {
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, tagName.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
|
||||
// used seek to the start of the list's/compound’s contents
|
||||
//
|
||||
// there is no way this is an error bc it gets
|
||||
// checked while trying to parse the string above
|
||||
int16_t nameSize = Helper::readInt16(data, dataSize, currentPosition+1).value;
|
||||
|
||||
uint64_t* processedTagSize = new uint64_t;
|
||||
*processedTagSize = 0;
|
||||
|
||||
if (data[currentPosition]==TagType::LIST) {
|
||||
// type byte + two name size bytes = 3
|
||||
ErrorOr<std::vector<Tag::Generic*>> listData = deserializeRawListContents(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize);
|
||||
if (listData.isError) {
|
||||
delete processedTagSize;
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, listData.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
tags.push_back(new Tag::List(tagName.value, listData.value));
|
||||
*processedTagSize += (uint64_t) nameSize + 3;
|
||||
}
|
||||
if (data[currentPosition]==TagType::COMPOUND) {
|
||||
// type byte + two name size bytes = 3
|
||||
ErrorOr<std::vector<Tag::Generic*>> compoundData = deserialize(data, dataSize, currentPosition + (uint64_t) nameSize + 3, processedTagSize);
|
||||
if (compoundData.isError) {
|
||||
delete processedTagSize;
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, compoundData.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
tags.push_back(new Tag::Compound(tagName.value, compoundData.value));
|
||||
*processedTagSize += (uint64_t) nameSize + 3;
|
||||
}
|
||||
currentPosition += *processedTagSize;
|
||||
|
||||
delete processedTagSize;
|
||||
continue;
|
||||
}
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, nextTagSize.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
|
||||
if (currentPosition + nextTagSize.value > dataSize) {
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::OVERRUN);
|
||||
goto returnNow;
|
||||
}
|
||||
|
||||
// recursion abort condition
|
||||
if (data[currentPosition]==TagType::END) {
|
||||
// not appending an end tag as it is built into
|
||||
// the compound anyway
|
||||
currentPosition++;
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(tags);
|
||||
goto returnNow;
|
||||
}
|
||||
|
||||
// nameSize cannot be an error here bc it got checked in
|
||||
// nextTagSize() already
|
||||
int16_t nameSize = Helper::readInt16(data, dataSize, currentPosition+1).value;
|
||||
|
||||
ErrorOr<tiny_utf8::string> name = Helper::readString(data, dataSize, currentPosition+1);
|
||||
if (name.isError) {
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, name.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
|
||||
// Overrun / out of range errors have already been ruled out by
|
||||
// checking the tag size against the total amount of data.
|
||||
switch (data[currentPosition]) {
|
||||
case TagType::INT8: {
|
||||
int8_t content = Helper::readInt8(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int8(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::INT16: {
|
||||
int16_t content = Helper::readInt16(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int16(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::INT32: {
|
||||
int32_t content = Helper::readInt32(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int32(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::INT64: {
|
||||
int64_t content = Helper::readInt64(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int64(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::FLOAT: {
|
||||
float content = Helper::readFloat(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Float(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::DOUBLE: {
|
||||
double content = Helper::readDouble(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Double(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::INT8_ARRAY: {
|
||||
std::vector<int8_t> content = Helper::readInt8Array(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int8Array(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::STRING: {
|
||||
ErrorOr<tiny_utf8::string> content = Helper::readString(data, dataSize, currentPosition+nameSize+3);
|
||||
if (content.isError) {
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, content.errorCode);
|
||||
goto returnNow;
|
||||
}
|
||||
tags.push_back(new Tag::String(name.value, content.value));
|
||||
break;
|
||||
}
|
||||
case TagType::INT32_ARRAY: {
|
||||
std::vector<int32_t> content = Helper::readInt32Array(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int32Array(name.value, content));
|
||||
break;
|
||||
}
|
||||
case TagType::INT64_ARRAY: {
|
||||
std::vector<int64_t> content = Helper::readInt64Array(data, dataSize, currentPosition+nameSize+3).value;
|
||||
tags.push_back(new Tag::Int64Array(name.value, content));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(true, ErrorCodes::UNKNOWN);
|
||||
goto returnNow;
|
||||
}
|
||||
}
|
||||
|
||||
currentPosition += nextTagSize.value;
|
||||
}
|
||||
returnValue = ErrorOr<std::vector<Tag::Generic*>>(tags);
|
||||
goto returnNow;
|
||||
|
||||
returnNow:
|
||||
if (processedDataSize!=nullptr) {
|
||||
*processedDataSize = currentPosition-initialPosition;
|
||||
}
|
||||
if (returnValue.isError) {
|
||||
for (uint64_t i=0; i<tags.size(); i++) {
|
||||
delete tags[i];
|
||||
}
|
||||
}
|
||||
return returnValue;
|
||||
}
|
||||
|
||||
bool validateRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
|
||||
// get contained data length by reading it manually because
|
||||
// the function that does it normally can't deal with
|
||||
|
|
|
@ -301,5 +301,6 @@ namespace NBT {
|
|||
};
|
||||
}
|
||||
|
||||
// Parses NBT data from data[initialPosition] onwards and returns the tags
// found, stopping at an end tag or at the end of the buffer.
//
//   data, dataSize     buffer to read from and its size in bytes
//   initialPosition    index at which parsing starts (defaults to 0)
//   processedDataSize  optional; if not null, receives the number of bytes
//                      consumed, counted from initialPosition
//
// Yields ErrorCodes::OUT_OF_RANGE when initialPosition is not inside the
// buffer. The returned tags are allocated with new and are not freed by
// this function when it succeeds.
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
|
||||
// NOTE(review): the definition is not visible from here. Presumably checks
// whether the bytes starting at data[initialPosition] form well-formed NBT
// data and, if processedDataSize is not null, reports how many bytes were
// examined - confirm against the implementation.
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue