From 6e561df212b2da2c7f84e196477be3e4fdf85e86 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Wed, 13 Nov 2019 16:00:34 -0500 Subject: [PATCH 01/31] adding lambdas to select statements for customer lists and reports --- src/dbtypes.h | 3 +- src/querycommon.h | 6 +- src/queryinterpreter.cpp | 107 ++++--- src/queryparserosl.cpp | 2 +- src/queryparserosl.h | 424 +++++++++++++++++++++++++-- src/result.cpp | 11 +- src/result.h | 2 +- src/rpc.h | 3 +- src/rpc_query.cpp | 609 ++++++++++++++++++++++++++++++++------- src/rpc_query.h | 2 + 10 files changed, 992 insertions(+), 177 deletions(-) diff --git a/src/dbtypes.h b/src/dbtypes.h index e64530e..fd105b7 100644 --- a/src/dbtypes.h +++ b/src/dbtypes.h @@ -81,7 +81,8 @@ namespace openset intProp = 1, doubleProp = 2, boolProp = 3, - textProp = 4 + textProp = 4, + runTimeTypeProp = 5 }; #pragma pack(push,1) diff --git a/src/querycommon.h b/src/querycommon.h index 6d5d488..c590f50 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -49,6 +49,7 @@ namespace openset quarter_date, year_number, year_date, + //lambda, }; enum class OpCode_e : int32_t @@ -312,7 +313,7 @@ namespace openset { "val", Modifiers_e::value }, { "variable", Modifiers_e::var }, { "var", Modifiers_e::var }, - { "lambda", Modifiers_e::var }, + //{ "lambda", Modifiers_e::lambda }, }; // Modifier to String (for debug output) static const unordered_map ModifierDebugStrings = { { Modifiers_e::sum, "SUM" }, @@ -340,6 +341,7 @@ namespace openset { Modifiers_e::quarter_date, "DT_QUARTER" }, { Modifiers_e::year_number, "YEAR" }, { Modifiers_e::year_date, "DT_YEAR" }, + //{ Modifiers_e::lambda, "LAMBDA"} }; // opCode to String (for debug output) static const unordered_map OpDebugStrings = { { OpCode_e::NOP, "NOP" }, @@ -670,6 +672,7 @@ namespace openset bool isSet { false }; bool isProp { false }; bool isRowObject { false }; + bool aggOnce { false }; // customer props, distinct counts and value selects are counted once per branch per person in a result int 
popRefs { 0 }; // reference counter for pops int pushRefs { 0 }; // reference counter for pushes int sortOrder { -1 }; // used for sorting in property order @@ -714,6 +717,7 @@ namespace openset isSet = source.isSet; isProp = source.isProp; isRowObject = source.isRowObject; + aggOnce = source.aggOnce; popRefs = source.popRefs; pushRefs = source.pushRefs; sortOrder = source.sortOrder; diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index f4e49a9..c0fbfe6 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -11,8 +11,8 @@ const int STACK_DEPTH = 64; openset::query::Interpreter::Interpreter(Macro_s& macros, const InterpretMode_e interpretMode) : macros(macros), - interpretMode(interpretMode), - rowKey() + rowKey(), + interpretMode(interpretMode) { stack = new cvar[STACK_DEPTH]; @@ -244,7 +244,7 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ (resCol.modifier == Modifiers_e::var) ? fixToInt(resCol.value) : columns->cols[resCol.distinctColumn], - (resCol.schemaColumn == PROP_UUID || resCol.modifier == Modifiers_e::dist_count_person) ? + (resCol.aggOnce) ? 0 : (macros.useStampedRowIds ? columns->cols[PROP_STAMP] : @@ -254,73 +254,94 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ continue; eventDistinct.emplace(distinctKey, 1); } - const auto resultIndex = resCol.index + segmentColumnShift; + + auto& resultColumnValue = resultColumns->columns[resCol.index + segmentColumnShift].value; + auto& resultColumnCount = resultColumns->columns[resCol.index + segmentColumnShift].count; + + const auto aggValue = resCol.lambdaIndex == -1 ? 
+ columns->cols[resCol.column] : + resCol.value.getInt64(); + switch (resCol.modifier) { - case Modifiers_e::sum: - if (columns->cols[resCol.column] != NONE) - { - if (resultColumns->columns[resultIndex].value == NONE) - resultColumns->columns[resultIndex].value = columns->cols[resCol.column]; - else - resultColumns->columns[resultIndex].value += columns->cols[resCol.column]; - } - break; - case Modifiers_e::min: - if (columns->cols[resCol.column] != NONE && (resultColumns->columns[resultIndex].value == NONE || - resultColumns->columns[resultIndex].value > columns->cols[resCol.column])) - resultColumns->columns[resultIndex].value = columns->cols[resCol.column]; - break; - case Modifiers_e::max: - if (columns->cols[resCol.column] != NONE && (resultColumns->columns[resultIndex].value == NONE || - resultColumns->columns[resultIndex].value < columns->cols[resCol.column])) - resultColumns->columns[resultIndex].value = columns->cols[resCol.column]; - break; case Modifiers_e::avg: - if (columns->cols[resCol.column] != NONE) + case Modifiers_e::sum: + if (aggValue != NONE) { - if (resultColumns->columns[resultIndex].value == NONE) + if (resultColumnValue == NONE) { - resultColumns->columns[resultIndex].value = columns->cols[resCol.column]; - resultColumns->columns[resultIndex].count = 1; + resultColumnValue = aggValue; + resultColumnCount = 1; } else { - resultColumns->columns[resultIndex].value += columns->cols[resCol.column]; - resultColumns->columns[resultIndex].count++; + resultColumnValue += aggValue; + ++resultColumnCount; } } break; - case Modifiers_e::dist_count_person: case Modifiers_e::count: - if (columns->cols[resCol.column] != NONE) + case Modifiers_e::min: + if (aggValue != NONE && (resultColumnValue == NONE || resultColumnValue > aggValue)) + resultColumnValue = aggValue; + break; + case Modifiers_e::max: + if (aggValue != NONE && (resultColumnValue == NONE || resultColumnValue < aggValue)) + resultColumnValue = aggValue; + break; + case 
Modifiers_e::dist_count_person: + case Modifiers_e::count: + if (aggValue != NONE) { - if (resultColumns->columns[resultIndex].value == NONE) - resultColumns->columns[resultIndex].value = 1; + if (resultColumnValue == NONE) + resultColumnValue = 1; else - resultColumns->columns[resultIndex].value++; + ++resultColumnValue; } break; case Modifiers_e::value: - resultColumns->columns[resultIndex].value = columns->cols[resCol.column]; + resultColumnValue = aggValue; break; - case Modifiers_e::var: + /*case Modifiers_e::var: if (resultColumns->columns[resultIndex].value == NONE) resultColumns->columns[resultIndex].value = 1; //fixToInt(resCol.value); else resultColumns->columns[resultIndex].value++; //+= fixToInt(resCol.value); - break; + break;*/ default: break; } } }; - rowKey.clear(); // run property lambdas! + + rowKey.clear(); + + // run lambdas result columns if (macros.vars.columnLambdas.size()) - for (auto lambdaIndex : macros.vars.columnLambdas) - opRunner( - // call the property lambda - &macros.code.front() + lambdaIndex, - currentRow); + { + for (auto varIndex : macros.vars.columnLambdas) + { + switch (macros.vars.columnVars[varIndex].schemaType) + { + case PropertyTypes_e::intProp: + macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getInt32(); + break; + case PropertyTypes_e::doubleProp: + macros.vars.columnVars[varIndex].value = round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getDouble() * 10000.0); + break; + case PropertyTypes_e::textProp: + { + const auto tString = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString(); + auto hash= MakeHash(tString); + result->addLocalText(hash, tString); // cache this text + macros.vars.columnVars[varIndex].value = hash; + } + break; + default: + macros.vars.columnVars[varIndex].value = 0; + } + } + } + auto depth = 0; for (const auto& item : marshalParams) { diff --git a/src/queryparserosl.cpp 
b/src/queryparserosl.cpp index b15357f..2484349 100644 --- a/src/queryparserosl.cpp +++ b/src/queryparserosl.cpp @@ -62,7 +62,7 @@ string openset::query::MacroDbg(Macro_s& macro) for (auto& v : macro.vars.userVars) { ss << padding(v.index, 3, true) << " | "; - ss << padding("'" + v.actual + "'", 20, false, ' ') << " | " << + ss << padding("'" + v.actual + "'", 22, false, ' ') << " | " << (v.isProp ? "is property" : ""); ss << endl; } diff --git a/src/queryparserosl.h b/src/queryparserosl.h index 6f40245..dd392eb 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -138,6 +138,13 @@ namespace openset::query } }; + enum class ParseMode_e + { + report, + segment, + customers + }; + enum class MiddleOp_e { push_user, @@ -313,6 +320,8 @@ namespace openset::query Debugger_s lastDebug; errors::Error error; + ParseMode_e parseMode { ParseMode_e::report }; + QueryParser() = default; ~QueryParser() = default; @@ -962,8 +971,8 @@ namespace openset::query return false; } - // select - int parseSelect(Blocks::Line& tokens, const int start) + // select when parseMode is report + int parseSelectReport(Blocks::Line& tokens, const int start) { const std::unordered_set newStatementWords = { "count", @@ -972,8 +981,7 @@ namespace openset::query "avg", "sum", "value", - "var", - "code" + //"var", }; auto idx = start + 1; @@ -993,7 +1001,7 @@ namespace openset::query throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "expecting an aggregate in `select` statement", + "select: expecting an aggregate type (report query permits: count, min, max, avg, sum, value)", lastDebug }; @@ -1002,24 +1010,257 @@ namespace openset::query throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "expecting a text value in `as` statement", + "select: expecting a property name after aggregate", lastDebug }; - auto modifier = ColumnModifiers.find(token)->second; const auto columnName = nextToken; // actual property 
name in table auto keyColumn = columnName; // distinct to itself auto asName = columnName; // aliased as itself + db::PropertyTypes_e type = db::PropertyTypes_e::runTimeTypeProp; + + idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? std::string() : tokens[idx + 1]; + + if (token == "as") + { + + if (!nextToken.length() || !isTextual(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: expecting a name for `as`", + lastDebug + }; + + if (isTableColumn(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: name specified for `as` cannot be an existing table property", + lastDebug + }; + + asName = nextToken; + idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? std::string() : tokens[idx + 1]; + } + + if (token == "key") + { + + if (!nextToken.length() || !isTextual(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: expecting a name in `key` portion of statement", + lastDebug + }; + + if (!isTableColumn(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: `key` must be a table property", + lastDebug + }; + + keyColumn = nextToken; + idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? 
std::string() : tokens[idx + 1]; + } + + if (token == "type") + { + if (!nextToken.length() || !isTextual(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: expecting a `type` for `lambda` lambda ('int', 'double' or 'text')", + lastDebug + }; + + if (nextToken == "int") + type = db::PropertyTypes_e::intProp; + else if (nextToken == "double") + type = db::PropertyTypes_e::doubleProp; + else if (nextToken == "text") + type = db::PropertyTypes_e::textProp; + else + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: `type` for `lambda` may be 'int', 'double' or 'text'", + lastDebug + }; + + idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? std::string() : tokens[idx + 1]; + } + + auto selectLambdaId = -1; + + if (token == "{") + { + if (type == db::PropertyTypes_e::runTimeTypeProp) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: `type` is required when using a `lambda`", + lastDebug + }; + + const auto matchingIndex = seekMatchingCurly(tokens, idx); + + const Blocks::Line selectLambda(tokens.begin() + idx + 1, tokens.begin() + matchingIndex); + + if (isProperty(columnName) || isTableColumn(columnName)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "result columns in lambda aggregations cannot use an existing property name", + lastDebug + }; + + if (selectLambda.size() == 0) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: lambda contains no code", + lastDebug + }; + + // if there is no logic, just straight iteration we push the logic block as -1 + // the interpreter will run in a true state for the logic if it sees -1 + selectLambdaId = addLinesAsBlock(selectLambda); + idx = matchingIndex + 1; + } + + // automatic lambda - assume this is a 
just a variable + if (!isTableColumn(columnName) && !isProperty(columnName) && selectLambdaId == -1) + { + if (type == db::PropertyTypes_e::runTimeTypeProp) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: when using a variable in a select you must specify an output type", + lastDebug + }; + + const Blocks::Line selectLambda { columnName }; + selectLambdaId = addLinesAsBlock(selectLambda); + } + + // already used, then throw and suggest using `as` + if (getTrackingIndex(selects, asName) != -1) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "`as` name in `select` already in use", + lastDebug + }; + + // register this property as having been referenced + const auto columnIdx = selectLambdaId == -1 ? columnIndex(columnName) : 0; + const auto selectIdx = selectsIndex(asName); + + if (columnName == "session") + { + usesSessions = true; + // session counting uses a specialized count method + modifier = ColumnModifiers.find("dist_count_person")->second; + + // reference session so it becomes part of data set + columnIndex("session"); + } + + auto aggOnce = false; + + // properties, the id property, lambdas, value types, and dist_count_person are + // counted just once per customer in result branches. + if (isProperty(columnName) || + selectLambdaId != -1 || + modifier == Modifiers_e::value || + modifier == Modifiers_e::dist_count_person || + columnName == "id") + aggOnce = true; - if (!isTableColumn(columnName)) + const auto propInfo = tableColumns->getProperty(columnName); + + Variable_s var(columnName, asName, "property", modifier); + var.distinctColumnName = keyColumn; + + var.index = selectIdx; // index in variable array + var.column = columnIdx; // index in grid + var.schemaColumn = propInfo ? propInfo->idx : -1; + var.schemaType = !propInfo || type != db::PropertyTypes_e::runTimeTypeProp ? 
type : propInfo->type; + var.lambdaIndex = selectLambdaId; + var.aggOnce = aggOnce; + + // if this is selection is keyed to another property lets reference it as well + const auto keyIdx = selectLambdaId == -1 ? columnIndex(keyColumn) : 0; + var.distinctColumn = keyIdx; // index of key property in grid + + selectColumnInfo.push_back(var); + } + + // THROW - should have found `end` + } + + // select when parseMode is customers + int parseSelectCustomers(Blocks::Line& tokens, const int start) + { + const std::unordered_set newStatementWords = { + "value", + //"var", + }; + + auto idx = start + 1; + const auto end = static_cast(tokens.size()); + + while (idx < end) + { + auto token = tokens[idx]; + auto nextToken = idx + 1 >= static_cast(tokens.size()) ? std::string() : tokens[idx + 1]; + + // end of select definition + if (token == "end") + return idx + 1; + + // should be a modifier? + if (!ColumnModifiers.count(token)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: expecting an aggregate type (customers query permits: value)", + lastDebug + }; + + // should be a textual word + if (!isTextual(nextToken)) throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "expecting a table property", + "select: expecting a property name after aggregate", lastDebug }; + + auto modifier = ColumnModifiers.find(token)->second; + const auto columnName = nextToken; // actual property name in table + auto keyColumn = columnName; // distinct to itself + auto asName = columnName; // aliased as itself + db::PropertyTypes_e type = db::PropertyTypes_e::runTimeTypeProp; + idx += 2; token = tokens[idx]; @@ -1032,7 +1273,7 @@ namespace openset::query throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "expecting a name in `as` portion of `select` statement", + "select: expecting a name for `as`", lastDebug }; @@ -1040,7 +1281,7 @@ namespace openset::query 
throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "`as` portion of `select` statement cannot be a table property", + "select: name specified for `as` cannot be an existing table property", lastDebug }; @@ -1058,7 +1299,7 @@ namespace openset::query throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "expecting a name in `key` portion of `select` statement", + "select: expecting a name in `key` portion of statement", lastDebug }; @@ -1066,14 +1307,99 @@ namespace openset::query throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, - "`key` portion of `select` must be a table property", + "select: `key` must be a table property", lastDebug }; keyColumn = nextToken; idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? std::string() : tokens[idx + 1]; + } + + if (token == "type") + { + if (!nextToken.length() || !isTextual(nextToken)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: expecting a `type` for `lambda` lambda ('int', 'double' or 'text')", + lastDebug + }; + + if (nextToken == "int") + type = db::PropertyTypes_e::intProp; + else if (nextToken == "double") + type = db::PropertyTypes_e::doubleProp; + else if (nextToken == "text") + type = db::PropertyTypes_e::textProp; + else + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: `type` for `lambda` may be 'int', 'double' or 'text'", + lastDebug + }; + + idx += 2; + + token = tokens[idx]; + nextToken = idx + 1 >= static_cast(tokens.size()) ? 
std::string() : tokens[idx + 1]; } + auto selectLambdaId = -1; + + if (token == "{") + { + if (type == db::PropertyTypes_e::runTimeTypeProp) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: `type` is required when using a `lambda`", + lastDebug + }; + + const auto matchingIndex = seekMatchingCurly(tokens, idx); + + const Blocks::Line selectLambda(tokens.begin() + idx + 1, tokens.begin() + matchingIndex); + + if (isProperty(columnName) || isTableColumn(columnName)) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "result columns in lambda aggregations cannot use an existing property name", + lastDebug + }; + + if (selectLambda.size() == 0) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: lambda contains no code", + lastDebug + }; + + // if there is no logic, just straight iteration we push the logic block as -1 + // the interpreter will run in a true state for the logic if it sees -1 + selectLambdaId = addLinesAsBlock(selectLambda); + idx = matchingIndex + 1; + } + + // automatic lambda - assume this is a just a variable + if (!isTableColumn(columnName) && !isProperty(columnName) && selectLambdaId == -1) + { + if (type == db::PropertyTypes_e::runTimeTypeProp) + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "select: when using a variable in a select you must specify an output type", + lastDebug + }; + + const Blocks::Line selectLambda { columnName }; + selectLambdaId = addLinesAsBlock(selectLambda); + } // already used, then throw and suggest using `as` if (getTrackingIndex(selects, asName) != -1) @@ -1085,8 +1411,7 @@ namespace openset::query }; // register this property as having been referenced - const auto columnIdx = columnIndex(columnName); - + const auto columnIdx = selectLambdaId == -1 ? 
columnIndex(columnName) : 0; const auto selectIdx = selectsIndex(asName); if (columnName == "session") @@ -1099,6 +1424,17 @@ namespace openset::query columnIndex("session"); } + auto aggOnce = false; + + // properties, the id property, lambdas, value types, and dist_count_person are + // counted just once per customer in result branches. + if (isProperty(columnName) || + selectLambdaId != -1 || + modifier == Modifiers_e::value || + modifier == Modifiers_e::dist_count_person || + columnName == "id") + aggOnce = true; + const auto propInfo = tableColumns->getProperty(columnName); Variable_s var(columnName, asName, "property", modifier); @@ -1106,20 +1442,47 @@ namespace openset::query var.index = selectIdx; // index in variable array var.column = columnIdx; // index in grid - var.schemaColumn = propInfo->idx; - var.schemaType = propInfo->type; + var.schemaColumn = propInfo ? propInfo->idx : -1; + var.schemaType = !propInfo || type != db::PropertyTypes_e::runTimeTypeProp ? type : propInfo->type; + var.lambdaIndex = selectLambdaId; + var.aggOnce = aggOnce; // if this is selection is keyed to another property lets reference it as well - const auto keyIdx = columnIndex(keyColumn); + const auto keyIdx = selectLambdaId == -1 ? 
columnIndex(keyColumn) : 0; var.distinctColumn = keyIdx; // index of key property in grid selectColumnInfo.push_back(var); } // THROW - should have found `end` + } + int parseSelect(Blocks::Line& tokens, const int start) + { + switch (parseMode) + { + case ParseMode_e::report: + return parseSelectReport(tokens, start); + case ParseMode_e::segment: + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "`select` is not used in segment scripts", + lastDebug + }; + case ParseMode_e::customers: + return parseSelectCustomers(tokens, start); + default: + throw QueryParse2Error_s { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "unexpected parse mode while parsing select", + lastDebug + }; + } } + int extractLine(Blocks::Line& tokens, const int start, Blocks::Line& extraction) { const std::unordered_set forceNewLine = { @@ -2100,7 +2463,7 @@ namespace openset::query if (isString(item)) { - auto cleanString = stripQuotes(item); + const auto cleanString = stripQuotes(item); auto stringIdx = stringLiteralIndex(cleanString); middle.emplace_back(MiddleOp_e::push_literal, stringIdx, lastDebug.line, start); @@ -2740,7 +3103,6 @@ namespace openset::query void compile(Macro_s& inMacros) { - auto& finCode = inMacros.code; auto& lambdas = inMacros.lambdas; @@ -3103,18 +3465,19 @@ namespace openset::query } } - // add user vars - //Tracking stringLiterals; - //Tracking properties; - //Tracking aggregates; - auto index = 0; for (auto& v : columns) { const auto schemaInfo = tableColumns->getProperty(v); + if (!schemaInfo) + continue; + if (v == "session"s) + { + usesSessions = true; inMacros.sessionColumn = index; + } inMacros.vars.tableVars.push_back(Variable_s{v, ""}); inMacros.vars.tableVars.back().index = index; @@ -3162,6 +3525,14 @@ namespace openset::query inMacros.vars.columnVars = selectColumnInfo; + index = 0; + for (auto& col : selectColumnInfo) + { + if (col.lambdaIndex != -1) + 
inMacros.vars.columnLambdas.push_back(index); + ++index; + } + inMacros.filters = filters; } @@ -3723,8 +4094,9 @@ namespace openset::query inMacros.rawIndex += word + " "; } - bool compileQuery(const std::string& query, openset::db::Properties* columnsPtr, Macro_s& inMacros, ParamVars* templateVars) + bool compileQuery(const std::string& query, openset::db::Properties* columnsPtr, Macro_s& inMacros, ParamVars* templateVars, ParseMode_e parseAs = ParseMode_e::report) { + parseMode = parseAs; try { diff --git a/src/result.cpp b/src/result.cpp index ad51b69..24ab974 100644 --- a/src/result.cpp +++ b/src/result.cpp @@ -136,10 +136,10 @@ void ResultSet::setAccTypesFromMacros(const openset::query::Macro_s &macros) accTypes[dataIndex] = ResultTypes_e::Double; break; case db::PropertyTypes_e::boolProp: - accTypes[dataIndex] = ResultTypes_e::Int; + accTypes[dataIndex] = ResultTypes_e::Bool; break; case db::PropertyTypes_e::textProp: - accTypes[dataIndex] = ResultTypes_e::Int; + accTypes[dataIndex] = ResultTypes_e::Text; break; case db::PropertyTypes_e::freeProp: default: @@ -210,7 +210,7 @@ robin_hood::unordered_map> merge * merge performs a sync merge on a vector of sorted results. * * STL was used here because it has great iterators, but a little is lost in -* readabilty. I apologize in advance for the **blah stuff. +* readability. I apologize in advance for the **blah stuff. * * Step one make a vector of iterators for each result in the results vector. * (note, the results vector contains vectors of sorted results). 
@@ -345,7 +345,6 @@ ResultSet::RowVector mergeResultSets( } break; case openset::query::Modifiers_e::value: - left->columns[valueIndex].value = right->columns[valueIndex].value; left->columns[valueIndex].count = right->columns[valueIndex].count; break; @@ -384,10 +383,10 @@ ResultSet::RowVector mergeResultSets( } void ResultMuxDemux::mergeMacroLiterals( - const openset::query::Macro_s macros, + const openset::query::Macro_s& macros, std::vector& resultSets) { - // copy literals from macros into a localtext object + // copy literals from macros into a local text object for (auto& l : macros.vars.literals) resultSets.front()->addLocalText(l.hashValue, l.value); } diff --git a/src/result.h b/src/result.h index 6da652b..0fd409d 100644 --- a/src/result.h +++ b/src/result.h @@ -285,7 +285,7 @@ namespace openset // JSON public: static void mergeMacroLiterals( - query::Macro_s macros, + const query::Macro_s& macros, std::vector& resultSets); static char* multiSetToInternode( diff --git a/src/rpc.h b/src/rpc.h index ceea8e6..c9784d4 100644 --- a/src/rpc.h +++ b/src/rpc.h @@ -50,9 +50,10 @@ namespace openset::comms }, { "GET", std::regex(R"(^/v1/tables(\/|\?|\#|)$)"), RpcTable::table_list, {} }, // RpcQuery - { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/event(\/|\?|\#|)$)"), RpcQuery::event, { { 1, "table" } } }, + { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/report(\/|\?|\#|)$)"), RpcQuery::event, { { 1, "table" } } }, { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/segment(\/|\?|\#|)$)"), RpcQuery::segment, { { 1, "table" } } }, { "GET", std::regex(R"(^/v1/query/([a-z0-9_]+)/customer(\/|\?|\#|)$)"), RpcQuery::customer, { { 1, "table" } } }, + { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/customers(\/|\?|\#|)$)"), RpcQuery::segment_customers, { { 1, "table" } } }, { "GET", std::regex(R"(^/v1/query/([a-z0-9_]+)/property/([a-z0-9_\.]+)(\/|\?|\#|)$)"), diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index 0417771..10de787 100644 --- a/src/rpc_query.cpp +++ 
b/src/rpc_query.cpp @@ -67,19 +67,19 @@ shared_ptr forkQuery( const int trim = -1, const int64_t bucket = 0, const int64_t forceMin = std::numeric_limits::min(), - const int64_t forceMax = std::numeric_limits::min(), + const int64_t forceMax = std::numeric_limits::max(), const int64_t retryCount = 1) { auto newParams = message->getQuery(); newParams.emplace("fork", "true"); - const auto startTime = Now(); // special case... if we ran this query during a map change, run it again (re-fork) + const auto startTime = Now(); + + // special case... if we ran this query during a map change, run it again (re-fork) if (openset::globals::sentinel->wasDuringMapChange(startTime - 1, startTime)) { const auto backOff = (retryCount * retryCount) * 20; - ThreadSleep( - backOff < 10'000 - ? backOff - : 10'000); + ThreadSleep(backOff < 10'000 ? backOff : 10'000); + return forkQuery( table, message, @@ -94,9 +94,10 @@ shared_ptr forkQuery( forceMax, retryCount + 1); } - const auto setCount = resultSetCount - ? resultSetCount - : 1; // call all nodes and gather results - JSON is what's coming back + + const auto setCount = resultSetCount ? resultSetCount : 1; + + // call all nodes and gather results - JSON is what's coming back // NOTE - it would be fully possible to flatten results to binary auto result = openset::globals::mapper->dispatchCluster( message->getMethod(), @@ -105,15 +106,14 @@ shared_ptr forkQuery( message->getPayload(), message->getPayloadLength(), true); + const auto dispatchEndTime = Now(); + // special case... if we ran this query during a map change, run it again (re-fork) if (openset::globals::sentinel->wasDuringMapChange(startTime, dispatchEndTime)) { const auto backOff = (retryCount * retryCount) * 20; - ThreadSleep( - backOff < 10000 - ? backOff - : 10000); + ThreadSleep( backOff < 10000 ? 
backOff : 10000); return forkQuery( table, message, @@ -128,6 +128,7 @@ shared_ptr forkQuery( forceMax, retryCount + 1); } + std::vector resultSets; for (auto& r : result.responses) { @@ -150,7 +151,9 @@ shared_ptr forkQuery( { message->reply(openset::http::StatusCode::client_error_bad_request, error); // free up the responses - openset::globals::mapper->releaseResponses(result); // clean up all those resultSet* + openset::globals::mapper->releaseResponses(result); + + // clean up all those resultSet* for (auto res : resultSets) delete res; return nullptr; @@ -169,21 +172,28 @@ shared_ptr forkQuery( openset::errors::errorCode_e::route_error, "potential node failure - please re-issue the request" }, - message); // free up the responses - openset::globals::mapper->releaseResponses(result); // clean up all those resultSet* + message); + + // free up the responses + openset::globals::mapper->releaseResponses(result); + // clean up all those resultSet* for (auto res : resultSets) delete res; return nullptr; } } auto resultJson = make_shared(); - ResultMuxDemux::resultSetToJson(resultColumnCount, setCount, resultSets, resultJson.get()); // free up the responses + ResultMuxDemux::resultSetToJson(resultColumnCount, setCount, resultSets, resultJson.get()); + + // free up the responses openset::globals::mapper->releaseResponses(result); // clean up all those resultSet* - for (auto r : resultSets) - delete r; + for (auto res : resultSets) + delete res; + if (bucket) ResultMuxDemux::jsonResultHistogramFill(resultJson.get(), bucket, forceMin, forceMax); + switch (sortMode) { case ResultSortMode_e::key: @@ -194,6 +204,7 @@ shared_ptr forkQuery( break; default: ; } + ResultMuxDemux::jsonResultTrim(resultJson.get(), trim); // local function to fill Meta data in result JSON const auto fillMeta = [](const openset::query::VarList& mapping, cjson* jsonArray) { @@ -329,8 +340,9 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& const auto useStampCounts 
= message->getParamBool("stamp_counts"); const auto trimSize = message->getParamInt("trim", -1); const auto sortOrder = message->getParamString("order", "desc") == "asc" - ? ResultSortOrder_e::Asc - : ResultSortOrder_e::Desc; + ? ResultSortOrder_e::Asc + : ResultSortOrder_e::Desc; + auto sortColumnName = ""s; auto sortMode = ResultSortMode_e::column; if (message->isParam("sort")) @@ -339,10 +351,10 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& if (sortColumnName == "group") sortMode = ResultSortMode_e::key; } - const auto log = "Inbound events query (fork: "s + (isFork - ? "true"s - : "false"s) + ")"s; + + const auto log = "Inbound events query (fork: "s + (isFork ? "true"s : "false"s) + ")"s; Logger::get().info(log); + if (!tableName.length()) { RpcError( @@ -354,6 +366,7 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& message); return; } + if (!queryCode.length()) { RpcError( @@ -365,6 +378,7 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& message); return; } + auto table = database->getTable(tableName); if (!table) { @@ -376,8 +390,9 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& }, message); return; - } // override session time if provided, otherwise use table default + } + // override session time if provided, otherwise use table default const auto sessionTime = message->getParamInt("session_time", table->getSessionTime()); query::ParamVars paramVars = getInlineVaraibles(message); query::Macro_s queryMacros; // this is our compiled code block @@ -398,12 +413,14 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& message); return; } + if (p.error.inError()) { Logger::get().error(p.error.getErrorJSON()); message->reply(http::StatusCode::client_error_bad_request, p.error.getErrorJSON()); return; } + if (message->isParam("segments")) { const auto segmentText = 
message->getParamString("segments"); @@ -426,7 +443,9 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& message); return; } - } // set the sessionTime (timeout) value, this will get relayed + } + + // set the sessionTime (timeout) value, this will get relayed // through the to oloop_query, the customer object and finally the grid queryMacros.sessionTime = sessionTime; if (debug) @@ -489,13 +508,17 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); return; - } // We are a Fork! - // create list of active_owner parititions for factory function + } + + // We are a Fork! + + // create list of active_owner partitions for factory function auto activeList = globals::mapper->partitionMap.getPartitionsByNodeIdAndStates( globals::running->nodeId, { mapping::NodeState_e::active_owner }); + // Shared Results - Partitions spread across working threads (AsyncLoop's made by AsyncPool) // we don't have to worry about locking anything shared between partitions in the same // thread as they are executed serially, rather than in parallel. @@ -513,32 +536,42 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& resultSets.push_back( new ResultSet( queryMacros.vars.columnVars.size() * (queryMacros.segments.size() - ? queryMacros.segments.size() - : 1))); // nothing active - return an empty set - not an error + ? queryMacros.segments.size() + : 1))); + + // nothing active - return an empty set - not an error if (!activeList.size()) { // 1. Merge Macro Literals - ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); // 2. Merge the rows + ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); + + // 2. 
Merge the rows int64_t bufferLength = 0; const auto buffer = ResultMuxDemux::multiSetToInternode( queryMacros.vars.columnVars.size(), queryMacros.segments.size(), resultSets, - bufferLength); // reply will be responsible for buffer + bufferLength); + + // reply will be responsible for buffer message->reply(http::StatusCode::success_ok, buffer, bufferLength); - PoolMem::getPool().freePtr(buffer); // clean up stray resultSets + PoolMem::getPool().freePtr(buffer); + + // clean up stray resultSets Logger::get().info("event query on " + table->getName()); for (auto resultSet : resultSets) delete resultSet; return; - } /* + } + + /* * this Shuttle will gather our result sets roll them up and spit them back * * note that queryMacros are captured with a copy, this is because a reference * version will have had it's destructor called when the function exits. * * Note: ShuttleLamda comes in two versions, - */ //auto shuttle = new ShuttleLambdaAsync( + */ const auto shuttle = new ShuttleLambda( message, activeList.size(), @@ -567,9 +600,9 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& } // 1. Merge the Macro Literals - // 2. Merge the rows ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); + // 2. 
Merge the rows int64_t bufferLength = 0; const auto buffer = ResultMuxDemux::multiSetToInternode( queryMacros.vars.columnVars.size(), @@ -577,16 +610,317 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& //queryMacros.indexes.size(), resultSets, bufferLength); - /* - cjson tDoc; - ResultMuxDemux::resultSetToJson( + + message->reply(http::StatusCode::success_ok, buffer, bufferLength); + PoolMem::getPool().freePtr(buffer); + + Logger::get().info("event query on " + table->getName()); + + // clean up stray resultSets + for (auto resultSet : resultSets) + delete resultSet; + + // this will delete the shuttle, and clear up the CellQueryResult_s vector + release_cb(); + }); + + auto instance = 0; + + // pass factory function (as lambda) to create new cell objects + partitions->cellFactory( + activeList, + [shuttle, table, queryMacros, resultSets, &instance](AsyncLoop* loop) -> OpenLoop* + { + instance++; + return new OpenLoopQuery(shuttle, table, queryMacros, resultSets[loop->getWorkerId()], instance); + }); +} + +void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const RpcMapping& matches) +{ + auto database = globals::database; + const auto partitions = globals::async; + const auto request = message->getJSON(); + const auto tableName = matches.find("table"s)->second; + const auto queryCode = std::string { message->getPayload(), message->getPayloadLength() }; + const auto debug = message->getParamBool("debug"); + const auto isFork = message->getParamBool("fork"); + const auto trimSize = message->getParamInt("trim", -1); + const auto sortOrder = message->getParamString("order", "desc") == "asc" + ? 
ResultSortOrder_e::Asc + : ResultSortOrder_e::Desc; + + auto sortColumnName = ""s; + auto sortMode = ResultSortMode_e::column; + if (message->isParam("sort")) + { + sortColumnName = message->getParamString("sort"); + if (sortColumnName == "group") + sortMode = ResultSortMode_e::key; + } + + const auto log = "Inbound counts query (fork: "s + (isFork ? "true"s : "false"s) + ")"s; + Logger::get().info(log); + + if (!tableName.length()) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "missing or invalid table name" + }, + message); + return; + } + + if (!queryCode.length()) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "missing query code (POST query as text)" + }, + message); + return; + } + + auto table = database->getTable(tableName); + + if (!table) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "table could not be found" + }, + message); + return; + } + + // override session time if provided, otherwise use table default + const auto sessionTime = message->getParamInt("session_time", table->getSessionTime()); + query::ParamVars paramVars = getInlineVaraibles(message); + query::Macro_s queryMacros; // this is our compiled code block + query::QueryParser p; + + try + { + // compile in customers mode + p.compileQuery(queryCode.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ParseMode_e::customers); + } + catch (const std::runtime_error& ex) + { + RpcError( + errors::Error { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + std::string { ex.what() } + }, + message); + return; + } + + if (p.error.inError()) + { + Logger::get().error(p.error.getErrorJSON()); + message->reply(http::StatusCode::client_error_bad_request, p.error.getErrorJSON()); + return; + } + + if (message->isParam("segments")) + { + const auto segmentText = message->getParamString("segments"); + auto 
parts = split(segmentText, ','); + queryMacros.segments.clear(); + for (const auto& part : parts) + { + const auto trimmedPart = trim(part); + if (trimmedPart.length()) + queryMacros.segments.push_back(trimmedPart); + } + if (!queryMacros.segments.size()) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::syntax_error, + "no segment names specified" + }, + message); + return; + } + } + + // set the sessionTime (timeout) value, this will get relayed + // through the to oloop_query, the customer object and finally the grid + queryMacros.sessionTime = sessionTime; + if (debug) + { + auto debugOutput = MacroDbg(queryMacros); // reply as text + message->reply(http::StatusCode::success_ok, &debugOutput[0], debugOutput.length()); + return; + } + auto sortColumn = 0; + if (sortMode != ResultSortMode_e::key && sortColumnName.size()) + { + auto set = false; + auto idx = -1; + for (auto& c : queryMacros.vars.columnVars) + { + ++idx; + if (c.alias == sortColumnName) + { + set = true; + sortColumn = c.index; + break; + } + } + if (!set) + { + RpcError( + errors::Error { + errors::errorClass_e::parse, + errors::errorCode_e::syntax_error, + "sort property not found in query aggregates" + }, + message); + return; + } + } + + /* + * We are originating the query. + * + * At this point in the function we have validated that the + * script compiles, maps to the schema, is on a valid table, + * etc. + * + * We will call our forkQuery function. + * + * forQuery will call all the nodes (including this one) with the + * `is_fork` variable set to true. + */ + + if (!isFork) + { + const auto json = forkQuery( + table, + message, + queryMacros.vars.columnVars.size(), + queryMacros.segments.size(), + sortMode, + sortOrder, + sortColumn, + trimSize); + if (json) // if null/empty we had an error + message->reply(http::StatusCode::success_ok, *json); + return; + } + + // We are a Fork! 
+ + // create list of active_owner partitions for factory function + auto activeList = globals::mapper->partitionMap.getPartitionsByNodeIdAndStates( + globals::running->nodeId, + { + mapping::NodeState_e::active_owner + }); + + // Shared Results - Partitions spread across working threads (AsyncLoop's made by AsyncPool) + // we don't have to worry about locking anything shared between partitions in the same + // thread as they are executed serially, rather than in parallel. + // + // By creating one result set for each AsyncLoop thread we can have a lockless ResultSet + // as well as generally reduce the number of ResultSets needed (especially when partition + // counts are high). + // + // Note: These are heap objects because we lose scope, as this function + // exits before the result objects are used. + // + std::vector resultSets; + resultSets.reserve(partitions->getWorkerCount()); + for (auto i = 0; i < partitions->getWorkerCount(); ++i) + resultSets.push_back( + new ResultSet( + queryMacros.vars.columnVars.size() * (queryMacros.segments.size() + ? queryMacros.segments.size() + : 1))); + + // nothing active - return an empty set - not an error + if (!activeList.size()) + { + // 1. Merge Macro Literals + ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); + + // 2. 
Merge the rows + int64_t bufferLength = 0; + const auto buffer = ResultMuxDemux::multiSetToInternode( + queryMacros.vars.columnVars.size(), + queryMacros.segments.size(), + resultSets, + bufferLength); + + // reply will be responsible for buffer + message->reply(http::StatusCode::success_ok, buffer, bufferLength); + PoolMem::getPool().freePtr(buffer); + + // clean up stray resultSets + Logger::get().info("event query on " + table->getName()); + for (auto resultSet : resultSets) + delete resultSet; + return; + } + + /* + * this Shuttle will gather our result sets roll them up and spit them back + * + * note that queryMacros are captured with a copy, this is because a reference + * version will have had it's destructor called when the function exits. + * + * Note: ShuttleLamda comes in two versions, + */ + const auto shuttle = new ShuttleLambda( + message, + activeList.size(), + [queryMacros, table, resultSets]( + vector>& responses, + web::MessagePtr message, + voidfunc release_cb) mutable + { + // process the data and respond + // check for errors, add up totals + for (const auto& r : responses) + { + if (r.data.error.inError()) + { + // any error that is recorded should be considered a hard error, so report it + const auto errorMessage = r.data.error.getErrorJSON(); + Logger::get().error(errorMessage); + message->reply(http::StatusCode::client_error_bad_request, errorMessage); + // clean up stray resultSets + for (auto resultSet : resultSets) + delete resultSet; + + release_cb(); + return; + } + } + + // 1. Merge the Macro Literals + ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); + + // 2. 
Merge the rows + int64_t bufferLength = 0; + const auto buffer = ResultMuxDemux::multiSetToInternode( queryMacros.vars.columnVars.size(), - queryMacros.indexes.size(), + queryMacros.segments.size(), + //queryMacros.indexes.size(), resultSets, - &tDoc); + bufferLength); - cout << cjson::stringify(&tDoc, true ); - */ message->reply(http::StatusCode::success_ok, buffer, bufferLength); PoolMem::getPool().freePtr(buffer); @@ -596,10 +930,13 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& for (auto resultSet : resultSets) delete resultSet; - release_cb(); // this will delete the shuttle, and clear up the CellQueryResult_s vector + // this will delete the shuttle, and clear up the CellQueryResult_s vector + release_cb(); }); - auto instance = 0; // pass factory function (as lambda) to create new cell objects + auto instance = 0; + + // pass factory function (as lambda) to create new cell objects partitions->cellFactory( activeList, [shuttle, table, queryMacros, resultSets, &instance](AsyncLoop* loop) -> OpenLoop* @@ -607,6 +944,7 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& instance++; return new OpenLoopQuery(shuttle, table, queryMacros, resultSets[loop->getWorkerId()], instance); }); + } void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping& matches) @@ -672,7 +1010,7 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping continue; query::Macro_s queryMacros; // this is our compiled code block query::QueryParser p; - p.compileQuery(r.code.c_str(), table->getProperties(), queryMacros, ¶mVars); + p.compileQuery(r.code.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ParseMode_e::segment); if (p.error.inError()) { @@ -783,15 +1121,18 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping if (!activeList.size()) { // 1. 
Merge Macro Literals - ResultMuxDemux::mergeMacroLiterals(queries.front().second, resultSets); // 2. Merge the rows + ResultMuxDemux::mergeMacroLiterals(queries.front().second, resultSets); + + // 2. Merge the rows int64_t bufferLength = 0; - const auto buffer = ResultMuxDemux::multiSetToInternode(1, 1, resultSets, bufferLength); + const auto buffer = ResultMuxDemux::multiSetToInternode(1, 1, resultSets, bufferLength); // reply is responsible for buffer message->reply(http::StatusCode::success_ok, buffer, bufferLength); PoolMem::getPool().freePtr(buffer); - Logger::get().info("No active workers for " + table->getName()); // clean up stray resultSets + Logger::get().info("No active workers for " + table->getName()); + // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; @@ -813,6 +1154,7 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping { // any error that is recorded should be considered a hard error, so report it message->reply(http::StatusCode::client_error_bad_request, r.data.error.getErrorJSON()); + // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; @@ -823,9 +1165,9 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping } // 1. Merge Macro Literals - // 2. Merge the rows ResultMuxDemux::mergeMacroLiterals(queries.front().second, resultSets); + // 2. 
Merge the rows int64_t bufferLength = 0; const auto buffer = ResultMuxDemux::multiSetToInternode( queries.front().second.vars.columnVars.size(), @@ -846,8 +1188,9 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping }); auto instance = 0; - auto workers = 0; // pass factory function (as lambda) to create new cell objects + auto workers = 0; + // pass factory function (as lambda) to create new cell objects partitions->cellFactory( activeList, [shuttle, table, queries, resultSets, &workers, &instance](AsyncLoop* loop) -> OpenLoop* @@ -1128,9 +1471,7 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat * We will call our forkQuery function. * * forQuery will call all the nodes (including this one) with the - * `is_fork` varaible set to true. - * - * + * `is_fork` variable set to true. */ if (!isFork) { @@ -1148,12 +1489,13 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat return; } - // create list of active_owner parititions for factory function + // create list of active_owner partitions for factory function const auto activeList = globals::mapper->partitionMap.getPartitionsByNodeIdAndStates( globals::running->nodeId, { mapping::NodeState_e::active_owner }); + // Shared Results - Partitions spread across working threads (AsyncLoop's made by AsyncPool) // we don't have to worry about locking anything shared between partitions in the same // thread as they are executed serially, rather than in parallel. 
@@ -1182,9 +1524,12 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat queryInfo.segments.size(), resultSets, bufferLength); + // reply will be responsible for buffer message->reply(http::StatusCode::success_ok, buffer, bufferLength); - PoolMem::getPool().freePtr(buffer); // clean up stray resultSets + PoolMem::getPool().freePtr(buffer); + + // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; return; @@ -1196,7 +1541,7 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat * note that queryMacros are captured with a copy, this is because a reference * version will have had it's destructor called when the function exits. * - * Note: ShuttleLamda comes in two versions, + * Note: ShuttleLambda comes in two versions, */ const auto shuttle = new ShuttleLambda( message, @@ -1233,14 +1578,18 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat message->reply(http::StatusCode::success_ok, buffer, bufferLength); - Logger::get().info("Fork query on " + table->getName()); // clean up all those resultSet* + Logger::get().info("Fork query on " + table->getName()); + // clean up all those resultSet* for (auto r : resultSets) delete r; release_cb(); // this will delete the shuttle, and clear up the CellQueryResult_s vector }); - auto instance = 0; // pass factory function (as lambda) to create new cell objects + + auto instance = 0; + + // pass factory function (as lambda) to create new cell objects partitions->cellFactory( activeList, [shuttle, table, queryInfo, resultSets, &instance](AsyncLoop* loop) -> OpenLoop* @@ -1265,6 +1614,7 @@ void RpcQuery::customer(openset::web::MessagePtr& message, const RpcMapping& mat message); return; } + const auto tableName = matches.find("table"s)->second; if (!tableName.length()) { @@ -1277,6 +1627,7 @@ void RpcQuery::customer(openset::web::MessagePtr& message, const RpcMapping& mat message); return; } + const auto table = 
globals::database->getTable(tableName); if (!table) { @@ -1290,7 +1641,7 @@ void RpcQuery::customer(openset::web::MessagePtr& message, const RpcMapping& mat return; } - int64_t uuid = 0; + int64_t uuid = std::numeric_limits::min(); if (table->numericCustomerIds) { @@ -1300,6 +1651,7 @@ void RpcQuery::customer(openset::web::MessagePtr& message, const RpcMapping& mat } catch (...) { + // error returned below in `if (uuid == ::min()) } } else @@ -1308,13 +1660,13 @@ void RpcQuery::customer(openset::web::MessagePtr& message, const RpcMapping& mat uuid = MakeHash(uuString); } - if (uuid == 0) + if (uuid == std::numeric_limits::min()) { RpcError( errors::Error { errors::errorClass_e::query, errors::errorCode_e::general_error, - "invalid id" + "invalid customer id" }, message); return; @@ -1390,17 +1742,18 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma const auto tableName = matches.find("table"s)->second; const auto groupName = matches.find("name"s)->second; const auto queryCode = std::string { message->getPayload(), message->getPayloadLength() }; + const auto debug = message->getParamBool("debug"); const auto isFork = message->getParamBool("fork"); const auto trimSize = message->getParamInt("trim", -1); const auto sortOrder = message->getParamString("order", "desc") == "asc" - ? ResultSortOrder_e::Asc - : ResultSortOrder_e::Desc; + ? ResultSortOrder_e::Asc + : ResultSortOrder_e::Desc; const auto sortMode = ResultSortMode_e::key; - const auto log = "Inbound events query (fork: "s + (isFork - ? "true"s - : "false"s) + ")"s; + + const auto log = "Inbound events query (fork: "s + (isFork ? 
"true"s : "false"s) + ")"s; Logger::get().info(log); + if (!tableName.length()) { RpcError( @@ -1412,6 +1765,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message); return; } + if (!queryCode.length()) { RpcError( @@ -1423,6 +1777,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message); return; } + auto table = database->getTable(tableName); if (!table) { @@ -1434,11 +1789,14 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma }, message); return; - } // override session time if provided, otherwise use table default + } + + // override session time if provided, otherwise use table default const auto sessionTime = message->getParamInt("session_time", table->getSessionTime()); query::ParamVars paramVars = getInlineVaraibles(message); query::Macro_s queryMacros; // this is our compiled code block query::QueryParser p; + try { p.compileQuery(queryCode.c_str(), table->getProperties(), queryMacros, ¶mVars); @@ -1454,12 +1812,15 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message); return; } + if (p.error.inError()) { Logger::get().error(p.error.getErrorJSON()); message->reply(http::StatusCode::client_error_bad_request, p.error.getErrorJSON()); return; - } // Histogram querys must call tally + } + + // Histogram querys must call tally if (queryMacros.marshalsReferenced.count(query::Marshals_e::marshal_tally)) { RpcError( @@ -1471,6 +1832,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message); return; } + if (message->isParam("segments")) { const auto segmentText = message->getParamString("segments"); @@ -1493,24 +1855,32 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message); return; } - } // set the sessionTime (timeout) value, this will get relayed + } + + // set the sessionTime (timeout) value, this will get relayed // through the to 
oloop_query, the customer object and finally the grid queryMacros.sessionTime = sessionTime; if (debug) { - auto debugOutput = MacroDbg(queryMacros); // reply as text + auto debugOutput = MacroDbg(queryMacros); + // reply as text message->reply(http::StatusCode::success_ok, &debugOutput[0], debugOutput.length()); return; } + int64_t bucket = 0; if (message->isParam("bucket")) bucket = static_cast(stod(message->getParamString("bucket", "0")) * 10000.0); + auto forceMin = std::numeric_limits::min(); if (message->isParam("min")) forceMin = static_cast(stod(message->getParamString("min", "0")) * 10000.0); + auto forceMax = std::numeric_limits::min(); if (message->isParam("max")) - forceMax = static_cast(stod(message->getParamString("max", "0")) * 10000.0); /* + forceMax = static_cast(stod(message->getParamString("max", "0")) * 10000.0); + + /* * We are originating the query. * * At this point in the function we have validated that the @@ -1520,9 +1890,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma * We will call our forkQuery function. * * forQuery will call all the nodes (including this one) with the - * `is_fork` varaible set to true. - * - * + * `is_fork` variable set to true. */ if (!isFork) { @@ -1541,13 +1909,16 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); return; - } // We are a Fork! + } + + // We are a Fork! // create list of active_owner parititions for factory function auto activeList = globals::mapper->partitionMap.getPartitionsByNodeIdAndStates( globals::running->nodeId, { mapping::NodeState_e::active_owner }); + // Shared Results - Partitions spread across working threads (AsyncLoop's made by AsyncPool) // we don't have to worry about locking anything shared between partitions in the same // thread as they are executed serially, rather than in parallel. 
@@ -1565,31 +1936,41 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma resultSets.push_back( new ResultSet( queryMacros.vars.columnVars.size() * (queryMacros.segments.size() - ? queryMacros.segments.size() - : 1))); // nothing active - return an empty set - not an error + ? queryMacros.segments.size() + : 1))); + + // nothing active - return an empty set - not an error if (activeList.empty()) { // 1. Merge Macro Literals - ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); // 2. Merge the rows + ResultMuxDemux::mergeMacroLiterals(queryMacros, resultSets); + + // 2. Merge the rows int64_t bufferLength = 0; const auto buffer = ResultMuxDemux::multiSetToInternode( queryMacros.vars.columnVars.size(), queryMacros.segments.size(), resultSets, - bufferLength); // reply will be responsible for buffer + bufferLength); + + // reply will be responsible for buffer message->reply(http::StatusCode::success_ok, buffer, bufferLength); - PoolMem::getPool().freePtr(buffer); // clean up stray resultSets + PoolMem::getPool().freePtr(buffer); + + // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; return; - } /* + } + + /* * this Shuttle will gather our result sets roll them up and spit them back * * note that queryMacros are captured with a copy, this is because a reference * version will have had it's destructor called when the function exits. 
* * Note: ShuttleLamda comes in two versions, - */ //auto shuttle = new ShuttleLambdaAsync( + */ const auto shuttle = new ShuttleLambda( message, activeList.size(), @@ -1620,17 +2001,24 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma queryMacros.segments.size(), resultSets, bufferLength); + message->reply(http::StatusCode::success_ok, buffer, bufferLength); - Logger::get().info("Fork query on " + table->getName()); // clean up stray resultSets + + Logger::get().info("Fork query on " + table->getName()); + + // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; PoolMem::getPool().freePtr(buffer); - release_cb(); // this will delete the shuttle, and clear up the CellQueryResult_s vector + + // this will delete the shuttle, and clear up the CellQueryResult_s vector + release_cb(); }); - auto forEach = message->isParam("foreach") - ? message->getParamString("foreach") - : ""s; - auto instance = 0; // pass factory function (as lambda) to create new cell objects + + auto forEach = message->isParam("foreach") ? 
message->getParamString("foreach") : ""s; + auto instance = 0; + + // pass factory function (as lambda) to create new cell objects partitions->cellFactory( activeList, [shuttle, table, queryMacros, resultSets, groupName, bucket, forEach, &instance](AsyncLoop* loop) -> OpenLoop* @@ -1679,20 +2067,25 @@ openset::mapping::Mapper::Responses queryDispatch( { if (doneSending) return false; - csLock lock(cs); //if (running > runMax) // send up to RunMax, fill any that are complete - // return; + + csLock lock(cs); + if (iter == queries.end() || result.routeError) { doneSending = true; return false; } ++running; - ++sendCount; // convert captures in Section Defintion to REST params - for (auto p : *(iter->params.getDict())) + ++sendCount; + + // convert captures in section definition and converts to REST params + for (auto &p : *(iter->params.getDict())) if (p.first.getString() != "each") // missing a char* != ??? params.emplace(p.first.getString(), p.second.getString()); // add a segments param + if (segments.size()) params.emplace("segments"s, join(segments)); // make queries + if (iter->sectionType == "segment") { method = "POST"; @@ -1715,7 +2108,9 @@ openset::mapping::Mapper::Responses queryDispatch( payload = std::move(iter->code); // eat it } ++iter; - } // fire these queries off + } + + // fire these queries off const auto success = openset::globals::mapper->dispatchAsync( openset::globals::running->nodeId, // fork to self @@ -1724,17 +2119,22 @@ openset::mapping::Mapper::Responses queryDispatch( params, payload, completeCallback); + if (!success) - result.routeError = true; //nextQuery(); + result.routeError = true; + return true; }; + while (sendOne()) { while (running > runMax) ThreadSleep(55); } + while (!doneSending && sendCount != receivedCount) - ThreadSleep(50); // replace with semaphore + ThreadSleep(50); // TODO replace with semaphore + return result; } @@ -1745,7 +2145,9 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche 
const auto tableName = matches.find("table"s)->second; const auto queryCode = std::string { message->getPayload(), message->getPayloadLength() }; const auto debug = message->getParamBool("debug"); + Logger::get().info("Inbound multi query"s); + if (!tableName.length()) { RpcError( @@ -1757,6 +2159,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche message); return; } + if (!queryCode.length()) { RpcError( @@ -1768,6 +2171,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche message); return; } + const auto table = database->getTable(tableName); if (!table) { @@ -1780,6 +2184,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche message); return; } + thread runner( [=]() { @@ -1788,20 +2193,25 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche query::QueryParser::SectionDefinitionList segmentList; query::QueryParser::SectionDefinitionList queryList; query::QueryParser::SectionDefinition_s useSection; - query::SegmentList segments; // extract the + query::SegmentList segments; + for (auto& s : subQueries) + { if (s.sectionType == "segment") segmentList.push_back(s); else if (s.sectionType == "use") useSection = s; else queryList.push_back(s); + } + if (useSection.sectionType == "use" && useSection.sectionName.length()) { segments.push_back(useSection.sectionName); for (const auto& p : *useSection.params.getDict()) segments.push_back(p.first); } + if (segmentList.size()) { auto results = queryDispatch(tableName, segments, segmentList); @@ -1836,6 +2246,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche return; } } + if (queryList.size()) { auto results = queryDispatch(tableName, segments, queryList); @@ -1858,6 +2269,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche results.routeError = true; // this will trigger the next error } } + if (results.routeError) { 
RpcError( @@ -1869,6 +2281,7 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche message); return; } + cjson responseJson; auto resultBranch = responseJson.setArray("_"); for (auto& r : results.responses) @@ -1878,8 +2291,10 @@ void RpcQuery::batch(openset::web::MessagePtr& message, const RpcMapping& matche if (const auto item = resultItemJson.xPath("/_/0"); item) cjson::parse(cjson::stringify(item), insertAt, true); } + message->reply(http::StatusCode::success_ok, responseJson); } }); + runner.detach(); } diff --git a/src/rpc_query.h b/src/rpc_query.h index 5fd23ac..f99e154 100644 --- a/src/rpc_query.h +++ b/src/rpc_query.h @@ -21,6 +21,8 @@ namespace openset::comms static void property(const openset::web::MessagePtr& message, const RpcMapping& matches); // GET /v1/query/{table}/customer?{id|idstr}={user_id_key} static void customer(const openset::web::MessagePtr& message, const RpcMapping& matches); + // GET /v1/query/{table}/customers?{various optional switches} + static void segment_customers(openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/histogram/{name} static void histogram(const openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/batch From 6efbd96924c00c5dc51a6651579fb7f134f999a3 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 14 Nov 2019 16:59:09 -0500 Subject: [PATCH 02/31] roughing in customer list functionality --- CMakeLists.txt | 2 + lib/str/strtools.cpp | 482 ++++++++++++++++++------------------ src/oloop_customer_list.cpp | 174 +++++++++++++ src/oloop_customer_list.h | 54 ++++ src/querycommon.h | 3 + src/queryinterpreter.cpp | 40 ++- src/queryinterpreter.h | 2 + src/queryparserosl.h | 5 +- src/result.h | 7 + src/rpc.h | 4 +- src/rpc_query.cpp | 94 ++++--- src/rpc_query.h | 4 +- 12 files changed, 559 insertions(+), 312 deletions(-) create mode 100644 src/oloop_customer_list.cpp create mode 100644 src/oloop_customer_list.h diff --git 
a/CMakeLists.txt b/CMakeLists.txt index e9e7125..b81c5fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,8 @@ set(SOURCE_FILES src/oloop_cleaner.h src/oloop_customer.cpp src/oloop_customer.h + src/oloop_customer_list.cpp + src/oloop_customer_list.h src/oloop_histogram.cpp src/oloop_histogram.h src/oloop_insert.cpp diff --git a/lib/str/strtools.cpp b/lib/str/strtools.cpp index 1fc4ea1..bb8a576 100644 --- a/lib/str/strtools.cpp +++ b/lib/str/strtools.cpp @@ -2,257 +2,257 @@ bool EndsWith(std::string Source, std::string Find) { - const auto pos = Source.rfind(Find); + const auto pos = Source.rfind(Find); - if (pos == std::string::npos) - return false; + if (pos == std::string::npos) + return false; - if (pos == Source.length() - Find.length()) - return true; + if (pos == Source.length() - Find.length()) + return true; - return false; + return false; } bool StartsWith(const std::string& Source, const std::string& Find) { - return Source.length() >= Find.length() && std::equal(Find.begin(), Find.end(), Source.begin()); + return Source.length() >= Find.length() && std::equal(Find.begin(), Find.end(), Source.begin()); } void Replace(std::string& Source, std::string Find, std::string Replace) { - size_t pos = 0; + size_t pos = 0; - while (true) - { - pos = Source.find(Find, pos); + while (true) + { + pos = Source.find(Find, pos); - if (pos == std::string::npos) - return; + if (pos == std::string::npos) + return; - Source.erase(pos, Find.length()); + Source.erase(pos, Find.length()); - Source.insert(pos, Replace); + Source.insert(pos, Replace); - pos += Replace.length(); - } + pos += Replace.length(); + } } // Removes white space in strings... 
cleans in place, moves null termator void cleanStr(char* Str, char CleanChar) { - char* ReadPtr = Str; - char* WritePtr = Str; - - while (*ReadPtr) - { - *WritePtr = *ReadPtr; - WritePtr++; - ReadPtr++; - if ((*ReadPtr == CleanChar) && (*(WritePtr - 1) == CleanChar)) - WritePtr--; - } - - *WritePtr = 0; + char* ReadPtr = Str; + char* WritePtr = Str; + + while (*ReadPtr) + { + *WritePtr = *ReadPtr; + WritePtr++; + ReadPtr++; + if ((*ReadPtr == CleanChar) && (*(WritePtr - 1) == CleanChar)) + WritePtr--; + } + + *WritePtr = 0; } std::string join(const std::vector& strings, std::string quotes) { - if (strings.empty()) - return ""; + if (strings.empty()) + return ""; - std::string res; + std::string res; - bool comma = false; - for (auto& str : strings) - { - if (str.empty()) - continue; + bool comma = false; + for (auto& str : strings) + { + if (str.empty()) + continue; - res += (comma ? "," : "") + quotes + str + quotes; - comma = true; - } + res += (comma ? "," : "") + quotes + str + quotes; + comma = true; + } - return res; + return res; } std::string join(const std::unordered_set& strings, std::string quotes) { - if (strings.empty()) - return ""; + if (strings.empty()) + return ""; - std::string res; + std::string res; - bool comma = false; - for (auto& str : strings) - { - if (str.empty()) - continue; + bool comma = false; + for (auto& str : strings) + { + if (str.empty()) + continue; - res += (comma ? "," : "") + quotes + str + quotes; - comma = true; - } + res += (comma ? 
"," : "") + quotes + str + quotes; + comma = true; + } - return res; + return res; } std::string cleanStr(std::string Source, std::string Remove) { - std::string Result = ""; + std::string Result = ""; - const char* Start = Source.c_str(); - const char* Removal; + const char* Start = Source.c_str(); + const char* Removal; - bool badchars = false; + bool badchars = false; - while (*Start) - { - badchars = false; + while (*Start) + { + badchars = false; - Removal = Remove.c_str(); + Removal = Remove.c_str(); - while (*Removal) - { - if (*Removal == *Start) - { - badchars = true; - break; - } + while (*Removal) + { + if (*Removal == *Start) + { + badchars = true; + break; + } - Removal++; - } + Removal++; + } - if (badchars) - { - Start++; - continue; - } + if (badchars) + { + Start++; + continue; + } - Result.push_back(*Start); + Result.push_back(*Start); - Start++; - } + Start++; + } - return Result; + return Result; } // makes a heap copy of a const string... remember to "delete []" after use char* copyStr(const char* SourceStr) { - int32_t len = strlen(SourceStr); + int32_t len = strlen(SourceStr); - char* NewStr = new char[len + 1]; + char* NewStr = new char[len + 1]; - strcpy(NewStr, SourceStr); + strcpy(NewStr, SourceStr); - return NewStr; + return NewStr; } void copyStr(char* dest, const char* source, int32_t maxLen) { - int32_t len = strlen(source); + int32_t len = strlen(source); - len = (len <= maxLen) ? len : maxLen; - memcpy(dest, source, len); - dest[len] = '\0'; + len = (len <= maxLen) ? 
len : maxLen; + memcpy(dest, source, len); + dest[len] = '\0'; } // use on non cost data, modifies original string, make a copy __strList splitStr(char* SourceStr, char* SplitChars) { - __strList Result = new std::vector(); - - char* Start = SourceStr; - char* Last = Start; - char* Splits; - - while (*Start) - { - Splits = SplitChars; - - while (*Splits) - { - if (*Splits == *Start) - { - if (Start == SourceStr) - { - Last++; - } - else - { - *Start = 0; - Result->push_back(Last); - Last = Start + 1; - } - break; - } - - Splits++; - } - - Start++; - } - - if (*Last) - Result->push_back(Last); - - return Result; + __strList Result = new std::vector(); + + char* Start = SourceStr; + char* Last = Start; + char* Splits; + + while (*Start) + { + Splits = SplitChars; + + while (*Splits) + { + if (*Splits == *Start) + { + if (Start == SourceStr) + { + Last++; + } + else + { + *Start = 0; + Result->push_back(Last); + Last = Start + 1; + } + break; + } + + Splits++; + } + + Start++; + } + + if (*Last) + Result->push_back(Last); + + return Result; } // use on non cost data, modifies original string, make a copy void splitStr(const std::string& SourceStr, std::string SplitChars, __stringList Result) { - char CopiedSource[8192]; - - Result->clear(); - - strncpy(CopiedSource, SourceStr.c_str(), 8191); - //char* CopiedSource = copyStr( SourceStr.c_str() ); - - char* Start = CopiedSource; - char* Last = Start; - const char* Splits; - - while (*Start) - { - Splits = SplitChars.c_str(); - - while (*Splits) - { - if (*Splits == *Start) - { - if (Start == CopiedSource) - { - Last++; - } - else - { - *Start = 0; - Result->push_back(Last); - Last = Start + 1; - } - break; - } - - Splits++; - } - - Start++; - } - - if (*Last) - Result->push_back(Last); - - //delete []CopiedSource; + char CopiedSource[8192]; + + Result->clear(); + + strncpy(CopiedSource, SourceStr.c_str(), 8191); + //char* CopiedSource = copyStr( SourceStr.c_str() ); + + char* Start = CopiedSource; + char* Last = Start; 
+ const char* Splits; + + while (*Start) + { + Splits = SplitChars.c_str(); + + while (*Splits) + { + if (*Splits == *Start) + { + if (Start == CopiedSource) + { + Last++; + } + else + { + *Start = 0; + Result->push_back(Last); + Last = Start + 1; + } + break; + } + + Splits++; + } + + Start++; + } + + if (*Last) + Result->push_back(Last); + + //delete []CopiedSource; } void toUpper(std::string& Text) { - std::transform(Text.begin(), Text.end(), Text.begin(), ::toupper); + std::transform(Text.begin(), Text.end(), Text.begin(), ::toupper); } void toLower(std::string& Text) { - std::transform(Text.begin(), Text.end(), Text.begin(), ::tolower); + std::transform(Text.begin(), Text.end(), Text.begin(), ::tolower); } std::string toLowerCase(std::string Text) @@ -263,121 +263,121 @@ std::string toLowerCase(std::string Text) void toLower(char* str) { - while (*str != 0) - { - (*str) = (char)tolower(*str); - ++str; - } + while (*str != 0) + { + (*str) = (char)tolower(*str); + ++str; + } } void split(const std::string& Source, char Token, std::vector& Result) { - Result.clear(); + Result.clear(); - if (Source.size() == 0) - return; + if (Source.size() == 0) + return; - int32_t Start = 0; - int32_t End = 0; - int32_t Size = Source.size(); + int32_t Start = 0; + int32_t End = 0; + int32_t Size = Source.size(); - End = Source.find(Token, 0); + End = Source.find(Token, 0); - while (End != -1) - { - // clears out multiple delimiters, like 1,2,,,,,,,3,4,5,6 so you don't end up with empties int the return array - if (End - Start == 0) - { - while (Start != Size && Source[Start] == Token) - Start++; + while (End != -1) + { + // clears out multiple delimiters, like 1,2,,,,,,,3,4,5,6 so you don't end up with empties int the return array + if (End - Start == 0) + { + while (Start != Size && Source[Start] == Token) + Start++; - End = Source.find(Token, Start); + End = Source.find(Token, Start); - if (End == -1) - break; - } + if (End == -1) + break; + } - if (End - Start > 0) - 
Result.push_back(Source.substr(Start, End - Start)); + if (End - Start > 0) + Result.push_back(Source.substr(Start, End - Start)); - Start += (End - Start) + 1; + Start += (End - Start) + 1; - End = Source.find(Token, Start); - } + End = Source.find(Token, Start); + } - End = Size; + End = Size; - if (End - Start > 0) - Result.push_back(Source.substr(Start, End - Start)); + if (End - Start > 0) + Result.push_back(Source.substr(Start, End - Start)); - if (Result.size() == 0) - Result.push_back(Source); + if (Result.size() == 0) + Result.push_back(Source); } std::vector split(const std::string& Source, char Token) { - std::vector Result; + std::vector Result; - split(Source, Token, Result); + split(Source, Token, Result); - return Result; + return Result; } std::string N2S(int32_t Value, int32_t MinWidth) { - char Buffer[256]; - sprintf(Buffer, "%i", Value); + char Buffer[256]; + sprintf(Buffer, "%i", Value); - std::string result(Buffer); + std::string result(Buffer); - if (MinWidth) - { - while (result.length() < static_cast(MinWidth)) - result = "0" + result; - } + if (MinWidth) + { + while (result.length() < static_cast(MinWidth)) + result = "0" + result; + } - return result; + return result; }; std::string N2S(int64_t Value, int32_t MinWidth) { - char Buffer[256]; - sprintf(Buffer, INT64_FORMAT, Value); + char Buffer[256]; + sprintf(Buffer, INT64_FORMAT, Value); - std::string result(Buffer); + std::string result(Buffer); - if (MinWidth) - { - while (result.length() < static_cast(MinWidth)) - result = "0" + result; - } + if (MinWidth) + { + while (result.length() < static_cast(MinWidth)) + result = "0" + result; + } - return result; + return result; }; void N2S(int64_t Value, std::string& Result) { - char Buffer[32]; - sprintf(Buffer, INT64_FORMAT, Value); - Result = Buffer; + char Buffer[32]; + sprintf(Buffer, INT64_FORMAT, Value); + Result = Buffer; }; //based on javascript encodeURIComponent() std::string char2hex(char dec) { - char dig1 = (dec & 0xF0) >> 4; - 
char dig2 = (dec & 0x0F); - if (0 <= dig1 && dig1 <= 9) - dig1 += 48; //0,48inascii - if (10 <= dig1 && dig1 <= 15) - dig1 += 65 - 10; //a,97inascii - if (0 <= dig2 && dig2 <= 9) - dig2 += 48; - if (10 <= dig2 && dig2 <= 15) - dig2 += 65 - 10; - - std::string r; - r.append(&dig1, 1); - r.append(&dig2, 1); - return r; + char dig1 = (dec & 0xF0) >> 4; + char dig2 = (dec & 0x0F); + if (0 <= dig1 && dig1 <= 9) + dig1 += 48; //0,48inascii + if (10 <= dig1 && dig1 <= 15) + dig1 += 65 - 10; //a,97inascii + if (0 <= dig2 && dig2 <= 9) + dig2 += 48; + if (10 <= dig2 && dig2 <= 15) + dig2 += 65 - 10; + + std::string r; + r.append(&dig1, 1); + r.append(&dig2, 1); + return r; }; diff --git a/src/oloop_customer_list.cpp b/src/oloop_customer_list.cpp new file mode 100644 index 0000000..9c77800 --- /dev/null +++ b/src/oloop_customer_list.cpp @@ -0,0 +1,174 @@ +#include "oloop_customer_list.h" +#include "indexbits.h" +#include "asyncpool.h" +#include "tablepartitioned.h" +#include "internoderouter.h" + +using namespace openset::async; +using namespace openset::query; +using namespace openset::result; + +// yes, we are passing queryMacros by value to get a copy +OpenLoopCustomerList::OpenLoopCustomerList( + ShuttleLambda* shuttle, + Database::TablePtr table, + Macro_s macros, + openset::result::ResultSet* result, + int instance) + : OpenLoop(table->getName(), oloopPriority_e::realtime), + // queries are high priority and will preempt other running cells + macros(std::move(macros)), + shuttle(shuttle), + table(table), + parts(nullptr), + maxLinearId(0), + currentLinId(-1), + interpreter(nullptr), + instance(instance), + runCount(0), + startTime(0), + population(0), + index(nullptr), + result(result) +{} + +OpenLoopCustomerList::~OpenLoopCustomerList() +{ + if (interpreter) + { + // free up any segment bits we may have made + //for (auto bits : interpreter->segmentIndexes) + // delete bits; + + delete interpreter; + } +} + +void OpenLoopCustomerList::prepare() +{ + parts = 
table->getPartitionObjects(loop->partition, false); + + if (!parts) + { + suicide(); + return; + } + + maxLinearId = parts->people.customerCount(); + + // generate the index for this query + indexing.mount(table.get(), macros, loop->partition, maxLinearId); + bool countable; + index = indexing.getIndex("_", countable); + population = index->population(maxLinearId); + + interpreter = new Interpreter(macros); + interpreter->setResultObject(result); + + // if we are in segment compare mode: + if (macros.segments.size()) + { + std::vector segments; + + for (const auto& segmentName : macros.segments) + { + if (segmentName == "*"s) + { + auto tBits = new IndexBits(); + tBits->makeBits(maxLinearId, 1); + segments.push_back(tBits); + } + else + { + if (!parts->segments.count(segmentName)) + { + shuttle->reply( + 0, + result::CellQueryResult_s{ + instance, + {}, + openset::errors::Error{ + openset::errors::errorClass_e::run_time, + openset::errors::errorCode_e::item_not_found, + "missing segment '" + segmentName + "'" + } + } + ); + suicide(); + return; + } + + segments.push_back(parts->segments[segmentName].bits); + + } + } + + interpreter->setCompareSegments(index, segments); + } + + // map table, partition and select schema properties to the Customer object + auto mappedColumns = interpreter->getReferencedColumns(); + if (!person.mapTable(table.get(), loop->partition, mappedColumns)) + { + partitionRemoved(); + suicide(); + return; + } + + person.setSessionTime(macros.sessionTime); + + startTime = Now(); +} + +bool OpenLoopCustomerList::run() +{ + while (true) + { + if (sliceComplete()) + return true; + + // are we done? 
This will return the index of the + // next set bit until there are no more, or maxLinId is met + if (interpreter->error.inError() || !index->linearIter(currentLinId, maxLinearId)) + { + result->setAccTypesFromMacros(macros); + + shuttle->reply( + 0, + CellQueryResult_s { + instance, + {}, + interpreter->error, + }); + + parts->attributes.clearDirty(); + + suicide(); + return false; + } + + if (const auto personData = parts->people.getCustomerByLIN(currentLinId); personData != nullptr) + { + ++runCount; + person.mount(personData); + person.prepare(); + interpreter->mount(&person); + interpreter->exec(); // run the script on this customer - do some magic + } + } +} + +void OpenLoopCustomerList::partitionRemoved() +{ + shuttle->reply( + 0, + CellQueryResult_s { + instance, + {}, + openset::errors::Error { + openset::errors::errorClass_e::run_time, + openset::errors::errorCode_e::partition_migrated, + "please retry query" + } + }); +} diff --git a/src/oloop_customer_list.h b/src/oloop_customer_list.h new file mode 100644 index 0000000..29a44f0 --- /dev/null +++ b/src/oloop_customer_list.h @@ -0,0 +1,54 @@ +#pragma once +#include "common.h" +#include "database.h" +#include "oloop.h" +#include "shuttle.h" +#include "querycommon.h" +#include "queryindexing.h" +#include "queryinterpreter.h" +#include "result.h" + +namespace openset +{ + namespace db + { + class Table; + class TablePartitioned; + }; + + namespace async + { + class OpenLoopCustomerList : public OpenLoop + { + public: + openset::query::Macro_s macros; + ShuttleLambda* shuttle; + openset::db::Database::TablePtr table; + openset::db::TablePartitioned* parts; + int64_t maxLinearId; + int64_t currentLinId; + Customer person; + openset::query::Interpreter* interpreter; + int instance; + int runCount; + int64_t startTime; + int population; + openset::query::Indexing indexing; + openset::db::IndexBits* index; + openset::result::ResultSet* result; + + explicit OpenLoopCustomerList( + ShuttleLambda* shuttle, + 
openset::db::Database::TablePtr table, + openset::query::Macro_s macros, + openset::result::ResultSet* result, + int instance); + + ~OpenLoopCustomerList() final; + + void prepare() final; + bool run() final; + void partitionRemoved() final; + }; + } +} diff --git a/src/querycommon.h b/src/querycommon.h index c590f50..d410d6c 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -867,15 +867,18 @@ namespace openset int64_t withinWindow {LLONG_MAX}; int64_t continueFrom {0}; }; + using FilterList = vector; using CountList = vector; // structure for variables using BlockMap = vector; + using AutoGrouping = vector; struct Variables_S { VarList userVars; VarList tableVars; VarList columnVars; + AutoGrouping autoGrouping; BlockMap blockList; ColumnLambdas columnLambdas; FunctionList functions; diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index c0fbfe6..44bab40 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -156,7 +156,8 @@ void openset::query::Interpreter::extractMarshalParams(const int paramCount) { for (auto i = 0; i < paramCount; ++i) // PERF { - --stackPtr; // if any of these params are undefined, exit + --stackPtr; + // if any of these params are undefined, exit if (stackPtr->typeOf() != cvar::valueType::STR && *stackPtr == NONE) marshalParams[i] = NONE; else @@ -164,11 +165,11 @@ void openset::query::Interpreter::extractMarshalParams(const int paramCount) } } -void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_s* columns, const int currentRow) +void openset::query::Interpreter::tally(const int paramCount, const Col_s* columns, const int currentRow) { if (paramCount <= 0) - return; // pop the stack into a pre-allocated array of cvars in reverse order - extractMarshalParams(paramCount); // strings, doubles, and bools are all ints internally, + return; + // this will ensure non-int types are represented as ints // during grouping const auto fixToInt = [&](const cvar& value) -> int64_t @@ 
-180,16 +181,9 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ case cvar::valueType::FLT: case cvar::valueType::DBL: return value.getDouble() * 10000; case cvar::valueType::STR: - { - const auto tString = value.getString(); - const auto hash = MakeHash(tString); - result->addLocalText(hash, tString); // cache this text - return hash; - } + return result->addLocalTextAndHash(value.getString()); // cache this text case cvar::valueType::BOOL: - return value.getBool() - ? 1 - : 0; + return value.getBool() ? 1 : 0; default: return NONE; } @@ -301,12 +295,6 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ case Modifiers_e::value: resultColumnValue = aggValue; break; - /*case Modifiers_e::var: - if (resultColumns->columns[resultIndex].value == NONE) - resultColumns->columns[resultIndex].value = 1; //fixToInt(resCol.value); - else - resultColumns->columns[resultIndex].value++; //+= fixToInt(resCol.value); - break;*/ default: break; } @@ -329,12 +317,8 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ macros.vars.columnVars[varIndex].value = round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getDouble() * 10000.0); break; case PropertyTypes_e::textProp: - { - const auto tString = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString(); - auto hash= MakeHash(tString); - result->addLocalText(hash, tString); // cache this text - macros.vars.columnVars[varIndex].value = hash; - } + macros.vars.columnVars[varIndex].value = + result->addLocalTextAndHash((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString()); // cache this text break; default: macros.vars.columnVars[varIndex].value = 0; @@ -354,6 +338,12 @@ void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_ } } +void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_s* columns, const int currentRow) +{ 
+ extractMarshalParams(paramCount); + tally(paramCount, columns, currentRow); +} + void __nestItercvar(const cvar* value, string& result) { if (value->typeOf() == cvar::valueType::DICT) diff --git a/src/queryinterpreter.h b/src/queryinterpreter.h index 55fc47a..cd1dfef 100644 --- a/src/queryinterpreter.h +++ b/src/queryinterpreter.h @@ -235,6 +235,8 @@ namespace openset void extractMarshalParams(const int paramCount); + void tally(const int paramCount, const Col_s* columns, const int currentRow); + void marshal_tally(const int paramCount, const Col_s* columns, const int currentRow); void marshal_log(const int paramCount); diff --git a/src/queryparserosl.h b/src/queryparserosl.h index dd392eb..5a93208 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -3484,6 +3484,7 @@ namespace openset::query inMacros.vars.tableVars.back().column = index; inMacros.vars.tableVars.back().actual = v; inMacros.vars.tableVars.back().isSet = schemaInfo->isSet; + inMacros.vars.tableVars.back().isProp = schemaInfo->isCustomerProperty; inMacros.vars.tableVars.back().sortOrder = schemaInfo->idx; inMacros.vars.tableVars.back().schemaColumn = schemaInfo->idx; inMacros.vars.tableVars.back().schemaType = schemaInfo->type; @@ -3526,10 +3527,12 @@ namespace openset::query inMacros.vars.columnVars = selectColumnInfo; index = 0; - for (auto& col : selectColumnInfo) + for (auto& col : inMacros.vars.columnVars) { if (col.lambdaIndex != -1) inMacros.vars.columnLambdas.push_back(index); + if (isProperty(col.actual)) + col.isProp = true; ++index; } diff --git a/src/result.h b/src/result.h index 0fd409d..edabb28 100644 --- a/src/result.h +++ b/src/result.h @@ -238,6 +238,13 @@ namespace openset localText.emplace(hashId, textPtr); } } + + int64_t addLocalTextAndHash(const std::string& value) + { + const auto hash = MakeHash(value); + addLocalText(hash, value); + return hash; + } }; struct CellQueryResult_s diff --git a/src/rpc.h b/src/rpc.h index c9784d4..6d10fc3 100644 --- a/src/rpc.h +++ 
b/src/rpc.h @@ -50,10 +50,10 @@ namespace openset::comms }, { "GET", std::regex(R"(^/v1/tables(\/|\?|\#|)$)"), RpcTable::table_list, {} }, // RpcQuery - { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/report(\/|\?|\#|)$)"), RpcQuery::event, { { 1, "table" } } }, + { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/report(\/|\?|\#|)$)"), RpcQuery::report, { { 1, "table" } } }, { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/segment(\/|\?|\#|)$)"), RpcQuery::segment, { { 1, "table" } } }, { "GET", std::regex(R"(^/v1/query/([a-z0-9_]+)/customer(\/|\?|\#|)$)"), RpcQuery::customer, { { 1, "table" } } }, - { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/customers(\/|\?|\#|)$)"), RpcQuery::segment_customers, { { 1, "table" } } }, + { "POST", std::regex(R"(^/v1/query/([a-z0-9_]+)/customers(\/|\?|\#|)$)"), RpcQuery::customer_list, { { 1, "table" } } }, { "GET", std::regex(R"(^/v1/query/([a-z0-9_]+)/property/([a-z0-9_\.]+)(\/|\?|\#|)$)"), diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index 10de787..ce0cc14 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -11,6 +11,7 @@ #include "oloop_query.h" #include "oloop_segment.h" #include "oloop_customer.h" +#include "oloop_customer_list.h" #include "oloop_property.h" #include "oloop_histogram.h" #include "asyncpool.h" @@ -143,7 +144,7 @@ shared_ptr forkQuery( } else if (r.code != openset::http::StatusCode::success_ok) { - // try to capture a json error that has perculated up from the forked call. + // try to capture a json error that has peculated up from the forked call. 
if (r.data && r.length && r.data[0] == '{') { cjson error(std::string(r.data, r.length), cjson::Mode_e::string); @@ -164,6 +165,7 @@ shared_ptr forkQuery( result.routeError = true; // this will trigger the next error } } + if (result.routeError) { RpcError( @@ -328,7 +330,7 @@ openset::query::ParamVars getInlineVaraibles(const openset::web::MessagePtr& mes return paramVars; } -void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& matches) +void RpcQuery::report(const openset::web::MessagePtr& message, const RpcMapping& matches) { auto database = globals::database; const auto partitions = globals::async; @@ -636,7 +638,7 @@ void RpcQuery::event(const openset::web::MessagePtr& message, const RpcMapping& }); } -void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const RpcMapping& matches) +void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcMapping& matches) { auto database = globals::database; const auto partitions = globals::async; @@ -645,19 +647,15 @@ void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const const auto queryCode = std::string { message->getPayload(), message->getPayloadLength() }; const auto debug = message->getParamBool("debug"); const auto isFork = message->getParamBool("fork"); - const auto trimSize = message->getParamInt("trim", -1); - const auto sortOrder = message->getParamString("order", "desc") == "asc" + const auto trimSize = message->getParamInt("trim", -1); + const auto sortMode = ResultSortMode_e::key; + const auto sortOrder = message->getParamString("order", "desc") == "asc" ? 
ResultSortOrder_e::Asc : ResultSortOrder_e::Desc; + auto sortKeyString = message->getParamString("sort", ""); - auto sortColumnName = ""s; - auto sortMode = ResultSortMode_e::column; - if (message->isParam("sort")) - { - sortColumnName = message->getParamString("sort"); - if (sortColumnName == "group") - sortMode = ResultSortMode_e::key; - } + if (!sortKeyString.length()) + sortKeyString = "id"; const auto log = "Inbound counts query (fork: "s + (isFork ? "true"s : "false"s) + ")"s; Logger::get().info(log); @@ -730,6 +728,47 @@ void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const return; } + // validate that sortKeys are in the select statement + const auto sortKeyParts = split(sortKeyString, ','); + + auto index = 0; + for (auto key : sortKeyParts) + { + key = trim(key); + auto found = false; + + if (key.length()) + { + for (auto& column : queryMacros.vars.columnVars) + { + if (column.alias == key) + { + found = true; + + queryMacros.vars.autoGrouping.push_back(index); + + break; + } + } + } + + if (key.length() == 0 || !found) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "sort key in query string not found in query script select statement" + }, + message); + return; + } + + ++index; + } + + + if (message->isParam("segments")) { const auto segmentText = message->getParamString("segments"); @@ -764,32 +803,6 @@ void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const return; } auto sortColumn = 0; - if (sortMode != ResultSortMode_e::key && sortColumnName.size()) - { - auto set = false; - auto idx = -1; - for (auto& c : queryMacros.vars.columnVars) - { - ++idx; - if (c.alias == sortColumnName) - { - set = true; - sortColumn = c.index; - break; - } - } - if (!set) - { - RpcError( - errors::Error { - errors::errorClass_e::parse, - errors::errorCode_e::syntax_error, - "sort property not found in query aggregates" - }, - message); - return; - } - } /* * We are 
originating the query. @@ -942,9 +955,8 @@ void RpcQuery::segment_customers(const openset::web::MessagePtr& message, const [shuttle, table, queryMacros, resultSets, &instance](AsyncLoop* loop) -> OpenLoop* { instance++; - return new OpenLoopQuery(shuttle, table, queryMacros, resultSets[loop->getWorkerId()], instance); + return new OpenLoopCustomerList(shuttle, table, queryMacros, resultSets[loop->getWorkerId()], instance); }); - } void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping& matches) diff --git a/src/rpc_query.h b/src/rpc_query.h index f99e154..1621941 100644 --- a/src/rpc_query.h +++ b/src/rpc_query.h @@ -14,7 +14,7 @@ namespace openset::comms { public: // POST /v1/query/{table}/event - static void event(const openset::web::MessagePtr& message, const RpcMapping& matches); + static void report(const openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/segment static void segment(const openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/property/{name}?{various optional query params} @@ -22,7 +22,7 @@ namespace openset::comms // GET /v1/query/{table}/customer?{id|idstr}={user_id_key} static void customer(const openset::web::MessagePtr& message, const RpcMapping& matches); // GET /v1/query/{table}/customers?{various optional switches} - static void segment_customers(openset::web::MessagePtr& message, const RpcMapping& matches); + static void customer_list(openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/histogram/{name} static void histogram(const openset::web::MessagePtr& message, const RpcMapping& matches); // POST /v1/query/{table}/batch From 6127096c05ebec8d3f009fed433778a76ff295cc Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Fri, 15 Nov 2019 15:58:32 -0500 Subject: [PATCH 03/31] first version of customer list query returning JSON --- src/querycommon.h | 8 +++ src/queryinterpreter.cpp | 96 
++++++++++++++++++++++++++++-- src/queryinterpreter.h | 9 +-- src/queryparserosl.h | 27 ++++----- src/result.cpp | 125 +++++++++++++++++++++++++++++++++++---- src/result.h | 6 ++ src/rpc_query.cpp | 43 +++++++++++--- src/rpc_table.cpp | 2 +- 8 files changed, 265 insertions(+), 51 deletions(-) diff --git a/src/querycommon.h b/src/querycommon.h index d410d6c..0915ed2 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -14,6 +14,13 @@ namespace openset { namespace query { + enum class ScriptMode_e + { + report, + segment, + customers + }; + enum class BlockType_e { code, @@ -911,6 +918,7 @@ namespace openset int64_t segmentTTL { -1 }; int64_t segmentRefresh { -1 }; int sessionColumn { -1 }; + ScriptMode_e scriptMode; int64_t sessionTime { 60'000LL * 30LL }; // 30 minutes std::string rawScript; diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 44bab40..54a9e55 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -129,7 +129,9 @@ void openset::query::Interpreter::mount(Customer* person) uuid = person->getUUID(); linid = person->getMeta()->linId; } + stackPtr = stack; + if (!isConfigured && rows->size()) configure(); } @@ -235,8 +237,8 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum */ distinctKey.set( resCol.index, - (resCol.modifier == Modifiers_e::var) ? - fixToInt(resCol.value) : + (resCol.lambdaIndex != -1) ? + resCol.value.getInt64() : columns->cols[resCol.distinctColumn], (resCol.aggOnce) ? 
0 : @@ -256,6 +258,9 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum columns->cols[resCol.column] : resCol.value.getInt64(); + if (resCol.column == PROP_UUID) + exportCustomerId = true; + switch (resCol.modifier) { case Modifiers_e::avg: @@ -311,7 +316,7 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum switch (macros.vars.columnVars[varIndex].schemaType) { case PropertyTypes_e::intProp: - macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getInt32(); + macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getInt64(); break; case PropertyTypes_e::doubleProp: macros.vars.columnVars[varIndex].value = round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getDouble() * 10000.0); @@ -320,6 +325,9 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum macros.vars.columnVars[varIndex].value = result->addLocalTextAndHash((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString()); // cache this text break; + case PropertyTypes_e::boolProp: + macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); + break; default: macros.vars.columnVars[varIndex].value = 0; } @@ -332,10 +340,71 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum if (depth == paramCount || (item.typeOf() != cvar::valueType::STR && item == NONE)) break; rowKey.key[depth] = fixToInt(item); - rowKey.types[depth] = getType(item); //result->setAtDepth(rowKey, set_cb); - aggColumns(result->getMakeAccumulator(rowKey)); + rowKey.types[depth] = getType(item); + if (macros.scriptMode != ScriptMode_e::customers) + aggColumns(result->getMakeAccumulator(rowKey)); ++depth; } + + if (macros.scriptMode == ScriptMode_e::customers) + aggColumns(result->getMakeAccumulator(rowKey)); +} + +void 
openset::query::Interpreter::autoTally() +{ + // the script is in an exit state because it terminated, we are going to resurect it. + loopState = LoopState_e::run; + + const auto paramCount = static_cast(macros.vars.autoGrouping.size()); + auto index = 0; + for (const auto varIndex : macros.vars.autoGrouping) + { + if (macros.vars.columnVars[varIndex].lambdaIndex != -1) + { + switch (macros.vars.columnVars[varIndex].schemaType) + { + case PropertyTypes_e::intProp: + marshalParams[index] = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getInt64(); + break; + case PropertyTypes_e::doubleProp: + marshalParams[index] = round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getDouble() * 10000.0); + break; + case PropertyTypes_e::textProp: + marshalParams[index] = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getString(); + break; + case PropertyTypes_e::boolProp: + marshalParams[index] = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); + break; + default: + marshalParams[index] = NONE; + } + } + else + { + if (macros.vars.columnVars[varIndex].schemaColumn == PROP_UUID) + { + if (grid->getTable()->numericCustomerIds) + { + marshalParams[index] = this->grid->getUUID(); + } + else + { + const auto id = this->grid->getUUIDString(); + result->addLocalTextAndHash(id); + marshalParams[index] = id; + } + } + else + { + cout << "hmmm" << endl; + } + + } + + ++index; + } + + tally(paramCount, grid->getEmptyRow(), 0); } void openset::query::Interpreter::marshal_tally(const int paramCount, const Col_s* columns, const int currentRow) @@ -3019,6 +3088,9 @@ void openset::query::Interpreter::exec() returns.push_back(*(stackPtr - 1)); // capture last value on stack } + if (macros.scriptMode == ScriptMode_e::customers) + autoTally(); + setGridProps(); } catch (const std::runtime_error& ex) @@ -3112,6 +3184,10 @@ void openset::query::Interpreter::exec(const int64_t functionHash) "unknown run-time error (3)", additional); } + + if 
(macros.scriptMode == ScriptMode_e::customers) + autoTally(); + // write back props (checks for change by hashing) setGridProps(); @@ -3126,11 +3202,19 @@ void openset::query::Interpreter::exec(const int64_t functionHash) void openset::query::Interpreter::setGridProps() { + auto table = grid->getTable(); + + if (exportCustomerId && table->numericCustomerIds) + { + result->addLocalTextAndHash(this->grid->getUUIDString()); // cache this text + exportCustomerId = false; + } + // write back props (checks for change by hashing) if (!macros.writesProps || !propsChanged) return; - auto schema = grid->getTable()->getProperties(); + auto schema = table->getProperties(); for (auto& var : macros.vars.userVars) { diff --git a/src/queryinterpreter.h b/src/queryinterpreter.h index cd1dfef..dd28d11 100644 --- a/src/queryinterpreter.h +++ b/src/queryinterpreter.h @@ -194,8 +194,8 @@ namespace openset // used to load global variables into user variable space bool firstRun{ true }; - bool inReturn{ false }; + bool exportCustomerId { false }; // this will always point to the last debug message Debug_s* lastDebug{ nullptr }; @@ -236,9 +236,9 @@ namespace openset void extractMarshalParams(const int paramCount); void tally(const int paramCount, const Col_s* columns, const int currentRow); + void autoTally(); void marshal_tally(const int paramCount, const Col_s* columns, const int currentRow); - void marshal_log(const int paramCount); void marshal_break(const int paramCount); void marshal_dt_within(const int paramCount, const int64_t rowStamp); @@ -247,24 +247,19 @@ namespace openset void marshal_bucket(const int paramCount); void marshal_round(const int paramCount); void marshal_fix(const int paramCount); - void marshal_makeDict(const int paramCount); void marshal_makeList(const int paramCount); void marshal_makeSet(const int paramCount); - void marshal_population(const int paramCount); void marshal_intersection(const int paramCount); void marshal_union(const int paramCount); void 
marshal_compliment(const int paramCount); void marshal_difference(const int paramCount); - void marshal_slice(const int paramCount); void marshal_find(const int paramCount, const bool reverse = false); void marshal_split(const int paramCount) const; void marshal_strip(const int paramCount) const; - void marshal_url_decode(const int paramCount) const; - void marshal_get_row(const int paramCount) const; // get a string from the literals script block by ID diff --git a/src/queryparserosl.h b/src/queryparserosl.h index 5a93208..1204276 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -138,13 +138,6 @@ namespace openset::query } }; - enum class ParseMode_e - { - report, - segment, - customers - }; - enum class MiddleOp_e { push_user, @@ -320,7 +313,7 @@ namespace openset::query Debugger_s lastDebug; errors::Error error; - ParseMode_e parseMode { ParseMode_e::report }; + ScriptMode_e parseMode { ScriptMode_e::report }; QueryParser() = default; ~QueryParser() = default; @@ -1146,9 +1139,9 @@ namespace openset::query } // automatic lambda - assume this is a just a variable - if (!isTableColumn(columnName) && !isProperty(columnName) && selectLambdaId == -1) + if ((!isTableColumn(columnName) || isProperty(columnName)) && selectLambdaId == -1) { - if (type == db::PropertyTypes_e::runTimeTypeProp) + if (!isProperty(columnName) && type == db::PropertyTypes_e::runTimeTypeProp) throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, @@ -1387,9 +1380,9 @@ namespace openset::query } // automatic lambda - assume this is a just a variable - if (!isTableColumn(columnName) && !isProperty(columnName) && selectLambdaId == -1) + if ((!isTableColumn(columnName) || isProperty(columnName)) && selectLambdaId == -1) { - if (type == db::PropertyTypes_e::runTimeTypeProp) + if (!isProperty(columnName) && type == db::PropertyTypes_e::runTimeTypeProp) throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, @@ 
-1461,16 +1454,16 @@ namespace openset::query { switch (parseMode) { - case ParseMode_e::report: + case ScriptMode_e::report: return parseSelectReport(tokens, start); - case ParseMode_e::segment: + case ScriptMode_e::segment: throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, "`select` is not used in segment scripts", lastDebug }; - case ParseMode_e::customers: + case ScriptMode_e::customers: return parseSelectCustomers(tokens, start); default: throw QueryParse2Error_s { @@ -4097,12 +4090,13 @@ namespace openset::query inMacros.rawIndex += word + " "; } - bool compileQuery(const std::string& query, openset::db::Properties* columnsPtr, Macro_s& inMacros, ParamVars* templateVars, ParseMode_e parseAs = ParseMode_e::report) + bool compileQuery(const std::string& query, openset::db::Properties* columnsPtr, Macro_s& inMacros, ParamVars* templateVars, ScriptMode_e parseAs = ScriptMode_e::report) { parseMode = parseAs; try { + inMacros.scriptMode = parseAs; tableColumns = columnsPtr; @@ -4113,7 +4107,6 @@ namespace openset::query initialParse(query); - if (!selectColumnInfo.size()) { const auto columnName = "id"; diff --git a/src/result.cpp b/src/result.cpp index 24ab974..9f7266d 100644 --- a/src/result.cpp +++ b/src/result.cpp @@ -561,6 +561,115 @@ openset::result::ResultSet* ResultMuxDemux::internodeToResultSet( return result; } +void ResultMuxDemux::resultFlatColumnsToJson( + const int resultColumnCount, + const int resultSetCount, + std::vector& resultSets, + cjson* doc) +{ + + auto mergedText = mergeResultText(resultSets); + auto rows = mergeResultSets(resultColumnCount, resultSetCount, resultSets); + + const auto shiftIterations = resultSetCount ? 
resultSetCount : 1; + const auto shiftSize = resultColumnCount; + + // this will retrieve either the string literals from the macros, + // the merged localText or exorcise a lock and look in the blob + const auto getText = [&](int64_t valueHash) -> const char* + { + if (const auto textPair = mergedText.find(valueHash); textPair != mergedText.end()) + return textPair->second; + + // nothing found, NA_TEXT + return NA_TEXT; + }; + + auto current = doc->pushArray(); + current->setName("_"); + + auto& modifiers = resultSets[0]->accModifiers; + auto& types = resultSets[0]->accTypes; + + auto rowCounter = -1; + for (auto& r : rows) + { + ++rowCounter; + + const auto shiftOffset = 0; + + auto array = current->pushArray(); + + for (auto dataIndex = shiftOffset, colIndex = 0; dataIndex < shiftOffset + shiftSize; ++dataIndex, ++ + colIndex) + { + const auto& value = r.second->columns[dataIndex].value; + const auto& count = r.second->columns[dataIndex].count; + + // Is this a null, a double, a string or anything else (ints) + if (r.second->columns[dataIndex].value == NONE) + { + if (types[colIndex] == ResultTypes_e::Double || + types[colIndex] == ResultTypes_e::Int) + array->push(static_cast(0)); + else + array->pushNull(); + } + else + { + switch (modifiers[colIndex]) + { + case query::Modifiers_e::sum: + case query::Modifiers_e::min: + case query::Modifiers_e::max: + if (types[colIndex] == ResultTypes_e::Double) + array->push(value / 10000.0); + else + array->push(value); + break; + case query::Modifiers_e::avg: + if (!count) + array->pushNull(); + else if (types[colIndex] == ResultTypes_e::Double) + array->push((value / 10000.0) / static_cast(count)); + else + array->push(value / static_cast(count)); + break; + case query::Modifiers_e::count: + case query::Modifiers_e::dist_count_person: + array->push(value); + break; + case query::Modifiers_e::value: + if (types[colIndex] == ResultTypes_e::Text) + array->push(getText(value)); + else if (types[colIndex] == 
ResultTypes_e::Double) + array->push(value / 10000.0); + else if (types[colIndex] == ResultTypes_e::Bool) + array->push(value ? true : false); + else + array->push(value); + break; + case query::Modifiers_e::var: + { + if (types[colIndex] == ResultTypes_e::Text) + array->push(getText(value)); + else if (types[colIndex] == ResultTypes_e::Double) + array->push(value / 10000.0); + else if (types[colIndex] == ResultTypes_e::Bool) + array->push(value ? true : false); + else + array->push(value); + } + break; + + default: + array->push(value); + } + } + } + } +} + void ResultMuxDemux::resultSetToJson( const int resultColumnCount, const int resultSetCount, @@ -946,8 +1055,8 @@ void ResultMuxDemux::jsonResultSortByColumn(cjson* doc, const ResultSortOrder_e "_", [&](const cjson* left, const cjson* right) -> bool { - auto colLeft = left->xPath("/c"); - auto colRight = right->xPath("/c"); + const auto colLeft = left->find("c");//left->xPath("/c"); + const auto colRight = right->find("c");//right->xPath("/c"); switch (colLeft->at(column)->type()) { @@ -964,11 +1073,6 @@ void ResultMuxDemux::jsonResultSortByColumn(cjson* doc, const ResultSortOrder_e if (sort == ResultSortOrder_e::Asc) return (colLeft->at(column)->getString() < colRight->at(column)->getString()); return (colLeft->at(column)->getString() > colRight->at(column)->getString()); - - case cjson::Types_e::OBJECT: - case cjson::Types_e::ARRAY: - case cjson::Types_e::VOIDED: - case cjson::Types_e::NUL: default: return false; } @@ -981,8 +1085,8 @@ void ResultMuxDemux::jsonResultSortByGroup(cjson* doc, const ResultSortOrder_e s "_", [&](const cjson* left, const cjson* right) -> bool { - auto colLeft = left->xPath("/g"); - auto colRight = right->xPath("/g"); + auto colLeft = left->find("g"); + auto colRight = right->find("/g"); cvar leftValue; cvar rightValue; @@ -1029,8 +1133,7 @@ void ResultMuxDemux::jsonResultSortByGroup(cjson* doc, const ResultSortOrder_e s if (sort == ResultSortOrder_e::Asc) return (leftValue < 
rightValue); - else - return (leftValue > rightValue); + return (leftValue > rightValue); }); } diff --git a/src/result.h b/src/result.h index edabb28..c48388b 100644 --- a/src/result.h +++ b/src/result.h @@ -307,6 +307,12 @@ namespace openset char* data, int64_t blockLength); + static void resultFlatColumnsToJson( + int resultColumnCount, + int resultSetCount, + std::vector& resultSets, + cjson* doc); + static void resultSetToJson( int resultColumnCount, int resultSetCount, diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index ce0cc14..3c82030 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -62,6 +62,7 @@ shared_ptr forkQuery( const openset::web::MessagePtr& message, const int resultColumnCount, const int resultSetCount, + const openset::query::ScriptMode_e scriptMode, const ResultSortMode_e sortMode = ResultSortMode_e::column, const ResultSortOrder_e sortOrder = ResultSortOrder_e::Desc, const int sortColumn = 0, @@ -86,6 +87,7 @@ shared_ptr forkQuery( message, resultColumnCount, resultSetCount, + scriptMode, sortMode, sortOrder, sortColumn, @@ -120,6 +122,7 @@ shared_ptr forkQuery( message, resultColumnCount, resultSetCount, + scriptMode, sortMode, sortOrder, sortColumn, @@ -137,7 +140,7 @@ shared_ptr forkQuery( resultSets.push_back(ResultMuxDemux::internodeToResultSet(r.data, r.length)); else { - // there is an error message from one of the participing nodes + // there is an error message from one of the participating nodes if (!r.data || !r.length) { result.routeError = true; @@ -184,6 +187,21 @@ shared_ptr forkQuery( return nullptr; } } + + if (scriptMode == openset::query::ScriptMode_e::customers) + { + auto resultJson = make_shared(); + ResultMuxDemux::resultFlatColumnsToJson(resultColumnCount, setCount, resultSets, resultJson.get()); + + // free up the responses + openset::globals::mapper->releaseResponses(result); + // clean up all those resultSet* + for (auto res : resultSets) + delete res; + Logger::get().info("RpcQuery on " + 
table->getName()); + return resultJson; + } + auto resultJson = make_shared(); ResultMuxDemux::resultSetToJson(resultColumnCount, setCount, resultSets, resultJson.get()); @@ -503,6 +521,7 @@ void RpcQuery::report(const openset::web::MessagePtr& message, const RpcMapping& message, queryMacros.vars.columnVars.size(), queryMacros.segments.size(), + queryMacros.scriptMode, sortMode, sortOrder, sortColumn, @@ -707,7 +726,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM try { // compile in customers mode - p.compileQuery(queryCode.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ParseMode_e::customers); + p.compileQuery(queryCode.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ScriptMode_e::customers); } catch (const std::runtime_error& ex) { @@ -731,7 +750,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM // validate that sortKeys are in the select statement const auto sortKeyParts = split(sortKeyString, ','); - auto index = 0; + for (auto key : sortKeyParts) { key = trim(key); @@ -739,6 +758,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM if (key.length()) { + auto index = 0; for (auto& column : queryMacros.vars.columnVars) { if (column.alias == key) @@ -749,6 +769,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM break; } + ++index; } } @@ -763,12 +784,8 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM message); return; } - - ++index; } - - if (message->isParam("segments")) { const auto segmentText = message->getParamString("segments"); @@ -824,6 +841,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM message, queryMacros.vars.columnVars.size(), queryMacros.segments.size(), + queryMacros.scriptMode, sortMode, sortOrder, sortColumn, @@ -1022,7 +1040,7 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const 
RpcMapping continue; query::Macro_s queryMacros; // this is our compiled code block query::QueryParser p; - p.compileQuery(r.code.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ParseMode_e::segment); + p.compileQuery(r.code.c_str(), table->getProperties(), queryMacros, ¶mVars, query::ScriptMode_e::segment); if (p.error.inError()) { @@ -1111,7 +1129,10 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping table, message, queries.front().second.vars.columnVars.size(), - queries.front().second.segments.size()); + queries.front().second.segments.size(), + queries.front().second.scriptMode + ); + if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); return; @@ -1492,6 +1513,7 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat message, 1, queryInfo.segments.size(), + query::ScriptMode_e::report, ResultSortMode_e::column, sortOrder, 0, @@ -1911,6 +1933,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma message, 1, queryMacros.segments.size(), + openset::query::ScriptMode_e::report, sortMode, sortOrder, 0, @@ -1918,8 +1941,10 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma bucket, forceMin, forceMax); + if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); + return; } diff --git a/src/rpc_table.cpp b/src/rpc_table.cpp index 60a0270..de745f1 100644 --- a/src/rpc_table.cpp +++ b/src/rpc_table.cpp @@ -149,7 +149,7 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa // set the default required properties columns->setProperty(PROP_STAMP, "stamp", PropertyTypes_e::intProp, false); columns->setProperty(PROP_EVENT, "event", PropertyTypes_e::textProp, false); - columns->setProperty(PROP_UUID, "id", PropertyTypes_e::intProp, false); + columns->setProperty(PROP_UUID, "id", useNumericIds ? 
PropertyTypes_e::intProp : PropertyTypes_e::textProp, false); columns->setProperty(PROP_SEGMENT, "__segment", PropertyTypes_e::textProp, false); columns->setProperty(PROP_SESSION, "session", PropertyTypes_e::intProp, false); From 1b5c9f3275b13ce567a42dcb490bc1a44bceb4a9 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 19 Nov 2019 20:15:55 -0500 Subject: [PATCH 04/31] replaced customer props store and encode, fixed profiling perf problems --- CMakeLists.txt | 2 + lib/heapstack/heapstack.h | 203 ++++++++++++---------- lib/var/var.h | 11 ++ src/common.h | 190 ++++++++++----------- src/customer.h | 255 ++++++++++++++-------------- src/customer_props.cpp | 345 ++++++++++++++++++++++++++++++++++++++ src/customer_props.h | 59 +++++++ src/grid.cpp | 340 ++++++++++++------------------------- src/grid.h | 24 +-- src/oloop.cpp | 56 +++---- src/oloop_query.cpp | 5 +- src/querycommon.h | 5 +- src/queryinterpreter.cpp | 223 ++++++++++-------------- src/queryinterpreter.h | 2 +- src/queryparserosl.h | 19 ++- src/result.cpp | 90 ++++++++-- src/result.h | 66 ++++++-- src/rpc_insert.cpp | 2 +- src/rpc_query.cpp | 35 +++- src/table.h | 1 + 20 files changed, 1170 insertions(+), 763 deletions(-) create mode 100644 src/customer_props.cpp create mode 100644 src/customer_props.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b81c5fe..d045589 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,8 @@ set(SOURCE_FILES src/attributes.h src/config.cpp src/config.h + src/customer_props.cpp + src/customer_props.h src/database.cpp src/database.h src/dbtypes.h diff --git a/lib/heapstack/heapstack.h b/lib/heapstack/heapstack.h index 7c1cae0..af30b29 100644 --- a/lib/heapstack/heapstack.h +++ b/lib/heapstack/heapstack.h @@ -38,7 +38,7 @@ using namespace std; // constants used by HeapStack and PoolMem namespace MemConstants { - const int64_t HeapStackBlockSize = 256LL * 1024LL; + const int64_t HeapStackBlockSize = 256LL * 1024LL; } class HeapStackBlockPool @@ -47,51 +47,51 @@ 
class HeapStackBlockPool const size_t MAXPOOLBLOCKS = 32; - std::vector pool; - CriticalSection poolLock; + std::vector pool; + CriticalSection poolLock; HeapStackBlockPool() = default; public: - // singlton - static HeapStackBlockPool& getPool() - { + // singlton + static HeapStackBlockPool& getPool() + { static HeapStackBlockPool globalPool{}; - return globalPool; - } - - inline void* Get() - { - { // scope the lock - csLock lock(poolLock); - - if (!pool.empty()) - { - const auto block = pool.back(); - pool.pop_back(); - return block; - } - } - return new char[MemConstants::HeapStackBlockSize]; - } - - inline void Put(void* item) - { - csLock lock(poolLock); + return globalPool; + } + + inline void* Get() + { + { // scope the lock + csLock lock(poolLock); + + if (!pool.empty()) + { + const auto block = pool.back(); + pool.pop_back(); + return block; + } + } + return new char[MemConstants::HeapStackBlockSize]; + } + + inline void Put(void* item) + { + csLock lock(poolLock); // cap the number of blocks... not resource friendly - if (pool.size() >= MAXPOOLBLOCKS) + if (pool.size() >= MAXPOOLBLOCKS) delete[] static_cast(item); else - pool.push_back(item); - } + pool.push_back(item); + } - int32_t blockCount() const - { - return static_cast(pool.size()); - } + int32_t blockCount() const + { + return static_cast(pool.size()); + } }; @@ -100,33 +100,33 @@ class HeapStack { private: - // this is the block structure, blocks of heap memory cast to this type will ultimately - // become our stack(s). - // Note: alignment forced + // this is the block structure, blocks of heap memory cast to this type will ultimately + // become our stack(s). 
+ // Note: alignment forced #pragma pack(push,1) - struct block_s - { - block_s* nextBlock{ nullptr }; - int64_t endOffset{ 0 }; - bool nonpooled{ false }; - char data[1] {0}; // fake size, we will be casting this over a buffer - }; + struct block_s + { + block_s* nextBlock{ nullptr }; + int64_t endOffset{ 0 }; + bool nonpooled{ false }; + char data[1] {0}; // fake size, we will be casting this over a buffer + }; #pragma pack(pop) - const int64_t headerSize{ sizeof(block_s) - 1LL }; // size of block header, minus the 1 byte 'data' array - const int64_t blockSize{ MemConstants::HeapStackBlockSize }; - const int64_t dataSize{ MemConstants::HeapStackBlockSize - headerSize }; + const int64_t headerSize{ sizeof(block_s) - 1LL }; // size of block header, minus the 1 byte 'data' array + const int64_t blockSize{ MemConstants::HeapStackBlockSize }; + const int64_t dataSize{ MemConstants::HeapStackBlockSize - headerSize }; - int64_t blocks{ 0 }; - int64_t bytes{ 0 }; + int64_t blocks{ 0 }; + int64_t bytes{ 0 }; - block_s* head{ nullptr }; - block_s* tail{ nullptr }; + block_s* head{ nullptr }; + block_s* tail{ nullptr }; public: - // constructor, default allocates 4 meg blocks. - HeapStack() = default; + // constructor, default allocates 4 meg blocks. 
+ HeapStack() = default; HeapStack(HeapStack&& other) noexcept { @@ -160,57 +160,82 @@ class HeapStack return *this; } - ~HeapStack(); + ~HeapStack(); private: - void Release(); + void Release(); public: - // newPtr - returns a pointer to a block of memory of "size" - inline char* newPtr(const int64_t size) - { - if (size >= dataSize) - newNonpooledBlock(size); - else if (!tail || tail->endOffset + size >= dataSize) - newBlock(); + // newPtr - returns a pointer to a block of memory of "size" + inline char* newPtr(const int64_t size) + { + if (size >= dataSize) + newNonpooledBlock(size); + else if (!tail || tail->endOffset + size >= dataSize) + newBlock(); + + char* insertPtr = tail->data + tail->endOffset; + tail->endOffset += size; + bytes += size; + return insertPtr; + } - char* insertPtr = tail->data + tail->endOffset; - tail->endOffset += size; - bytes += size; - return insertPtr; - } + int64_t* newInt64() + { + return reinterpret_cast(newPtr(sizeof(int64_t))); + } + + int32_t* newInt32() + { + return reinterpret_cast(newPtr(sizeof(int32_t))); + } + + int16_t* newInt16() + { + return reinterpret_cast(newPtr(sizeof(int16_t))); + } + + int8_t* newInt8() + { + return reinterpret_cast(newPtr(sizeof(int8_t))); + } + + char* newChar() + { + return newPtr(sizeof(char)); + } - void reset(); + void reset(); - // currentData - returns a pointer to current memory block - char* currentData() const; + // currentData - returns a pointer to current memory block + char* currentData() const; - char* getHeadPtr() const; + char* getHeadPtr() const; - block_s* firstBlock() const; + block_s* firstBlock() const; - // getSizeBytes - returns how many bytes are being used by DATA in the block stack. - int64_t getBytes() const; + // getSizeBytes - returns how many bytes are being used by DATA in the block stack. 
+ int64_t getBytes() const; - // getAllocated - returns how many bytes are used by the raw blocks in the block stack - int64_t getAllocated() const; + // getAllocated - returns how many bytes are used by the raw blocks in the block stack + int64_t getAllocated() const; - // getBlocks - returns how many blocks are within the block stack - int64_t getBlocks() const; + // getBlocks - returns how many blocks are within the block stack + int64_t getBlocks() const; - // flatten - returns a contiguous block of memory containing the data within all the blocks. - // - // returns pointer made with pooled mem, must be deleted with pooled mem - char* flatten() const; + // flatten - returns a contiguous block of memory containing the data within all the blocks. + // + // returns pointer made with pooled mem, must be deleted with pooled mem + char* flatten() const; - // flatten - same as basic flatten but returns length via reference param - char* flatten(int64_t& length) const; + // flatten - same as basic flatten but returns length via reference param + char* flatten(int64_t& length) const; - // release a flattened pointer here - static void releaseFlatPtr(char* flatPtr); + // release a flattened pointer here + static void releaseFlatPtr(char* flatPtr); private: - // newBlock - adds a new block to the list of blocks, updates the block links. - void newBlock(); - void newNonpooledBlock(int64_t size); + // newBlock - adds a new block to the list of blocks, updates the block links. 
+ void newBlock(); + void newNonpooledBlock(int64_t size); }; diff --git a/lib/var/var.h b/lib/var/var.h index e2b6ccc..e539f5a 100644 --- a/lib/var/var.h +++ b/lib/var/var.h @@ -637,6 +637,17 @@ class cvar return reference != nullptr; } + bool isPod() const + { + return ( + type == valueType::DBL || + type == valueType::FLT || + type == valueType::INT32 || + type == valueType::INT64 || + type == valueType::BOOL || + type == valueType::STR); + } + bool isContainer() const { return (type == valueType::DICT || type == valueType::LIST || type == valueType::SET); diff --git a/src/common.h b/src/common.h index 3ca1472..fd0bd88 100644 --- a/src/common.h +++ b/src/common.h @@ -10,21 +10,21 @@ const int32_t PARTITION_MAX = 1024; // hard limit, not operating limit const int32_t MAX_PROPERTIES = 4096; /* - Because the full names a just do damn long and ugly turning what could - usually fit on one line of code into two + Because the full names a just do damn long and ugly turning what could + usually fit on one line of code into two */ #define recast reinterpret_cast #define cast static_cast enum class serializedBlockType_e : int64_t { - attributes = 1, - people = 2 + attributes = 1, + people = 2 }; /* - These should be moved out, but I'm putting them here - until I get a feel for how many of these there are + These should be moved out, but I'm putting them here + until I get a feel for how many of these there are */ int64_t Now(); @@ -42,100 +42,100 @@ using namespace std; namespace std { - // hasher for std::pair - template <> - struct hash> - { - size_t operator()(const std::pair& v) const - { - return static_cast(MakeHash(v.first + v.second)); - } - }; - - // hasher for std::pair - template <> - struct hash> - { - size_t operator()(const std::pair& v) const - { - return static_cast(MakeHash(recast(&v), sizeof(v))); - } - }; - - // hasher for std::pair - template <> - struct hash> - { - size_t operator()(const std::pair& v) const - { - return 
static_cast(MakeHash(recast(&v), sizeof(v))); - } - }; - + // hasher for std::pair + template <> + struct hash> + { + size_t operator()(const std::pair& v) const + { + return static_cast(MakeHash(v.first + v.second)); + } + }; + + // hasher for std::pair + template <> + struct hash> + { + size_t operator()(const std::pair& v) const + { + return static_cast(MakeHash(recast(&v), sizeof(v))); + } + }; + + // hasher for std::pair + template <> + struct hash> + { + size_t operator()(const std::pair& v) const + { + return static_cast(MakeHash(recast(&v), sizeof(v))); + } + }; + // hasher for std::pair - template <> - struct hash> - { - size_t operator()(const std::pair& v) const - { - return static_cast(MakeHash(recast(&v), sizeof(v))); - } - }; + template <> + struct hash> + { + size_t operator()(const std::pair& v) const + { + return static_cast(MakeHash(recast(&v), sizeof(v))); + } + }; }; namespace std { - namespace - { - // I borrowed this generic tuple hasher from StackOverflow: - // - // http://stackoverflow.com/questions/20834838/using-tuple-in-unordered-map - // - // Code from boost - // Reciprocal of the golden ratio helps spread entropy - // and handles duplicates. - // See Mike Seymour in magic-numbers-in-boosthash-combine: - // http://stackoverflow.com/questions/4948780 - - template - inline void hash_combine(std::size_t& seed, T const& v) - { - seed ^= hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - - // Recursive template code derived from Matthieu M. 
- template ::value - 1> - struct HashValueImpl - { - static void apply(size_t& seed, Tuple const& tuple) - { - HashValueImpl::apply(seed, tuple); - hash_combine(seed, get(tuple)); - } - }; - - template - struct HashValueImpl - { - static void apply(size_t& seed, Tuple const& tuple) - { - hash_combine(seed, get<0>(tuple)); - } - }; - } - - template - struct hash> - { - size_t - operator()(std::tuple const& tt) const - { - size_t seed = 0; - HashValueImpl >::apply(seed, tt); - return seed; - } - - }; + namespace + { + // I borrowed this generic tuple hasher from StackOverflow: + // + // http://stackoverflow.com/questions/20834838/using-tuple-in-unordered-map + // + // Code from boost + // Reciprocal of the golden ratio helps spread entropy + // and handles duplicates. + // See Mike Seymour in magic-numbers-in-boosthash-combine: + // http://stackoverflow.com/questions/4948780 + + template + inline void hash_combine(std::size_t& seed, T const& v) + { + seed ^= hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + + // Recursive template code derived from Matthieu M. + template ::value - 1> + struct HashValueImpl + { + static void apply(size_t& seed, Tuple const& tuple) + { + HashValueImpl::apply(seed, tuple); + hash_combine(seed, get(tuple)); + } + }; + + template + struct HashValueImpl + { + static void apply(size_t& seed, Tuple const& tuple) + { + hash_combine(seed, get<0>(tuple)); + } + }; + } + + template + struct hash> + { + size_t + operator()(std::tuple const& tt) const + { + size_t seed = 0; + HashValueImpl >::apply(seed, tt); + return seed; + } + + }; } using voidfunc = std::function; diff --git a/src/customer.h b/src/customer.h index a83c5c2..83ec185 100644 --- a/src/customer.h +++ b/src/customer.h @@ -8,131 +8,132 @@ using namespace std; namespace openset { - namespace db - { - // forward references - class Table; - class Attributes; - class AttributeBlob; - class Customers; - - /*! 
\class Customer - * - * Reusable Container for managing personData_s structures - * - * The idea is that for an insert job or query job - * a customer object would be created, mapped to the - * correct table (and as such, the schema and - * partition) then re-used by calling mount with - * different raw personData_s pointers. This allows - * for the expensive configuration to be done once - * per job. - * - * The usage is as follows: - * - * 1. call mapTable - * 2. call either mapSchema - * - without params to map all properties to the grid - * - with a property list to map specific properties (for query) - * 3. call prepare to map customer data to Grid object - * 4. do work. This could be insert, and commit, or just reading - */ - class Customer - { - - private: - Grid grid; - Table* table; - Attributes* attributes; - AttributeBlob* blob; - Customers* people; - int partition; - - public: - Customer(); - ~Customer() = default; - - // totally reset the customer object back to square one - void reinitialize(); - - /** - * \brief map a table and partition to this Customer object - * \param[in] tablePtr pointer to a Table object - * \param[in] Partition number this object lives in - */ - bool mapTable(Table* tablePtr, int Partition); - bool mapTable(Table* tablePTr, int Partition, vector& columnNames); - - /** - * \brief maps a personData_s object to the Customer object - * \param[in] personData - */ - void mount(PersonData_s* personData); - - /** - * \brief expands personData_s object into Grid object - */ - void prepare(); - - void setSessionTime(const int64_t sessionTime) - { - grid.setSessionTime(sessionTime); - } - - /** - * \brief return reference to grid object - * \return Grid const pointer (read only) - */ - inline Grid* getGrid() - { - return &grid; - } - - int64_t getUUID() const - { - return grid.getMeta()->id; - } - - inline PersonData_s* getMeta() const - { - return grid.getMeta(); - } - - /** - * \brief insert a single JSON row into the Customer.grid 
object - * \param rowData single row JSON document object. - */ - void insert(cjson* rowData); - - /** - * \brief commit (re-compress) the data in Customer.grid - * - * \remarks this will rebuild a new personData_s structure and update - * the Table.people.linearIndex to reflect the change. - * - * \note The personData_s pointer passed to mount - * from the caller will be invalid, so this commit - * returns the new pointer if this is important. - */ - PersonData_s* commit(); - - private: - /** - * map the entire schema to the Customer.grid object, called by - * map table - * \return - */ - bool mapSchemaAll(); - - /** - * map a portion of the schema to the Customer.grid object, this is - * used during a query, and is called by mapTable - * - * \param[in] columnNames list of properties we want to extract - * \return success - */ - bool mapSchemaList(const vector& columnNames); - - }; - }; + namespace db + { + // forward references + class Table; + class Attributes; + class AttributeBlob; + class Customers; + class Grid; + + /*! \class Customer + * + * Reusable Container for managing personData_s structures + * + * The idea is that for an insert job or query job + * a customer object would be created, mapped to the + * correct table (and as such, the schema and + * partition) then re-used by calling mount with + * different raw personData_s pointers. This allows + * for the expensive configuration to be done once + * per job. + * + * The usage is as follows: + * + * 1. call mapTable + * 2. call either mapSchema + * - without params to map all properties to the grid + * - with a property list to map specific properties (for query) + * 3. call prepare to map customer data to Grid object + * 4. do work. 
This could be insert, and commit, or just reading + */ + class Customer + { + + private: + Grid grid; + Table* table; + Attributes* attributes; + AttributeBlob* blob; + Customers* people; + int partition; + + public: + Customer(); + ~Customer() = default; + + // totally reset the customer object back to square one + void reinitialize(); + + /** + * \brief map a table and partition to this Customer object + * \param[in] tablePtr pointer to a Table object + * \param[in] Partition number this object lives in + */ + bool mapTable(Table* tablePtr, int Partition); + bool mapTable(Table* tablePTr, int Partition, vector& columnNames); + + /** + * \brief maps a personData_s object to the Customer object + * \param[in] personData + */ + void mount(PersonData_s* personData); + + /** + * \brief expands personData_s object into Grid object + */ + void prepare(); + + void setSessionTime(const int64_t sessionTime) + { + grid.setSessionTime(sessionTime); + } + + /** + * \brief return reference to grid object + * \return Grid const pointer (read only) + */ + inline Grid* getGrid() + { + return &grid; + } + + int64_t getUUID() const + { + return grid.getMeta()->id; + } + + inline PersonData_s* getMeta() const + { + return grid.getMeta(); + } + + /** + * \brief insert a single JSON row into the Customer.grid object + * \param rowData single row JSON document object. + */ + void insert(cjson* rowData); + + /** + * \brief commit (re-compress) the data in Customer.grid + * + * \remarks this will rebuild a new personData_s structure and update + * the Table.people.linearIndex to reflect the change. + * + * \note The personData_s pointer passed to mount + * from the caller will be invalid, so this commit + * returns the new pointer if this is important. 
+ */ + PersonData_s* commit(); + + private: + /** + * map the entire schema to the Customer.grid object, called by + * map table + * \return + */ + bool mapSchemaAll(); + + /** + * map a portion of the schema to the Customer.grid object, this is + * used during a query, and is called by mapTable + * + * \param[in] columnNames list of properties we want to extract + * \return success + */ + bool mapSchemaList(const vector& columnNames); + + }; + }; }; diff --git a/src/customer_props.cpp b/src/customer_props.cpp new file mode 100644 index 0000000..d3b4129 --- /dev/null +++ b/src/customer_props.cpp @@ -0,0 +1,345 @@ +#include "customer_props.h" +#include "table.h" +#include "properties.h" +#include "dbtypes.h" + +void openset::db::CustomerProps::reset() +{ + mem.reset(); + propsChanged = false; + // setting to nil/none faster than erasing them + for (auto& prop : props) + prop.second = NONE; + + oldValues.clear(); + newValues.clear(); +} + +char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) +{ + mem.reset(); + + auto tableProps = table->getProperties(); + + const auto count = mem.newInt32(); + *count = 0; + + for (auto& prop : props) + { + auto info = tableProps->getProperty(prop.first); + + if (!info || + !info->isCustomerProperty || + info->type == openset::db::PropertyTypes_e::freeProp || + info->type == openset::db::PropertyTypes_e::runTimeTypeProp) + continue; + + auto& var = prop.second; + + if (var.isPod()) + { + // sip nil/none values + if (var.getInt64() == NONE) + continue; + + // if this is POD and we want a set, skip + if (info->isSet) + continue; + } + else // is a container + { + // skip incorrect types (must be set) + if (var.typeOf() != cvar::valueType::SET) + continue; + + // skip if table prop is not a set + if (!info->isSet) + continue; + + // skip nil/none values + if (var.len() == 0) + continue; + } + + // store column index + *mem.newInt32() = static_cast(info->idx); + // store column type + *mem.newInt32() = 
static_cast(info->type); + + // placeholder size + const auto size = mem.newInt32(); + + const auto startOffset = mem.getBytes(); + + switch (info->type) + { + case openset::db::PropertyTypes_e::intProp: + if (info->isSet) + { + // store number of elements + *mem.newInt32() = prop.second.len(); + for (auto& item : *var.getSet()) + *mem.newInt64() = item.getInt64(); + } + else + { + *mem.newInt64() = var.getInt64(); // copy the union in cvar + } + break; + case openset::db::PropertyTypes_e::doubleProp: + if (info->isSet) + { + // store number of elements + *mem.newInt32() = prop.second.len(); + for (auto& item : *var.getSet()) + *mem.newInt64() = round(item.getDouble() * 10000); + } + else + { + *mem.newInt64() = round(var.getDouble() * 10000); // copy the union in cvar + } + break; + case openset::db::PropertyTypes_e::boolProp: + if (info->isSet) + { + // store number of elements + *mem.newInt32() = prop.second.len(); + for (auto& item : *var.getSet()) + *mem.newInt64() = item.getBool() ? 1 : 0; + } + else + { + *mem.newInt64() = var.getBool() ? 
1 : 0; // copy the union in cvar + } + break; + case openset::db::PropertyTypes_e::textProp: + if (info->isSet) + { + // store number of elements + *mem.newInt32() = prop.second.len(); + for (auto& item : *var.getSet()) + { + const auto text = var.getString(); + const auto buffer = mem.newPtr(text.length()); + // text length + *mem.newInt32() = text.length(); + memcpy(buffer, text.c_str(),text.length()); + } + } + else + { + const auto text = var.getString(); + const auto buffer = mem.newPtr(text.length()); + // text length + *mem.newInt32() = text.length(); + memcpy(buffer, text.c_str(),text.length()); + } + break; + } + + // update size of data + *size = mem.getBytes() - startOffset; + + ++(*count); + } + + return mem.flatten(); +}; + +void openset::db::CustomerProps::decodeCustomerProps(openset::db::Table* table, char* data) +{ + reset(); + + if (!data) + return; + + auto tableProps = table->getProperties(); + const auto count = static_cast(*data); + data += sizeof(int32_t); + + for (auto i = 0; i < count; ++i) + { + const auto propIndex = *reinterpret_cast(data); + data += sizeof(int32_t); + const auto propType = *reinterpret_cast(data); + data += sizeof(int32_t); + const auto recordSize = *reinterpret_cast(data); + data += sizeof(int32_t); + + const auto info = tableProps->getProperty(propIndex); + + // skip if something has changed (dropped or redefined column?) 
+ if (!info->isCustomerProperty || info->type != propType) + { + data += recordSize; + continue; + } + + switch (propType) + { + case openset::db::PropertyTypes_e::intProp: + if (info->isSet) + { + const auto elements = *reinterpret_cast(data); + data += sizeof(int32_t); + + cvar set; + set.set(); + + for (auto e = 0; e < elements; ++e) + { + set += *reinterpret_cast(data); + data += sizeof(int64_t); + } + + props[propIndex] = std::move(set); + } + else + { + props[propIndex] = *reinterpret_cast(data); + data += sizeof(int64_t); + } + break; + case openset::db::PropertyTypes_e::doubleProp: + if (info->isSet) + { + const auto elements = *reinterpret_cast(data); + data += sizeof(int32_t); + + cvar set; + set.set(); + + for (auto e = 0; e < elements; ++e) + { + set += (static_cast(*reinterpret_cast(data)) / 10000.0); + data += sizeof(int64_t); + } + + props[propIndex] = std::move(set); + } + else + { + props[propIndex] = (static_cast(*reinterpret_cast(data)) / 10000.0); + data += sizeof(int64_t); + } + break; + case openset::db::PropertyTypes_e::boolProp: + if (info->isSet) + { + const auto elements = *reinterpret_cast(data); + data += sizeof(int32_t); + + cvar set; + set.set(); + + for (auto e = 0; e < elements; ++e) + { + set += *reinterpret_cast(data) ? true : false; + data += sizeof(int64_t); + } + + props[propIndex] = std::move(set); + } + else + { + props[propIndex] = *reinterpret_cast(data) ? 
true : false; + data += sizeof(int64_t); + } + break; + case openset::db::PropertyTypes_e::textProp: + if (info->isSet) + { + const auto elements = *reinterpret_cast(data); + data += sizeof(int32_t); + + cvar set; + set.set(); + + for (auto e = 0; e < elements; ++e) + { + const auto textLength = *reinterpret_cast(data); + data += sizeof(int32_t); + set += std::string(data, textLength); + data += textLength; + } + + props[propIndex] = std::move(set); + } + else + { + const auto textLength = *reinterpret_cast(data); + data += sizeof(int32_t); + props[propIndex] = std::string(data, textLength); + data += textLength; + } + break; + } + } +} + +int64_t cvarToDB(cvar& value) +{ + switch (value.typeOf()) + { + case cvar::valueType::INT32: case cvar::valueType::INT64: + return value.getInt64(); + case cvar::valueType::FLT: case cvar::valueType::DBL: + return value.getDouble() * 10000; + case cvar::valueType::STR: + return MakeHash(value.getString()); + case cvar::valueType::BOOL: + return value.getBool() ? 
1 : 0; + default: + return NONE; + } +} + +void openset::db::CustomerProps::setProp(openset::db::Table* table, int propIndex, cvar& value) +{ + const auto propInfo = table->getProperties()->getProperty(propIndex); + + if (!propInfo || !propInfo->isCustomerProperty) + return; + + if (auto& iter = props.find(propIndex); iter != props.end()) + { + if (iter->second != value) + { + propsChanged = true; + oldValues.emplace_back(propIndex, cvarToDB(iter->second)); + iter->second = value; + } + } + else + { + props[propIndex] = value; + propsChanged = true; + } + newValues.emplace_back(propIndex, cvarToDB(value)); +} + +void openset::db::CustomerProps::setProp(openset::db::Table* table, std::string& name, cvar& value) +{ + const auto propInfo = table->getProperties()->getProperty(name); + + if (!propInfo || !propInfo->isCustomerProperty) + return; + + setProp(table, propInfo->idx, value); +} + +cvar openset::db::CustomerProps::getProp(openset::db::Table* table, int propIndex) +{ + for (auto& prop : props) + { + if (prop.first == propIndex) + return prop.second; + } + + return NONE; +} + +openset::db::CustomerPropMap* openset::db::CustomerProps::getCustomerProps() +{ + return &props; +} diff --git a/src/customer_props.h b/src/customer_props.h new file mode 100644 index 0000000..a14a600 --- /dev/null +++ b/src/customer_props.h @@ -0,0 +1,59 @@ +#pragma once +#include "robin_hood.h" +#include "heapstack/heapstack.h" +#include "var/var.h" + +namespace openset +{ + namespace db + { + class Table; + + using CustomerPropMap = robin_hood::unordered_map>; + using CustomerPropChange = std::pair; + using CustomerPropChangeList = std::vector; + + class CustomerProps + { + HeapStack mem; + CustomerPropMap props; + + bool propsChanged {false}; + + CustomerPropChangeList oldValues; + CustomerPropChangeList newValues; + + public: + + CustomerProps() = default; + ~CustomerProps() = default; + + void reset(); + + char* encodeCustomerProps(openset::db::Table* table); + void 
decodeCustomerProps(openset::db::Table* table, char* data); + + void setProp(openset::db::Table* table, int propIndex, cvar& value); + void setProp(openset::db::Table* table, std::string& name, cvar& value); + + cvar getProp(openset::db::Table* table, int propIndex); + + bool havePropsChanged() const + { + return propsChanged; + } + + CustomerPropChangeList& getOldValues() + { + return oldValues; + } + + CustomerPropChangeList& getNewValues() + { + return newValues; + } + + CustomerPropMap* getCustomerProps(); + }; + }; +}; \ No newline at end of file diff --git a/src/grid.cpp b/src/grid.cpp index 5aa8bba..b47a91a 100644 --- a/src/grid.cpp +++ b/src/grid.cpp @@ -180,8 +180,7 @@ void Grid::reset() rows.clear(); // release the rows - likely to not free vector internals mem.reset(); // release the memory to the pool - will always leave one page rawData = nullptr; - propHash = 0; - hasInsert = { false }; + hasInsert = false; } void Grid::reinitialize() @@ -227,6 +226,34 @@ bool Grid::mapSchema(Table* tablePtr, Attributes* attributesPtr, const vectorblob; } + +openset::db::CustomerProps* Grid::getCustomerPropsManager() +{ + return &customerProps; +} + + +openset::db::CustomerPropMap* Grid::getCustomerProps() +{ + customerProps.decodeCustomerProps(table, rawData->props); + return customerProps.getCustomerProps(); +} + +void Grid::setCustomerProps() +{ + if (!customerProps.havePropsChanged()) + return; + if (rawData->props) + PoolMem::getPool().freePtr(rawData->props); + rawData->props = customerProps.encodeCustomerProps(table); + + for (auto &change : customerProps.getOldValues()) + attributes->setDirty(this->rawData->linId, change.first, change.second, false); + + for (auto &change : customerProps.getNewValues()) + attributes->setDirty(this->rawData->linId, change.first, change.second, true); +} + cjson Grid::toJSON() { auto properties = table->getProperties(); @@ -238,84 +265,81 @@ cjson Grid::toJSON() doc.set("id", this->rawData->getIdStr()); auto propDoc = 
doc.setObject("properties"); - const auto props = getProps(false); + const auto props = getCustomerProps(); - const auto propDict = props.getDict(); - if (propDict) + for (const auto &key : *props) { - for (const auto &key : *propDict) - { - const auto propInfo = properties->getProperty(key.first); + const auto propInfo = properties->getProperty(key.first); - if (!propInfo) - continue; + if (!propInfo) + continue; - if (propInfo->isSet && key.second.typeOf() == cvar::valueType::SET) + if (propInfo->isSet && key.second.typeOf() == cvar::valueType::SET) + { + auto propList = propDoc->setArray(propInfo->name); + for (const auto &setItem : *key.second.getSet()) { - auto propList = propDoc->setArray(key.first); - for (const auto &setItem : *key.second.getSet()) - { - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - propList->push(key.second.getInt64()); - break; - case PropertyTypes_e::doubleProp: - propList->push(key.second.getDouble()); - break; - case PropertyTypes_e::boolProp: - propList->push(key.second.getBool()); - break; - case PropertyTypes_e::textProp: - propList->push(key.second.getString()); - break; - } - } + switch (propInfo->type) + { + case PropertyTypes_e::intProp: + propList->push(key.second.getInt64()); + break; + case PropertyTypes_e::doubleProp: + propList->push(key.second.getDouble()); + break; + case PropertyTypes_e::boolProp: + propList->push(key.second.getBool()); + break; + case PropertyTypes_e::textProp: + propList->push(key.second.getString()); + break; } - else if (propInfo->isSet && key.second.typeOf() == cvar::valueType::LIST) + } + } + else if (propInfo->isSet && key.second.typeOf() == cvar::valueType::LIST) + { + auto propList = propDoc->setArray(propInfo->name); + for (const auto &setItem : *key.second.getList()) { - auto propList = propDoc->setArray(key.first); - for (const auto &setItem : *key.second.getList()) - { - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - propList->push(key.second.getInt64()); - 
break; - case PropertyTypes_e::doubleProp: - propList->push(key.second.getDouble()); - break; - case PropertyTypes_e::boolProp: - propList->push(key.second.getBool()); - break; - case PropertyTypes_e::textProp: - propList->push(key.second.getString()); - break; - } - } + switch (propInfo->type) + { + case PropertyTypes_e::intProp: + propList->push(key.second.getInt64()); + break; + case PropertyTypes_e::doubleProp: + propList->push(key.second.getDouble()); + break; + case PropertyTypes_e::boolProp: + propList->push(key.second.getBool()); + break; + case PropertyTypes_e::textProp: + propList->push(key.second.getString()); + break; } - else + } + } + else + { + switch (propInfo->type) { - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - propDoc->set(key.first, key.second.getInt64()); - break; - case PropertyTypes_e::doubleProp: - propDoc->set(key.first, key.second.getDouble()); - break; - case PropertyTypes_e::boolProp: - propDoc->set(key.first, key.second.getBool()); - break; - case PropertyTypes_e::textProp: - propDoc->set(key.first, key.second.getString()); - break; - } + case PropertyTypes_e::intProp: + propDoc->set(propInfo->name, key.second.getInt64()); + break; + case PropertyTypes_e::doubleProp: + propDoc->set(propInfo->name, key.second.getDouble()); + break; + case PropertyTypes_e::boolProp: + propDoc->set(propInfo->name, key.second.getBool()); + break; + case PropertyTypes_e::textProp: + propDoc->set(propInfo->name, key.second.getString()); + break; } } - } + + auto rowDoc = doc.setArray("events"); const auto convertToJSON = [&](cjson* branch, Properties::Property_s* propInfo, int64_t value, bool isArray) @@ -409,67 +433,6 @@ Col_s* Grid::newRow() return reinterpret_cast(row); } -cvar Grid::getProps(const bool propsMayChange) -{ - if (!rawData->props) - return cvar(cvar::valueType::DICT); - - cvar var; - - // deserialize the props into a cvar for injection into the interpreter - varBlob::deserialize(var, rawData->props); - - // hash props so 
we can detect changes - propHash = varBlob::hash(var); - - if (propsMayChange) - diff.add(this, var, IndexDiffing::Mode_e::before); - - return var; -} - -void Grid::setProps(cvar& var) -{ - - diff.add(this, var, IndexDiffing::Mode_e::after); - - // are the props deleted or empty? Yes, then lets free memory - if (var == NONE || var.len() == 0) - { - if (rawData->props) - PoolMem::getPool().freePtr(rawData->props); - rawData->props = nullptr; - return; - } - - // if anything has changed, lets replace the props and free the last props - const auto afterHash = varBlob::hash(var); - - if (afterHash != propHash) - { - if (rawData->props) - PoolMem::getPool().freePtr(rawData->props); - - varBlob::serialize(propMem, var); - rawData->props = propMem.flatten(); - propMem.reset(); - - diff.iterRemoved( - [&](const int32_t col, const int64_t val) - { - attributes->setDirty(this->rawData->linId, col, val, false); - } - ); - - diff.iterAdded( - [&](const int32_t col, const int64_t val) - { - attributes->setDirty(this->rawData->linId, col, val, true); - } - ); - } -} - void Grid::mount(PersonData_s* personData) { #ifdef DEBUG @@ -1024,7 +987,7 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins if (hasCustomerProps) { - auto insertProps = getProps(true); + customerProps.decodeCustomerProps(table, rawData->props); for (auto c : inboundProperties) { @@ -1033,7 +996,7 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins { const auto schemaCol = propertyMap->propertyMap[iter->second]; const auto propInfo = properties->getProperty(schemaCol); - const auto& colName = propInfo->name; + const auto& propIndex = propInfo->idx; if (!propInfo->isCustomerProperty) continue; @@ -1041,70 +1004,24 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins switch (c->type()) { case cjson::Types_e::INT: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - 
insertProps[colName] = c->getInt(); - break; - case PropertyTypes_e::boolProp: - insertProps[colName] = c->getInt() ? true : false; - break; - case PropertyTypes_e::textProp: - insertProps[colName] = to_string(c->getInt()); - break; - } + customerProps.setProp(table, propIndex, cvar(c->getInt())); break; case cjson::Types_e::DBL: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - insertProps[colName] = c->getDouble(); - break; - case PropertyTypes_e::boolProp: - insertProps[colName] = c->getDouble() != 0 ? true : false; - break; - case PropertyTypes_e::textProp: - insertProps[colName] = to_string(c->getDouble()); - break; - } + customerProps.setProp(table, propIndex, cvar(c->getDouble())); break; case cjson::Types_e::STR: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - continue; - case PropertyTypes_e::boolProp: - insertProps[colName] = c->getString() != "0"; - break; - case PropertyTypes_e::textProp: - insertProps[colName] = c->getString(); - break; - } + customerProps.setProp(table, propIndex, cvar(c->getString())); break; case cjson::Types_e::BOOL: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - insertProps[colName] = c->getBool() ? 1 : 0; - break; - case PropertyTypes_e::boolProp: - insertProps[colName] = c->getBool(); - break; - case PropertyTypes_e::textProp: - insertProps[colName] = c->getBool() ? 
"true" : "false"; - break; - } + customerProps.setProp(table, propIndex, cvar(c->getBool())); break; case cjson::Types_e::ARRAY: { if (!propInfo->isSet) continue; - insertProps[colName].set(); + cvar tempSet; + tempSet.set(); auto aNodes = c->getNodes(); const auto startIdx = setData.size(); @@ -1113,72 +1030,27 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins switch (n->type()) { case cjson::Types_e::INT: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - insertProps[colName] += n->getInt(); - break; - case PropertyTypes_e::boolProp: - insertProps[colName] += n->getInt() ? true : false; - break; - case PropertyTypes_e::textProp: - insertProps[colName] += to_string(n->getInt()); - break; - } + tempSet += n->getInt(); break; case cjson::Types_e::DBL: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - insertProps[colName] += cast(n->getDouble()); - break; - case PropertyTypes_e::boolProp: - insertProps[colName] += n->getDouble() != 0; - break; - case PropertyTypes_e::textProp: - insertProps[colName] += to_string(n->getDouble()); - break; - } + tempSet += n->getDouble(); break; case cjson::Types_e::STR: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - continue; - case PropertyTypes_e::boolProp: - insertProps[colName] += n->getString() != "0"; - break; - case PropertyTypes_e::textProp: - insertProps[colName] += n->getString(); - break; - } + tempSet += n->getString(); break; case cjson::Types_e::BOOL: - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - case PropertyTypes_e::doubleProp: - insertProps[colName] += c->getBool() ? 1 : 0; - break; - case PropertyTypes_e::boolProp: - insertProps[colName] += c->getBool(); - break; - case PropertyTypes_e::textProp: - insertProps[colName] += c->getBool() ? 
"true" : "false"; - break; - } + tempSet += c->getBool(); break; } } + + customerProps.setProp(table, propIndex, tempSet); } } } } - setProps(insertProps); + setCustomerProps(); } if (hasCustomerProps && hasEventProp) diff --git a/src/grid.h b/src/grid.h index aecd817..a669ca3 100644 --- a/src/grid.h +++ b/src/grid.h @@ -6,12 +6,12 @@ #include "common.h" #include "property_mapping.h" - #include "var/var.h" #include "cjson/cjson.h" #include "robin_hood.h" - +#include "customer_props.h" +#include "../lib/sba/sba.h" namespace openset { @@ -22,6 +22,7 @@ namespace openset class Attributes; class AttributeBlob; class PropertyMapping; + class CustomerProps; class Grid; struct PropertyMap_s; const int64_t int16_min = numeric_limits::min(); @@ -122,7 +123,6 @@ namespace openset { private: using LineNodes = vector; - using ExpandedRows = vector; using SetVector = vector; #pragma pack(push,1) struct Cast_s @@ -134,7 +134,8 @@ namespace openset const static int sizeOfCastHeader = sizeof(Cast_s::propIndex); const static int sizeOfCast = sizeof(Cast_s); - PropertyMap_s* propertyMap { nullptr }; // we will get our memory via stack + PropertyMap_s* propertyMap { nullptr }; + // we will get our memory via stack // so rows have tight cache affinity HeapStack mem; Rows rows; @@ -148,15 +149,13 @@ namespace openset Table* table { nullptr }; Attributes* attributes { nullptr }; AttributeBlob* blob { nullptr }; - bool hasInsert { false }; + CustomerProps customerProps; + mutable IndexDiffing diff; - // mutable - sorry - mutable int64_t propHash { 0 }; - mutable HeapStack propMem; - public: + public: Grid() = default; ~Grid(); @@ -171,8 +170,6 @@ namespace openset bool mapSchema(Table* tablePtr, Attributes* attributesPtr); bool mapSchema(Table* tablePtr, Attributes* attributesPtr, const vector& propertyNames); void setSessionTime(const int64_t sessionTime) { this->sessionTime = sessionTime; } - cvar getProps(const bool propsMayChange); - void setProps(cvar& var); void mount(PersonData_s* 
personData); void prepare(); private: @@ -226,6 +223,11 @@ namespace openset PropertyMap_s* getPropertyMap() const { return propertyMap; } AttributeBlob* getAttributeBlob() const; + openset::db::CustomerProps * getCustomerPropsManager(); + + openset::db::CustomerPropMap* getCustomerProps(); + void setCustomerProps(); + cjson toJSON(); // brings object back to zero state void reinitialize(); private: diff --git a/src/oloop.cpp b/src/oloop.cpp index d295a87..946e22c 100644 --- a/src/oloop.cpp +++ b/src/oloop.cpp @@ -6,72 +6,72 @@ using namespace openset::async; int64_t totalRuns = 0; OpenLoop::OpenLoop(std::string owningTable, oloopPriority_e priority) : - priority(priority), - state(oloopState_e::running), + priority(priority), + state(oloopState_e::running), owningTable(std::move(owningTable)), - runAt(0), - runStart(0), - prepared(false), - loop(nullptr) + runAt(0), + runStart(0), + prepared(false), + loop(nullptr) {} OpenLoop::~OpenLoop() { - // calling suicide will set priority to background - if (priority == oloopPriority_e::realtime) - globals::async->realtimeDec(this->loop->worker); + // calling suicide will set priority to background + if (priority == oloopPriority_e::realtime) + globals::async->realtimeDec(this->loop->worker); } void OpenLoop::assignLoop(AsyncLoop* loop) { - this->loop = loop; - if (priority == oloopPriority_e::realtime) - globals::async->realtimeInc(this->loop->worker); + this->loop = loop; + if (priority == oloopPriority_e::realtime) + globals::async->realtimeInc(this->loop->worker); } bool OpenLoop::inBypass() const { - if (priority == oloopPriority_e::realtime) - return false; + if (priority == oloopPriority_e::realtime) + return false; - return (globals::async->getRealtimeRunning(this->loop->worker) != 0); + return (globals::async->getRealtimeRunning(this->loop->worker) != 0); } void OpenLoop::scheduleFuture(uint64_t milliFromNow) { - runAt = Now() + milliFromNow; + runAt = Now() + milliFromNow; } void OpenLoop::scheduleAt(uint64_t 
milliRunAt) { - runAt = milliRunAt; + runAt = milliRunAt; } void OpenLoop::spawn(OpenLoop* newCell) const { - loop->queueCell(newCell); + loop->queueCell(newCell); } void OpenLoop::suicide() { - if (priority == oloopPriority_e::realtime) - { - globals::async->realtimeDec(this->loop->worker); - priority = oloopPriority_e::background; - } - state = oloopState_e::done; + if (priority == oloopPriority_e::realtime) + { + globals::async->realtimeDec(this->loop->worker); + priority = oloopPriority_e::background; + } + state = oloopState_e::done; } bool OpenLoop::sliceComplete() const { - const auto sliceDivisor = inBypass() ? 3 : 1; - return (Now() > runStart + (loop->runTime / sliceDivisor)); + const auto sliceDivisor = inBypass() ? 3 : 1; + return (Now() > runStart + (loop->runTime / sliceDivisor)); } bool OpenLoop::checkCondition() { - return true; // always good + return true; // always good } bool OpenLoop::checkTimer(const int64_t milliNow) @@ -79,6 +79,6 @@ bool OpenLoop::checkTimer(const int64_t milliNow) return (milliNow > runAt); } -void OpenLoop::partitionRemoved() +void OpenLoop::partitionRemoved() {} diff --git a/src/oloop_query.cpp b/src/oloop_query.cpp index ff737c2..928dee8 100644 --- a/src/oloop_query.cpp +++ b/src/oloop_query.cpp @@ -122,11 +122,14 @@ void OpenLoopQuery::prepare() bool OpenLoopQuery::run() { + int count = 0; while (true) { - if (sliceComplete()) + if (count % 50 == 0 && sliceComplete()) return true; + ++count; + // are we done? 
This will return the index of the // next set bit until there are no more, or maxLinId is met if (interpreter->error.inError() || !index->linearIter(currentLinId, maxLinearId)) diff --git a/src/querycommon.h b/src/querycommon.h index 0915ed2..fb60d11 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -684,9 +684,10 @@ namespace openset int pushRefs { 0 }; // reference counter for pushes int sortOrder { -1 }; // used for sorting in property order int lambdaIndex { -1 }; // used for variable assignment by lambada + int propShortcut { -1 }; bool nonDistinct { false }; cvar value { NONE }; - cvar startingValue { NONE }; + int64_t valueInt64 { NONE }; Variable_s() = default; Variable_s(const string& actual, const string& space, const int sortOrder = -1) @@ -729,9 +730,9 @@ namespace openset pushRefs = source.pushRefs; sortOrder = source.sortOrder; lambdaIndex = source.lambdaIndex; + propShortcut = source.propShortcut; nonDistinct = source.nonDistinct; value = source.value; - startingValue = source.startingValue; } }; diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 54a9e55..2f1e6c4 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -174,22 +174,28 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum // this will ensure non-int types are represented as ints // during grouping - const auto fixToInt = [&](const cvar& value) -> int64_t + const auto fixToInt = [&](const cvar& value, result::ResultTypes_e& type) -> int64_t { switch (value.typeOf()) { case cvar::valueType::INT32: case cvar::valueType::INT64: + type = result::ResultTypes_e::Int; return value.getInt64(); case cvar::valueType::FLT: case cvar::valueType::DBL: + type = result::ResultTypes_e::Double; return value.getDouble() * 10000; case cvar::valueType::STR: + type = result::ResultTypes_e::Text; return result->addLocalTextAndHash(value.getString()); // cache this text case cvar::valueType::BOOL: + type = result::ResultTypes_e::Bool; 
return value.getBool() ? 1 : 0; default: + type = result::ResultTypes_e::None; return NONE; } }; + /* const auto getType = [&](const cvar& value) -> result::ResultTypes_e { switch (value.typeOf()) @@ -208,6 +214,8 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum return result::ResultTypes_e::None; } }; + */ + const auto aggColumns = [&](result::Accumulator* resultColumns) { for (auto& resCol : macros.vars.columnVars) @@ -238,7 +246,7 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum distinctKey.set( resCol.index, (resCol.lambdaIndex != -1) ? - resCol.value.getInt64() : + resCol.valueInt64 : columns->cols[resCol.distinctColumn], (resCol.aggOnce) ? 0 : @@ -246,17 +254,17 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum columns->cols[PROP_STAMP] : currentRow), reinterpret_cast(resultColumns)); - if (eventDistinct.count(distinctKey)) + + if (eventDistinct.emplace(distinctKey, 1).second == false) continue; - eventDistinct.emplace(distinctKey, 1); } auto& resultColumnValue = resultColumns->columns[resCol.index + segmentColumnShift].value; auto& resultColumnCount = resultColumns->columns[resCol.index + segmentColumnShift].count; - const auto aggValue = resCol.lambdaIndex == -1 ? + const auto aggValue = resCol.propShortcut == -1 && resCol.lambdaIndex == -1 ? columns->cols[resCol.column] : - resCol.value.getInt64(); + resCol.valueInt64; if (resCol.column == PROP_UUID) exportCustomerId = true; @@ -316,50 +324,93 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum switch (macros.vars.columnVars[varIndex].schemaType) { case PropertyTypes_e::intProp: - macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getInt64(); + macros.vars.columnVars[varIndex].valueInt64 = + macros.vars.columnVars[varIndex].propShortcut != -1 ? 
+ macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getInt64() : + (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getInt64(); break; case PropertyTypes_e::doubleProp: - macros.vars.columnVars[varIndex].value = round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getDouble() * 10000.0); + macros.vars.columnVars[varIndex].valueInt64 = + macros.vars.columnVars[varIndex].propShortcut != -1 ? + round(macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getDouble() * 10000.0) : + round((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getDouble() * 10000.0); break; case PropertyTypes_e::textProp: - macros.vars.columnVars[varIndex].value = - result->addLocalTextAndHash((*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString()); // cache this text + macros.vars.columnVars[varIndex].valueInt64 = + result->addLocalTextAndHash( + macros.vars.columnVars[varIndex].propShortcut != -1 ? + macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getString() : + (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, currentRow)).getString() + ); // cache this text break; case PropertyTypes_e::boolProp: - macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); + macros.vars.columnVars[varIndex].valueInt64 = + macros.vars.columnVars[varIndex].propShortcut != -1 ? 
+ macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getBool() : + macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); break; default: - macros.vars.columnVars[varIndex].value = 0; + macros.vars.columnVars[varIndex].valueInt64 = 0; } } } - auto depth = 0; - for (const auto& item : marshalParams) + if (macros.scriptMode == ScriptMode_e::customers) { - if (depth == paramCount || (item.typeOf() != cvar::valueType::STR && item == NONE)) - break; - rowKey.key[depth] = fixToInt(item); - rowKey.types[depth] = getType(item); - if (macros.scriptMode != ScriptMode_e::customers) + auto depth = 0; + for (const auto& item : marshalParams) + { + if (depth == paramCount || (item.typeOf() != cvar::valueType::STR && item == NONE)) + break; + rowKey.key[depth] = fixToInt(item, rowKey.types[depth]); + ++depth; + } + aggColumns(result->getMakeAccumulator(rowKey)); + } + else + { + auto depth = 0; + for (const auto& item : marshalParams) + { + if (depth == paramCount || (item.typeOf() != cvar::valueType::STR && item == NONE)) + break; + rowKey.key[depth] = fixToInt(item, rowKey.types[depth]); aggColumns(result->getMakeAccumulator(rowKey)); - ++depth; + ++depth; + } } - - if (macros.scriptMode == ScriptMode_e::customers) - aggColumns(result->getMakeAccumulator(rowKey)); } void openset::query::Interpreter::autoTally() { - // the script is in an exit state because it terminated, we are going to resurect it. + // the script is in an exit state because it terminated, we are going to resurrect it. 
loopState = LoopState_e::run; const auto paramCount = static_cast(macros.vars.autoGrouping.size()); auto index = 0; for (const auto varIndex : macros.vars.autoGrouping) { - if (macros.vars.columnVars[varIndex].lambdaIndex != -1) + if (macros.vars.columnVars[varIndex].propShortcut != -1) + { + switch (macros.vars.columnVars[varIndex].schemaType) + { + case PropertyTypes_e::intProp: + marshalParams[index] = macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getInt64(); + break; + case PropertyTypes_e::doubleProp: + marshalParams[index] = round(macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getDouble() * 10000.0); + break; + case PropertyTypes_e::textProp: + marshalParams[index] = macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getString(); + break; + case PropertyTypes_e::boolProp: + marshalParams[index] = macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getBool(); + break; + default: + marshalParams[index] = NONE; + } + } + else if (macros.vars.columnVars[varIndex].lambdaIndex != -1) { switch (macros.vars.columnVars[varIndex].schemaType) { @@ -3041,15 +3092,14 @@ void openset::query::Interpreter::execReset() recursion = 0; nestDepth = 0; breakDepth = 0; - eventCount = -1; inReturn = false; propsChanged = false; loopState = LoopState_e::run; stackPtr = stack; eventDistinct.clear(); - for (auto i = 0; i < STACK_DEPTH; ++i) - stack[i].clear(); + //for (auto i = 0; i < STACK_DEPTH; ++i) + // stack[i].clear(); } void openset::query::Interpreter::exec() @@ -3204,7 +3254,7 @@ void openset::query::Interpreter::setGridProps() { auto table = grid->getTable(); - if (exportCustomerId && table->numericCustomerIds) + if (exportCustomerId && !table->numericCustomerIds) { result->addLocalTextAndHash(this->grid->getUUIDString()); // cache this text exportCustomerId = false; @@ -3222,115 +3272,11 @@ void openset::query::Interpreter::setGridProps() if (!var.isProp) continue; - if 
(!var.value.isContainer() && var.value.typeOf() != cvar::valueType::BOOL && var.value == NONE) - { - props[var.actual] = NONE; - continue; - } - - if (!var.value.isContainer() && var.value.typeOf() == cvar::valueType::BOOL && var.value.getInt64() == NONE) - { - props[var.actual] = NONE; - continue; - } - - // validate the props against the schema - const auto propInfo = schema->getProperty(var.actual); - - // skip of the property no longer exists or is no longer a prop, skip empty sets - if (!propInfo || !propInfo->isCustomerProperty || (propInfo->isSet && !var.value.len())) - { - props[var.actual] = NONE; - continue; - } - - if (!propInfo->isSet && var.value.isContainer()) - throw std::runtime_error("property '" + var.actual + "' is not defined as a 'set' type."); - - if (propInfo->isSet && !var.value.isContainer()) - throw std::runtime_error("property '" + var.actual + "' is a set type. Values must be 'List' or 'Set'"); - - if (propInfo->isSet && var.value.typeOf() == cvar::valueType::DICT) - throw std::runtime_error( - "property '" + var.actual + "' cannot be a Dict, valid input types are values, Lists or Sets."); - - if (propInfo->isSet) - { - cvar set; - set.set(); - - if (var.value.typeOf() == cvar::valueType::LIST) - { - for (auto& v : *var.value.getList()) - { - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - set += v.getInt64(); - break; - case PropertyTypes_e::doubleProp: - set += v.getDouble(); - break; - case PropertyTypes_e::boolProp: - set += v.getBool(); - break; - case PropertyTypes_e::textProp: - set += v.getString(); - break; - } - } - } - else - { - for (auto& v : *var.value.getSet()) - { - if (v == NONE) // skip nil/none values - continue; - - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - set += v.getInt64(); - break; - case PropertyTypes_e::doubleProp: - set += v.getDouble(); - break; - case PropertyTypes_e::boolProp: - set += v.getBool(); - break; - case PropertyTypes_e::textProp: - set += v.getString(); - 
break; - } - } - } - - // if it had any values - if (set.len()) - props[var.actual] = set; - } - else - { - switch (propInfo->type) - { - case PropertyTypes_e::intProp: - props[var.actual] = var.value.getInt64(); - break; - case PropertyTypes_e::doubleProp: - props[var.actual] = var.value.getDouble(); - break; - case PropertyTypes_e::boolProp: - props[var.actual] = var.value.getBool(); - break; - case PropertyTypes_e::textProp: - props[var.actual] = var.value.getString(); - break; - } - } + grid->getCustomerPropsManager()->setProp(table, var.schemaColumn, var.value); } // encode - grid->setProps(props); + grid->setCustomerProps(); } void openset::query::Interpreter::getGridProps() @@ -3348,13 +3294,12 @@ void openset::query::Interpreter::getGridProps() return; } - props = grid->getProps(macros.writesProps); + grid->getCustomerProps(); // copy props into userVars for (auto varIndex : macros.props) - macros.vars.userVars[varIndex].value = props.contains(macros.vars.userVars[varIndex].actual) - ? props[macros.vars.userVars[varIndex].actual] - : cvar(NONE); + macros.vars.userVars[varIndex].value = grid->getCustomerPropsManager()->getProp( + grid->getTable(), macros.vars.userVars[varIndex].schemaColumn); } openset::query::Interpreter::Returns& openset::query::Interpreter::getLastReturn() diff --git a/src/queryinterpreter.h b/src/queryinterpreter.h index dd28d11..bbbcfca 100644 --- a/src/queryinterpreter.h +++ b/src/queryinterpreter.h @@ -7,6 +7,7 @@ #include "xxhash.h" #include "robin_hood.h" +#include "customer_props.h" #include "querycommon.h" #include "result.h" #include "errors.h" @@ -173,7 +174,6 @@ namespace openset // debug - log entries are entered in order by calling debug DebugLog debugLog; errors::Error error; - int32_t eventCount{ -1 }; // -1 is uninitialized, calculation cached here // callbacks to external code (i.e. 
triggers) function getSegment_cb{ nullptr }; diff --git a/src/queryparserosl.h b/src/queryparserosl.h index 1204276..8d10093 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -1379,6 +1379,8 @@ namespace openset::query idx = matchingIndex + 1; } + auto propShortcut = -1; + // automatic lambda - assume this is a just a variable if ((!isTableColumn(columnName) || isProperty(columnName)) && selectLambdaId == -1) { @@ -1390,8 +1392,15 @@ namespace openset::query lastDebug }; - const Blocks::Line selectLambda { columnName }; - selectLambdaId = addLinesAsBlock(selectLambda); + if (isProperty(columnName)) + { + propShortcut = userVarIndex(columnName); + } + else + { + const Blocks::Line selectLambda { columnName }; + selectLambdaId = addLinesAsBlock(selectLambda); + } } // already used, then throw and suggest using `as` @@ -1438,6 +1447,7 @@ namespace openset::query var.schemaColumn = propInfo ? propInfo->idx : -1; var.schemaType = !propInfo || type != db::PropertyTypes_e::runTimeTypeProp ? 
type : propInfo->type; var.lambdaIndex = selectLambdaId; + var.propShortcut = propShortcut; var.aggOnce = aggOnce; // if this is selection is keyed to another property lets reference it as well @@ -3495,6 +3505,9 @@ namespace openset::query if (isProperty(v)) { + const auto schemaInfo = tableColumns->getProperty(v); + inMacros.vars.userVars.back().schemaColumn = schemaInfo->idx; + inMacros.vars.userVars.back().isProp = true; inMacros.useProps = true; inMacros.props.push_back(index); @@ -3522,7 +3535,7 @@ namespace openset::query index = 0; for (auto& col : inMacros.vars.columnVars) { - if (col.lambdaIndex != -1) + if (col.lambdaIndex != -1 || col.propShortcut != -1) inMacros.vars.columnLambdas.push_back(index); if (isProperty(col.actual)) col.isProp = true; diff --git a/src/result.cpp b/src/result.cpp index 9f7266d..a82353f 100644 --- a/src/result.cpp +++ b/src/result.cpp @@ -9,8 +9,9 @@ using namespace openset::result; static char NA_TEXT[] = "n/a"; -ResultSet::ResultSet(const int64_t resultWidth) - : resultWidth(resultWidth) +ResultSet::ResultSet(const int64_t resultWidth) : + resultWidth(resultWidth), + resultBytes(resultWidth * sizeof(Accumulation_s)) { accTypes.resize(resultWidth, ResultTypes_e::Int); accModifiers.resize(resultWidth, query::Modifiers_e::sum); @@ -20,6 +21,7 @@ ResultSet::ResultSet(ResultSet&& other) noexcept : results(std::move(other.results)), mem(std::move(other.mem)), resultWidth(other.resultWidth), + resultBytes(other.resultBytes), localText(std::move(other.localText)), accTypes(std::move(other.accTypes)), accModifiers(std::move(other.accModifiers)) @@ -154,14 +156,25 @@ void ResultSet::setAccTypesFromMacros(const openset::query::Macro_s ¯os) Accumulator* ResultSet::getMakeAccumulator(RowKey& key) { - if (const auto tempPair = results.find(key); tempPair != results.end()) + if (auto& res = results.emplace(key, nullptr); res.second == true) + { + const auto t = new(mem.newPtr(resultBytes)) openset::result::Accumulator(resultWidth); + 
res.first->second = t; + return t; + } + else + { + return res.first->second; + } + + /*if (const auto tempPair = results.find(key); tempPair != results.end()) return tempPair->second; const auto resultBytes = resultWidth * sizeof(Accumulation_s); const auto t = new(mem.newPtr(resultBytes)) openset::result::Accumulator(resultWidth); results.emplace(key, t); - return t; + return t;*/ } void mergeResultTypes( @@ -237,13 +250,38 @@ ResultSet::RowVector mergeResultSets( vector mergeList; - auto count = 0; - + /* for (auto& r : resultSets) { // sort the list r->makeSortedList(); + // if no data, skip + if (!r->sortedResult.size()) + continue; + + // add it the merge list + mergeList.push_back(&r->sortedResult); + count += static_cast(r->sortedResult.size()); + }*/ + + std::vector threads; + //create threads + threads.reserve(resultSets.size()); + for (auto& r : resultSets) + threads.emplace_back(std::thread([](ResultSet* set) + { + set->makeSortedList(); + }, r) + ); + + //wait for them to complete + for (auto& th : threads) + th.join(); + + auto count = 0; + for (auto& r : resultSets) + { // if no data, skip if (!r->sortedResult.size()) continue; @@ -290,8 +328,8 @@ ResultSet::RowVector mergeResultSets( // is it less than equal or // not set (lowestIdx defaults to end(), so not set) if (lowestIdx == iterators.end() || - (*t).first < (**lowestIdx).first || - (*t).first == (**lowestIdx).first) + (*t).first <= (**lowestIdx).first) //|| + //(*t).first == (**lowestIdx).first) { lowestIdx = it; } @@ -600,8 +638,9 @@ void ResultMuxDemux::resultFlatColumnsToJson( auto array = current->pushArray(); - for (auto dataIndex = shiftOffset, colIndex = 0; dataIndex < shiftOffset + shiftSize; ++dataIndex, ++ - colIndex) + for (auto dataIndex = shiftOffset, colIndex = 0; + dataIndex < shiftOffset + shiftSize; + ++dataIndex, ++colIndex) { const auto& value = r.second->columns[dataIndex].value; const auto& count = r.second->columns[dataIndex].count; @@ -1049,14 +1088,41 @@ void 
ResultMuxDemux::jsonResultHistogramFill( } } +void ResultMuxDemux::flatColumnMultiSort(cjson* doc, const ResultSortOrder_e sort, const int column) +{ + doc->recurseSort( + "_", + [&](const cjson* left, const cjson* right) -> bool + { + switch (left->at(column)->type()) + { + case cjson::Types_e::BOOL: + case cjson::Types_e::INT: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getInt() < right->at(column)->getInt()); + return (left->at(column)->getInt() > right->at(column)->getInt()); + case cjson::Types_e::DBL: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getDouble() < right->at(column)->getDouble()); + return (left->at(column)->getDouble() > right->at(column)->getDouble()); + case cjson::Types_e::STR: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getString() < right->at(column)->getString()); + return (left->at(column)->getString() > right->at(column)->getString()); + default: + return false; + } + }); +} + void ResultMuxDemux::jsonResultSortByColumn(cjson* doc, const ResultSortOrder_e sort, const int column) { doc->recurseSort( "_", [&](const cjson* left, const cjson* right) -> bool { - const auto colLeft = left->find("c");//left->xPath("/c"); - const auto colRight = right->find("c");//right->xPath("/c"); + const auto colLeft = left->find("c"); + const auto colRight = right->find("c"); switch (colLeft->at(column)->type()) { diff --git a/src/result.h b/src/result.h index c48388b..404c423 100644 --- a/src/result.h +++ b/src/result.h @@ -16,9 +16,9 @@ namespace openset { namespace result { - const int keyDepth = 8; + const int keyDepth = 4; - enum class ResultTypes_e : int + enum class ResultTypes_e : int8_t { Int = 0, Double = 1, @@ -41,8 +41,11 @@ namespace openset struct RowKey { +#pragma pack(push,1) + //size_t hash; int64_t key[keyDepth]; ResultTypes_e types[keyDepth]; +#pragma pack(pop) RowKey() = default; @@ -52,18 +55,18 @@ namespace openset key[1] = NONE; key[2] = NONE; key[3] = NONE; - key[4] = 
NONE; - key[5] = NONE; - key[6] = NONE; - key[7] = NONE; + //key[4] = NONE; + //key[5] = NONE; + //key[6] = NONE; + //key[7] = NONE; types[0] = ResultTypes_e::Int; types[1] = ResultTypes_e::Int; types[2] = ResultTypes_e::Int; types[3] = ResultTypes_e::Int; - types[4] = ResultTypes_e::Int; - types[5] = ResultTypes_e::Int; - types[6] = ResultTypes_e::Int; - types[7] = ResultTypes_e::Int; + //types[4] = ResultTypes_e::Int; + //types[5] = ResultTypes_e::Int; + //types[6] = ResultTypes_e::Int; + //types[7] = ResultTypes_e::Int; } void clearFrom(const int index) @@ -72,6 +75,16 @@ namespace openset *iter = NONE; } + void makeReady() + { + //hash = MakeHash(reinterpret_cast(key), keyDepth * sizeof(int64_t)); + } + + size_t makeHash() const + { + return MakeHash(reinterpret_cast(key), keyDepth * sizeof(int64_t)); + } + RowKey keyFrom(const int index) const { auto newKey { *this }; @@ -116,6 +129,29 @@ namespace openset } return false; } + + inline bool operator>(const RowKey& left, const RowKey& right) + { + for (auto i = 0; i < keyDepth; ++i) + { + if (left.key[i] < right.key[i]) + return false; + if (left.key[i] > right.key[i]) + return true; + } + return false; + } + + inline bool operator<=(const RowKey& left, const RowKey& right) + { + for (auto i = 0; i < keyDepth; ++i) + { + if (left.key[i] > right.key[i]) + return false; + } + return true; + } + } } @@ -125,9 +161,11 @@ namespace std template <> struct hash { - size_t operator()(const openset::result::RowKey key) const noexcept + size_t operator()(const openset::result::RowKey& key) const noexcept { - auto hash = key.key[0]; + return key.makeHash(); + //return key.hash; + /*auto hash = key.key[0]; auto count = 1; for (auto iter = key.key + 1; iter < key.key + openset::result::keyDepth; ++iter, ++count) { @@ -135,7 +173,7 @@ namespace std return hash; hash = (hash << count) + key.key[1]; } - return hash; + return hash;*/ } }; } @@ -181,6 +219,7 @@ namespace openset vector sortedResult; HeapStack mem; int64_t 
resultWidth { 1 }; + int64_t resultBytes { 8 }; CriticalSection cs; @@ -324,6 +363,7 @@ namespace openset int64_t bucket, int64_t forceMin = std::numeric_limits::min(), int64_t forceMax = std::numeric_limits::min()); + static void flatColumnMultiSort(cjson* doc, ResultSortOrder_e sort, int column); static void jsonResultSortByColumn(cjson* doc, ResultSortOrder_e sort, int column); static void jsonResultSortByGroup(cjson* doc, ResultSortOrder_e sort); diff --git a/src/rpc_insert.cpp b/src/rpc_insert.cpp index 75b4a83..7cf5f59 100644 --- a/src/rpc_insert.cpp +++ b/src/rpc_insert.cpp @@ -101,7 +101,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa for (auto row : rows) { - const auto personNode = row->xPath("/id"); + const auto personNode = row->find("id"); if (!personNode) { diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index 3c82030..eccb28a 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -65,7 +65,7 @@ shared_ptr forkQuery( const openset::query::ScriptMode_e scriptMode, const ResultSortMode_e sortMode = ResultSortMode_e::column, const ResultSortOrder_e sortOrder = ResultSortOrder_e::Desc, - const int sortColumn = 0, + const vector sortColumn = {0}, const int trim = -1, const int64_t bucket = 0, const int64_t forceMin = std::numeric_limits::min(), @@ -100,6 +100,8 @@ shared_ptr forkQuery( const auto setCount = resultSetCount ? 
resultSetCount : 1; + const auto dispatchStartTime = Now(); + // call all nodes and gather results - JSON is what's coming back // NOTE - it would be fully possible to flatten results to binary auto result = openset::globals::mapper->dispatchCluster( @@ -133,6 +135,9 @@ shared_ptr forkQuery( retryCount + 1); } + const auto gatherStartTime = Now(); + + std::vector resultSets; for (auto& r : result.responses) { @@ -188,17 +193,31 @@ shared_ptr forkQuery( } } + const auto gatherEndTime = Now(); + if (scriptMode == openset::query::ScriptMode_e::customers) { auto resultJson = make_shared(); + const auto toJsonStartTime = Now(); ResultMuxDemux::resultFlatColumnsToJson(resultColumnCount, setCount, resultSets, resultJson.get()); + const auto toJsonEndTime = Now(); // free up the responses openset::globals::mapper->releaseResponses(result); // clean up all those resultSet* for (auto res : resultSets) delete res; - Logger::get().info("RpcQuery on " + table->getName()); + + const auto trimStartTime = Now(); + //ResultMuxDemux::flatColumnMultiSort(resultJson.get(), sortOrder, sortColumn[0]); + ResultMuxDemux::jsonResultTrim(resultJson.get(), trim); // local function to fill Meta data in result JSON + const auto trimEndTime = Now(); + + cout << "dispatch: " << (dispatchEndTime - dispatchStartTime) << + " gather: " << (gatherEndTime - gatherStartTime) << + " json: " << (toJsonEndTime - toJsonStartTime) << + " trim: " << (trimEndTime - trimStartTime) << endl; + return resultJson; } @@ -220,7 +239,7 @@ shared_ptr forkQuery( ResultMuxDemux::jsonResultSortByGroup(resultJson.get(), sortOrder); break; case ResultSortMode_e::column: - ResultMuxDemux::jsonResultSortByColumn(resultJson.get(), sortOrder, sortColumn); + ResultMuxDemux::jsonResultSortByColumn(resultJson.get(), sortOrder, sortColumn[0]); break; default: ; } @@ -524,7 +543,7 @@ void RpcQuery::report(const openset::web::MessagePtr& message, const RpcMapping& queryMacros.scriptMode, sortMode, sortOrder, - sortColumn, + 
{sortColumn}, trimSize); if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); @@ -750,6 +769,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM // validate that sortKeys are in the select statement const auto sortKeyParts = split(sortKeyString, ','); + std::vector sortOrders; for (auto key : sortKeyParts) { @@ -766,6 +786,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM found = true; queryMacros.vars.autoGrouping.push_back(index); + sortOrders.push_back(index); break; } @@ -844,7 +865,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM queryMacros.scriptMode, sortMode, sortOrder, - sortColumn, + sortOrders, trimSize); if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); @@ -1516,7 +1537,7 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat query::ScriptMode_e::report, ResultSortMode_e::column, sortOrder, - 0, + {0}, trimSize); if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); @@ -1936,7 +1957,7 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma openset::query::ScriptMode_e::report, sortMode, sortOrder, - 0, + {0}, trimSize, bucket, forceMin, diff --git a/src/table.h b/src/table.h index b2295eb..2afceb5 100644 --- a/src/table.h +++ b/src/table.h @@ -20,6 +20,7 @@ namespace openset class Database; class PropertyMapping; class TablePartitioned; + class AttributeBlob; struct SegmentTtl_s { From b3a77c2744679d498d8b0ea7953583c90306f9ef Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Wed, 20 Nov 2019 11:06:54 -0500 Subject: [PATCH 05/31] fixed code, tests now pass --- src/customer_props.cpp | 108 ++++++++++++++++++++++++++++++++++------- src/grid.cpp | 11 +++-- test/test_db.h | 6 +-- 3 files changed, 100 insertions(+), 25 deletions(-) diff --git a/src/customer_props.cpp 
b/src/customer_props.cpp index d3b4129..cd95881 100644 --- a/src/customer_props.cpp +++ b/src/customer_props.cpp @@ -119,20 +119,18 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) *mem.newInt32() = prop.second.len(); for (auto& item : *var.getSet()) { - const auto text = var.getString(); - const auto buffer = mem.newPtr(text.length()); - // text length + const auto text = item.getString(); *mem.newInt32() = text.length(); - memcpy(buffer, text.c_str(),text.length()); + const auto buffer = mem.newPtr(text.length()); + memcpy(buffer, text.c_str(), text.length()); } } else { const auto text = var.getString(); - const auto buffer = mem.newPtr(text.length()); - // text length *mem.newInt32() = text.length(); - memcpy(buffer, text.c_str(),text.length()); + const auto buffer = mem.newPtr(text.length()); + memcpy(buffer, text.c_str(), text.length()); } break; } @@ -277,23 +275,50 @@ void openset::db::CustomerProps::decodeCustomerProps(openset::db::Table* table, } } -int64_t cvarToDB(cvar& value) +int64_t cvarToDB(openset::db::PropertyTypes_e type, const cvar& value) { - switch (value.typeOf()) + + switch (type) { - case cvar::valueType::INT32: case cvar::valueType::INT64: + case openset::db::PropertyTypes_e::intProp: return value.getInt64(); - case cvar::valueType::FLT: case cvar::valueType::DBL: + case openset::db::PropertyTypes_e::doubleProp: return value.getDouble() * 10000; - case cvar::valueType::STR: - return MakeHash(value.getString()); - case cvar::valueType::BOOL: + case openset::db::PropertyTypes_e::boolProp: return value.getBool() ? 
1 : 0; + case openset::db::PropertyTypes_e::textProp: + return MakeHash(value.getString()); default: return NONE; } } +void listFix(cvar& value) +{ + if (value.typeOf() == cvar::valueType::DICT) + { + cvar set; + set.set(); + + for (auto& item : *value.getDict()) + set += std::move(item.first); + + value = set; + return; + } + if (value.typeOf() == cvar::valueType::LIST) + { + cvar set; + set.set(); + + for (auto& item : *value.getList()) + set += std::move(item); + + value = set; + return; + } +} + void openset::db::CustomerProps::setProp(openset::db::Table* table, int propIndex, cvar& value) { const auto propInfo = table->getProperties()->getProperty(propIndex); @@ -301,21 +326,68 @@ void openset::db::CustomerProps::setProp(openset::db::Table* table, int propInde if (!propInfo || !propInfo->isCustomerProperty) return; + if (propInfo->isSet) + listFix(value); + if (auto& iter = props.find(propIndex); iter != props.end()) { - if (iter->second != value) + if (propInfo->isSet) + { + if (iter->second.typeOf() == cvar::valueType::SET) + { + for (auto& element : *iter->second.getSet()) + { + if (!value.contains(element) && element != NONE) + { + oldValues.emplace_back(propIndex, cvarToDB(propInfo->type, element)); + propsChanged = true; + } + } + + for (auto& element : *value.getSet()) + { + if (!iter->second.contains(element)) + { + newValues.emplace_back(propIndex, cvarToDB(propInfo->type, element)); + propsChanged = true; + } + } + } + + iter->second = value; + } + else if (iter->second != value) { propsChanged = true; - oldValues.emplace_back(propIndex, cvarToDB(iter->second)); + oldValues.emplace_back(propIndex, cvarToDB(propInfo->type, iter->second)); + newValues.emplace_back(propIndex, cvarToDB(propInfo->type, value)); iter->second = value; } } else { - props[propIndex] = value; propsChanged = true; + if (propInfo->isSet) + { + if (value.typeOf() == cvar::valueType::SET) + { + for (auto& element : *value.getSet()) + newValues.emplace_back(propIndex, 
cvarToDB(propInfo->type, element)); + props[propIndex] = value; + } + else + { + props[propIndex] = NONE; + } + } + else + { + props[propIndex] = value; + newValues.emplace_back(propIndex, cvarToDB(propInfo->type, value)); + } + + newValues.emplace_back(propIndex, NONE); } - newValues.emplace_back(propIndex, cvarToDB(value)); } void openset::db::CustomerProps::setProp(openset::db::Table* table, std::string& name, cvar& value) diff --git a/src/grid.cpp b/src/grid.cpp index b47a91a..8f30e85 100644 --- a/src/grid.cpp +++ b/src/grid.cpp @@ -251,7 +251,10 @@ void Grid::setCustomerProps() attributes->setDirty(this->rawData->linId, change.first, change.second, false); for (auto &change : customerProps.getNewValues()) + { + attributes->getMake(change.first, change.second); attributes->setDirty(this->rawData->linId, change.first, change.second, true); + } } cjson Grid::toJSON() @@ -282,16 +285,16 @@ cjson Grid::toJSON() switch (propInfo->type) { case PropertyTypes_e::intProp: - propList->push(key.second.getInt64()); + propList->push(setItem.getInt64()); break; case PropertyTypes_e::doubleProp: - propList->push(key.second.getDouble()); + propList->push(setItem.getDouble()); break; case PropertyTypes_e::boolProp: - propList->push(key.second.getBool()); + propList->push(setItem.getBool()); break; case PropertyTypes_e::textProp: - propList->push(key.second.getString()); + propList->push(setItem.getString()); break; } } diff --git a/test/test_db.h b/test/test_db.h index c0a72ea..7c9bca9 100644 --- a/test/test_db.h +++ b/test/test_db.h @@ -162,15 +162,15 @@ inline Tests test_db() person.insert(e); } + auto grid = person.getGrid(); + // write back any dirty change bits from the insert parts->attributes.clearDirty(); - auto grid = person.getGrid(); - auto json = grid->toJSON(); // non-condensed // NOTE - uncomment if you want to see the results - //cout << cjson::stringify(&json, true) << endl; + cout << cjson::stringify(&json, true) << endl; std::unordered_set timeStamps; 
std::unordered_set referral_sources; From 414db6f6340a3f89c55fee3db2984f52be3466bb Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 21 Nov 2019 15:40:25 -0500 Subject: [PATCH 06/31] customer indexes are working at a basic level --- CMakeLists.txt | 2 + lib/mem/blhash.h | 121 +++++++++++++++++++++++++----------- src/attributes.cpp | 38 ++++++++--- src/attributes.h | 12 ++-- src/customer_index.cpp | 11 ++++ src/customer_index.h | 97 +++++++++++++++++++++++++++++ src/customers.cpp | 57 ++++------------- src/customers.h | 3 +- src/grid.cpp | 12 ++-- src/oloop.h | 102 +++++++++++++++--------------- src/oloop_customer_list.cpp | 23 ++++++- src/oloop_customer_list.h | 77 ++++++++++++----------- src/querycommon.h | 2 +- src/queryinterpreter.cpp | 10 --- src/queryinterpreter.h | 2 - src/result.h | 13 +++- src/rpc_table.cpp | 75 +++++++++++++++++++++- src/table.cpp | 6 ++ src/table.h | 30 ++++++--- 19 files changed, 478 insertions(+), 215 deletions(-) create mode 100644 src/customer_index.cpp create mode 100644 src/customer_index.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d045589..e1a46c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,8 @@ set(SOURCE_FILES src/attributes.h src/config.cpp src/config.h + src/customer_index.cpp + src/customer_index.h src/customer_props.cpp src/customer_props.h src/database.cpp diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index d89152b..36cdf94 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -99,8 +99,12 @@ class ShortPtrPool template class BinaryListHash { -#pragma pack(push,1) +public: + using FilterCB = std::function; + +private: +#pragma pack(push,1) struct bl_element_s { tBranch valueWord; @@ -162,12 +166,37 @@ class BinaryListHash return static_cast(&words); } - tKey getKey() + tKey getKey() { - return *reinterpret_cast(words); + tKey key; + memcpy(&key, words, sizeof(tKey)); + return key; } - }; + tKey* getKeyPtr() + { + return reinterpret_cast(words); + } + + + bool operator >(const overlay 
&right) const + { + auto leftPtr = const_cast(words + elements - 1); + auto rightPtr = const_cast(right.words + elements - 1); + + while (leftPtr >= words) + { + if (*leftPtr > *rightPtr) + return true; + if (*leftPtr < *rightPtr) + return false; + --leftPtr; + --rightPtr; + } + return false; + } + + }; #pragma pack(pop) bl_array_s* root; // root node for hash tree @@ -175,9 +204,17 @@ class BinaryListHash int32_t distinct {0}; + // serialize variables (passing them as params is just really slow) + overlay serializeOver; + overlay serializeStart; + int serializeLimit; + FilterCB serializeCB; + public: - using HashVector = std::vector>; + using ResultItem = std::pair; + using HashVector = std::vector; + HashVector serializeList; BinaryListHash() : root(nullptr) @@ -194,7 +231,7 @@ class BinaryListHash } // debug - dumps usage data for the memory manager - // + // // shows how many cached/recycled lists are available // void debug() @@ -241,7 +278,7 @@ class BinaryListHash // make a space in current node node = makeGap(node, index, lastNode, lastIndex); - + if (iter == words) // we are at the end { memcpy(static_cast(&node->nodes[index].next), &value, sizeof(tVal)); @@ -276,7 +313,7 @@ class BinaryListHash // // if (hash.get( someKey, someValue )) // { - // //do something with some val. + // //do something with some val. 
// }; // // save a check then a second lookup to get @@ -311,7 +348,7 @@ class BinaryListHash } }; - // exists - is key in hash + // exists - is key in hash // bool exists(tKey key) { @@ -341,44 +378,54 @@ class BinaryListHash } }; - - std::vector> serialize() + + HashVector& serialize(tKey& start, int limit, FilterCB filterCallBack) { + tKey key; + serializeOver.set(&key); + serializeStart.set(&start); - tBranch *iter; - int64_t index, lastIndex; + serializeList.clear(); + serializeList.reserve(distinct); - tKey key; - overlay over(&key); - - HashVector result; - result.reserve(distinct); + serializeLimit = limit; + serializeCB = filterCallBack; - serializeRecurse(root, result, over, 0); + serializeRecurse(root, 0); - return result; + return serializeList; } private: - static void serializeRecurse(bl_array_s* node, HashVector& result, overlay& over, int depth) + void serializeRecurse(bl_array_s* node, int depth) { for (auto idx = 0; idx < node->used; ++idx) { - over.words[over.elements - 1 - depth] = node->nodes[idx].valueWord; + if (serializeLimit == -1) + return; + + serializeOver.words[serializeOver.elements - 1 - depth] = node->nodes[idx].valueWord; - if (depth == over.elements - 1) + if (depth == serializeOver.elements - 1) { - tVal value; // = reinterpret_cast(node->nodes[idx].next); - memcpy(&value, &node->nodes[idx].next, sizeof(tVal)); - result.emplace_back(over.getKey(), value); + if (serializeOver > serializeStart && + serializeCB(serializeOver.getKeyPtr(), reinterpret_cast(&node->nodes[idx].next))) + { + serializeList.emplace_back(*serializeOver.getKeyPtr(), *reinterpret_cast(&node->nodes[idx].next)); + if (serializeList.size() == serializeLimit) + { + serializeLimit = -1; + return; + } + } } else { - serializeRecurse(reinterpret_cast(node->nodes[idx].next), result, over, depth+1); + serializeRecurse(reinterpret_cast(node->nodes[idx].next), depth + 1); } - } - + } + } // this is a fairly common binary search. 
Google will find you serveral @@ -405,16 +452,16 @@ class BinaryListHash return valWord; // on a short list scanning sequentially is more efficient - // because the data is fits in a cache line. - // iterating the first dozen or is most efficient + // because the data is fits in a cache line. + // iterating the first dozen or is most efficient // and is quicker than list sub-division on my i7 type processor. - // Some of the newer server processors might benefit from a + // Some of the newer server processors might benefit from a // higher setting. // // bl_element_s = 10 bytes - // cache line = 64 bytes. + // cache line = 64 bytes. // 6 elements per cache line. - // + // // testing showed a positive gain for on my processor // at two cache lines worth of elements. @@ -453,7 +500,7 @@ class BinaryListHash else return mid; // found - mid = (first + last) >> 1; // usually written like first + ((last - first) / 2) + mid = (first + last) >> 1; // usually written like first + ((last - first) / 2) } return -(first + 1); @@ -464,7 +511,7 @@ class BinaryListHash { auto length = 1 << static_cast(node->pageBits); - // this node is full, so we will make a new one, and copy + // this node is full, so we will make a new one, and copy if (node->used == length) { bl_array_s* newNode = createNode(node->pageBits + 1); @@ -491,7 +538,7 @@ class BinaryListHash return newNode; } - // mem move will copy overlapped. + // mem move will copy overlapped. 
if (index < node->used) memmove(&node->nodes[index + 1], &node->nodes[index], sizeof(bl_element_s) * (node->used - index)); diff --git a/src/attributes.cpp b/src/attributes.cpp index 3e4e6ba..183b6b4 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -31,10 +31,15 @@ Attributes::~Attributes() } } -void Attributes::addChange(const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state) +void Attributes::addChange(const int64_t customerId, const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state) { const auto key = attr_key_s{ propIndex, value }; + if (state) + customerIndexing.insert(propIndex, customerId, linearId, value); + else + customerIndexing.erase(propIndex, customerId, value); + if (auto changeRecord = changeIndex.find(key); changeRecord != changeIndex.end()) { changeRecord->second.emplace_back(Attr_changes_s{linearId, state}); @@ -44,9 +49,20 @@ void Attributes::addChange(const int32_t propIndex, const int64_t value, const i changeIndex.emplace(key, std::vector{Attr_changes_s{linearId, state}}); } - Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { + + if (auto& res = propertyIndex.emplace(attr_key_s{ propIndex, value }, nullptr); res.second == true) + { + const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); + res.first->second = attr; + return attr; + } + else + { + return res.first->second; + } + /* if (auto attrPair = propertyIndex.find({ propIndex, value }); attrPair == propertyIndex.end()) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); @@ -57,22 +73,23 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { return attrPair->second; } + */ } Attr_s* Attributes::getMake(const int32_t propIndex, const string& value) { const auto valueHash = MakeHash(value); - if (auto attrPair = propertyIndex.find({ propIndex, valueHash }); attrPair == propertyIndex.end()) + if (auto& res = 
propertyIndex.emplace(attr_key_s{ propIndex, valueHash }, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); attr->text = blob->storeValue(propIndex, value); - propertyIndex.insert({attr_key_s{ propIndex, valueHash }, attr}); + res.first->second = attr; return attr; } else { - return attrPair->second; + return res.first->second; } } @@ -97,9 +114,9 @@ void Attributes::drop(const int32_t propIndex, const int64_t value) propertyIndex.erase({ propIndex, value }); } -void Attributes::setDirty(const int32_t linId, const int32_t propIndex, const int64_t value, const bool on) +void Attributes::setDirty(const int64_t customerId, const int32_t linId, const int32_t propIndex, const int64_t value, const bool on) { - addChange(propIndex, value, linId, on); + addChange(customerId, propIndex, value, linId, on); } void Attributes::clearDirty() @@ -277,6 +294,13 @@ Attributes::AttrList Attributes::getPropertyValues(const int32_t propIndex, cons return result; } +void Attributes::createCustomerPropIndexes() +{ + const auto props = table->getCustomerIndexProps(); + for (auto prop : *props) + customerIndexing.createIndex(prop); +} + void Attributes::serialize(HeapStack* mem) { // grab 8 bytes, and set the block type at that address diff --git a/src/attributes.h b/src/attributes.h index 2efcebb..249d2cf 100644 --- a/src/attributes.h +++ b/src/attributes.h @@ -8,6 +8,7 @@ #include "robin_hood.h" #include "dbtypes.h" #include "indexbits.h" +#include "customer_index.h" using namespace std; @@ -132,8 +133,9 @@ namespace openset::db using ChangeIndex = robin_hood::unordered_map, robin_hood::hash>; using AttrPair = pair; - ColumnIndex propertyIndex;//{ ringHint_e::lt_5_million }; - ChangeIndex changeIndex;//{ ringHint_e::lt_5_million }; + ColumnIndex propertyIndex; // prop/value store + ChangeIndex changeIndex; // cache for property changes + CustomerIndexing customerIndexing; // indexes for customer_list sort ordering Table* table; 
AttributeBlob* blob; @@ -143,7 +145,7 @@ namespace openset::db explicit Attributes(const int partition, Table* table, AttributeBlob* attributeBlob, Properties* properties); ~Attributes(); - void addChange(const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state); + void addChange(const int64_t customerId, const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state); Attr_s* getMake(const int32_t propIndex, const int64_t value); Attr_s* getMake(const int32_t propIndex, const string& value); @@ -153,7 +155,7 @@ namespace openset::db void drop(const int32_t propIndex, const int64_t value); - void setDirty(const int32_t linId, const int32_t propIndex, const int64_t value, const bool on = true); + void setDirty(const int64_t customerId, const int32_t linId, const int32_t propIndex, const int64_t value, const bool on); void clearDirty(); // replace an indexes bits with new ones, used when generating segments @@ -169,6 +171,8 @@ namespace openset::db return (partition == other.partition); } + void createCustomerPropIndexes(); + void serialize(HeapStack* mem); int64_t deserialize(char* mem); }; diff --git a/src/customer_index.cpp b/src/customer_index.cpp new file mode 100644 index 0000000..82efeb8 --- /dev/null +++ b/src/customer_index.cpp @@ -0,0 +1,11 @@ +#include "customer_index.h" + +openset::db::CustomerIndexList openset::db::CustomerPropIndex::serialize( + int64_t startCustomer, + int64_t startValue, + int limit, + const std::function& filterCallback) +{ + SortKeyOneProp_s startKey(startCustomer, startValue); + return index.serialize(startKey, limit, filterCallback); +} diff --git a/src/customer_index.h b/src/customer_index.h new file mode 100644 index 0000000..2581f41 --- /dev/null +++ b/src/customer_index.h @@ -0,0 +1,97 @@ +#pragma once + +#include "common.h" +#include "mem/blhash.h" +#include "robin_hood.h" + +namespace openset +{ + namespace db + { + struct SortKeyOneProp_s + { + int64_t customerId; + 
int64_t value; + + SortKeyOneProp_s() = default; + + SortKeyOneProp_s(const int64_t customerId, const int64_t value) : + customerId(customerId), + value(value) + {} + }; + + using CustomerIndexList = std::vector>; + + class CustomerPropIndex + { + BinaryListHash index; + + public: + CustomerPropIndex() = default; + ~CustomerPropIndex() = default; + + void insert(int64_t customerId, int linId, int64_t value) + { + index.set(SortKeyOneProp_s{ customerId, value}, linId); + } + + void erase(int64_t customerId, int64_t value) + { + // delete from `index` + } + + CustomerIndexList serialize( + int64_t startCustomer, + int64_t startValue, + int limit, + const std::function& filterCallback); + }; + + class CustomerIndexing + { + robin_hood::unordered_map> indexes; + + public: + CustomerIndexing() = default; + ~CustomerIndexing() + { + for (auto& index : indexes) + delete index.second; + } + + void createIndex(int propIndex) + { + if (!indexes.count(propIndex)) + indexes.emplace(propIndex, new CustomerPropIndex()); + } + + void insert(int propIndex, int64_t customerId, int linId, int64_t value) + { + if (value == NONE) + return; + + if (auto& iter = indexes.find(propIndex); iter != indexes.end()) + iter->second->insert(customerId, linId, value); + } + + void erase(int propIndex, int64_t customerId, int64_t value) + { + if (auto& iter = indexes.find(propIndex); iter != indexes.end()) + iter->second->erase(customerId, value); + } + + CustomerIndexList getListAscending( + int propIndex, + int64_t startCustomer, + int64_t startValue, + int limit, + const std::function& filterCallback) + { + if (auto& iter = indexes.find(propIndex); iter != indexes.end()) + return iter->second->serialize(startCustomer, startValue, limit, filterCallback); + return {}; + } + }; + }; +}; \ No newline at end of file diff --git a/src/customers.cpp b/src/customers.cpp index a197066..b4d4fc3 100644 --- a/src/customers.cpp +++ b/src/customers.cpp @@ -55,39 +55,24 @@ PersonData_s* 
Customers::getCustomerByLIN(const int64_t linId) PersonData_s* Customers::createCustomer(int64_t userId) { - const auto person = getCustomerByID(userId); - - auto isReuse = false; - auto linId = static_cast(customerLinear.size()); - - if (!person && !reuse.empty()) - { - linId = reuse.back(); - reuse.pop_back(); - isReuse = true; - } - - if (!person) // not found, lets create + if (auto& res = customerMap.emplace(userId, 0); res.second == true) { - auto newUser = recast(PoolMem::getPool().getPtr(sizeof(PersonData_s))); - + const auto newUser = recast(PoolMem::getPool().getPtr(sizeof(PersonData_s))); newUser->id = userId; - newUser->linId = linId; + newUser->linId = static_cast(customerLinear.size());; newUser->idBytes = 0; newUser->bytes = 0; newUser->comp = 0; newUser->props = nullptr; - if (!isReuse) - customerLinear.push_back(newUser); - - customerMap[userId] = newUser->linId; - + res.first->second = newUser->linId; + customerLinear.emplace_back(newUser); return newUser; + } + else + { + return customerLinear.at(res.first->second); } - - // check for match/collision - return person; } PersonData_s* Customers::createCustomer(string userIdString) @@ -104,15 +89,7 @@ PersonData_s* Customers::createCustomer(string userIdString) { const auto person = getCustomerByID(hashId); - auto isReuse = false; - auto linId = static_cast(customerLinear.size()); - - if (!person && !reuse.empty()) - { - linId = reuse.back(); - reuse.pop_back(); - isReuse = true; - } + const auto linId = static_cast(customerLinear.size()); if (!person) // not found, lets create { @@ -126,10 +103,8 @@ PersonData_s* Customers::createCustomer(string userIdString) newUser->props = nullptr; newUser->setIdStr(userIdString); - if (!isReuse) - customerLinear.push_back(newUser); - customerMap[hashId] = newUser->linId; + customerLinear.emplace_back(newUser); return newUser; } @@ -165,8 +140,6 @@ void Customers::drop(const int64_t userId) customerLinear[info->linId] = nullptr; - reuse.push_back(info->linId); - 
PoolMem::getPool().freePtr(info); } @@ -213,7 +186,6 @@ int64_t Customers::deserialize(char* mem) customerMap.clear(); customerLinear.clear(); customerLinear.reserve(sectionLength); - reuse.clear(); // end is the length of the block after the 16 bytes of header const auto end = read + sectionLength; @@ -238,12 +210,5 @@ int64_t Customers::deserialize(char* mem) read += size; } - for (auto i = 0; i < static_cast(customerLinear.size()); ++i) - { - if (!customerLinear[i]) - reuse.push_back(i); - } - - return sectionLength + 16; } diff --git a/src/customers.h b/src/customers.h index b707798..6e3b506 100644 --- a/src/customers.h +++ b/src/customers.h @@ -23,9 +23,8 @@ namespace openset public: robin_hood::unordered_map> customerMap; vector customerLinear; - vector reuse; int partition; - public: + explicit Customers(int partition); ~Customers(); diff --git a/src/grid.cpp b/src/grid.cpp index 8f30e85..e48dfcd 100644 --- a/src/grid.cpp +++ b/src/grid.cpp @@ -248,12 +248,12 @@ void Grid::setCustomerProps() rawData->props = customerProps.encodeCustomerProps(table); for (auto &change : customerProps.getOldValues()) - attributes->setDirty(this->rawData->linId, change.first, change.second, false); + attributes->setDirty(this->rawData->id, this->rawData->linId, change.first, change.second, false); for (auto &change : customerProps.getNewValues()) { attributes->getMake(change.first, change.second); - attributes->setDirty(this->rawData->linId, change.first, change.second, true); + attributes->setDirty(this->rawData->id, this->rawData->linId, change.first, change.second, true); } } @@ -707,7 +707,7 @@ bool Grid::cull() diff.iterRemoved( [&](int32_t col, int64_t val) { - attributes->setDirty(this->rawData->linId, col, val, false); + attributes->setDirty(this->rawData->id, this->rawData->linId, col, val, false); } ); @@ -759,7 +759,7 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins hasEventProp = true; attributes->getMake(schemaCol, NONE); - 
attributes->setDirty(this->rawData->linId, schemaCol, NONE); + attributes->setDirty(this->rawData->id, this->rawData->linId, schemaCol, NONE, true); auto tempVal = NONE; string tempString; @@ -936,7 +936,7 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins else attributes->getMake(schemaCol, tempVal); - attributes->setDirty(this->rawData->linId, schemaCol, tempVal); + attributes->setDirty(this->rawData->id, this->rawData->linId, schemaCol, tempVal, true); setData.push_back(tempVal); } @@ -961,7 +961,7 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins else attributes->getMake(schemaCol, tempVal); - attributes->setDirty(this->rawData->linId, schemaCol, tempVal); + attributes->setDirty(this->rawData->id, this->rawData->linId, schemaCol, tempVal, true); if (propInfo->isSet) { diff --git a/src/oloop.h b/src/oloop.h index 5c38567..c886d36 100644 --- a/src/oloop.h +++ b/src/oloop.h @@ -4,56 +4,56 @@ namespace openset { - namespace async - { - class AsyncLoop; - - enum class oloopState_e - { - running, - done, - clear - }; - - enum class oloopPriority_e - { - background, - realtime - }; - - class OpenLoop - { - public: - oloopPriority_e priority; - oloopState_e state; + namespace async + { + class AsyncLoop; + + enum class oloopState_e + { + running, + done, + clear + }; + + enum class oloopPriority_e + { + background, + realtime + }; + + class OpenLoop + { + public: + oloopPriority_e priority; + oloopState_e state; std::string owningTable; - int64_t runAt; - int64_t runStart; // time or call to run - bool prepared; - AsyncLoop* loop; - - explicit OpenLoop(std::string owningTable, oloopPriority_e priority = oloopPriority_e::background); - virtual ~OpenLoop(); - void assignLoop(AsyncLoop* loop); - - // if there are realtime priority cells in this - // partition, bypass will be true - bool inBypass() const; - - void scheduleFuture(uint64_t milliFromNow); - void scheduleAt(uint64_t milliRunAt); - - void 
spawn(OpenLoop* newCell) const; - void suicide(); - - bool sliceComplete() const; - virtual bool checkCondition(); - virtual bool checkTimer(const int64_t milliNow); - - // these must be overridden (preferrably final) in derived classes - virtual void prepare() = 0; - virtual bool run() = 0; - virtual void partitionRemoved() = 0; // allow for error handling if a partition is removed - }; - }; + int64_t runAt; + int64_t runStart; // time or call to run + bool prepared; + AsyncLoop* loop; + + explicit OpenLoop(std::string owningTable, oloopPriority_e priority = oloopPriority_e::background); + virtual ~OpenLoop(); + void assignLoop(AsyncLoop* loop); + + // if there are realtime priority cells in this + // partition, bypass will be true + bool inBypass() const; + + void scheduleFuture(uint64_t milliFromNow); + void scheduleAt(uint64_t milliRunAt); + + void spawn(OpenLoop* newCell) const; + void suicide(); + + bool sliceComplete() const; + virtual bool checkCondition(); + virtual bool checkTimer(const int64_t milliNow); + + // these must be overridden (preferrably final) in derived classes + virtual void prepare() = 0; + virtual bool run() = 0; + virtual void partitionRemoved() = 0; // allow for error handling if a partition is removed + }; + }; }; diff --git a/src/oloop_customer_list.cpp b/src/oloop_customer_list.cpp index 9c77800..25682fc 100644 --- a/src/oloop_customer_list.cpp +++ b/src/oloop_customer_list.cpp @@ -117,6 +117,23 @@ void OpenLoopCustomerList::prepare() person.setSessionTime(macros.sessionTime); + const auto filter = [&](SortKeyOneProp_s* key, int* value) -> bool { + return true; + }; + + + auto propIndex = parts->table->getProperties()->getProperty("score")->idx; + + indexedList = std::move(parts->attributes.customerIndexing.getListAscending( + propIndex, + 10000, + 4, + 1000, + filter + )); + + iter = indexedList.begin(); + startTime = Now(); } @@ -129,7 +146,7 @@ bool OpenLoopCustomerList::run() // are we done? 
This will return the index of the // next set bit until there are no more, or maxLinId is met - if (interpreter->error.inError() || !index->linearIter(currentLinId, maxLinearId)) + if (interpreter->error.inError() || iter == indexedList.end()) { result->setAccTypesFromMacros(macros); @@ -147,7 +164,7 @@ bool OpenLoopCustomerList::run() return false; } - if (const auto personData = parts->people.getCustomerByLIN(currentLinId); personData != nullptr) + if (const auto personData = parts->people.getCustomerByLIN(iter->second); personData != nullptr) { ++runCount; person.mount(personData); @@ -155,6 +172,8 @@ bool OpenLoopCustomerList::run() interpreter->mount(&person); interpreter->exec(); // run the script on this customer - do some magic } + + ++iter; } } diff --git a/src/oloop_customer_list.h b/src/oloop_customer_list.h index 29a44f0..ad6a24d 100644 --- a/src/oloop_customer_list.h +++ b/src/oloop_customer_list.h @@ -10,45 +10,48 @@ namespace openset { - namespace db - { - class Table; - class TablePartitioned; - }; + namespace db + { + class Table; + class TablePartitioned; + }; - namespace async - { - class OpenLoopCustomerList : public OpenLoop - { - public: - openset::query::Macro_s macros; - ShuttleLambda* shuttle; - openset::db::Database::TablePtr table; - openset::db::TablePartitioned* parts; - int64_t maxLinearId; - int64_t currentLinId; - Customer person; - openset::query::Interpreter* interpreter; - int instance; - int runCount; - int64_t startTime; - int population; - openset::query::Indexing indexing; - openset::db::IndexBits* index; - openset::result::ResultSet* result; + namespace async + { + class OpenLoopCustomerList : public OpenLoop + { + public: + openset::query::Macro_s macros; + ShuttleLambda* shuttle; + openset::db::Database::TablePtr table; + openset::db::TablePartitioned* parts; + int64_t maxLinearId; + int64_t currentLinId; + Customer person; + openset::query::Interpreter* interpreter; + int instance; + int runCount; + int64_t startTime; + 
int population; + openset::query::Indexing indexing; + openset::db::IndexBits* index; + openset::result::ResultSet* result; - explicit OpenLoopCustomerList( - ShuttleLambda* shuttle, - openset::db::Database::TablePtr table, - openset::query::Macro_s macros, - openset::result::ResultSet* result, - int instance); + CustomerIndexList indexedList; + CustomerIndexList::iterator iter; - ~OpenLoopCustomerList() final; + explicit OpenLoopCustomerList( + ShuttleLambda* shuttle, + openset::db::Database::TablePtr table, + openset::query::Macro_s macros, + openset::result::ResultSet* result, + int instance); - void prepare() final; - bool run() final; - void partitionRemoved() final; - }; - } + ~OpenLoopCustomerList() final; + + void prepare() final; + bool run() final; + void partitionRemoved() final; + }; + } } diff --git a/src/querycommon.h b/src/querycommon.h index fb60d11..99ede12 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -6,8 +6,8 @@ #include #include "errors.h" #include "dbtypes.h" -#include "attributes.h" #include "var/var.h" +#include "attributes.h" #include "../lib/str/strtools.h" namespace openset diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 2f1e6c4..c5fe8c1 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -3284,16 +3284,6 @@ void openset::query::Interpreter::getGridProps() if (!macros.useProps) return; - props.dict(); - - // no props? 
clean props is userVars - if (propsIndex != -1 && grid) - { - for (auto varIndex : macros.props) - macros.vars.userVars[varIndex].value = NONE; - return; - } - grid->getCustomerProps(); // copy props into userVars diff --git a/src/queryinterpreter.h b/src/queryinterpreter.h index bbbcfca..313d014 100644 --- a/src/queryinterpreter.h +++ b/src/queryinterpreter.h @@ -151,8 +151,6 @@ namespace openset IndexBits* bits{ nullptr }; int maxBitPop{ 0 }; // largest linear user_id in table/partition - cvar props; - int propsIndex{ -1 }; bool propsChanged{ false }; // counters diff --git a/src/result.h b/src/result.h index 404c423..d41d41b 100644 --- a/src/result.h +++ b/src/result.h @@ -202,18 +202,27 @@ namespace openset Accumulator(const int64_t resultWidth) { + auto columnIter = columns; + + while (columnIter < columns + resultWidth) + { + columnIter->value = NONE; + columnIter->count = 0; + ++columnIter; + } + /* for (auto i = 0; i < resultWidth; ++i) { columns[i].value = NONE; columns[i].count = 0; - } + }*/ } }; class ResultSet { public: - robin_hood::unordered_map> results; + robin_hood::unordered_map results; using RowPair = pair; using RowVector = vector; vector sortedResult; diff --git a/src/rpc_table.cpp b/src/rpc_table.cpp index de745f1..47a77e8 100644 --- a/src/rpc_table.cpp +++ b/src/rpc_table.cpp @@ -92,6 +92,7 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa } const auto sourceEventOrder = request.xPath("/event_order"); + const auto sourcePropIndexes = request.xPath("/prop_indexes"); const auto sourceSettings = request.xPath("/settings"); auto sourcePropsList = sourceProps->getNodes(); @@ -138,7 +139,6 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa } - globals::async->suspendAsync(); auto table = database->newTable(tableName, useNumericIds); auto columns = table->getProperties(); @@ -181,6 +181,8 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa "invalid 
property type" }, message); + database->dropTable(tableName); + globals::async->resumeAsync(); return; } @@ -204,6 +206,77 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa } } + if (sourcePropIndexes) + { + + auto props = table->getCustomerIndexProps(); + auto propNodes = sourcePropIndexes->getNodes(); + + auto idx = 0; + for (auto n : propNodes) + { + const auto propName = n->getString(); + const auto propInfo = columns->getProperty(propName); + + if (!propInfo) + { + RpcError( + openset::errors::Error{ + openset::errors::errorClass_e::config, + openset::errors::errorCode_e::general_config_error, + "prop_indexes: property '" + propName + "' not found" }, + message); + database->dropTable(tableName); + globals::async->resumeAsync(); + return; + } + + if (!propInfo->isCustomerProperty) + { + RpcError( + openset::errors::Error{ + openset::errors::errorClass_e::config, + openset::errors::errorCode_e::general_config_error, + "prop_indexes: property '" + propName + "' must be configured as a 'customer_property'" }, + message); + database->dropTable(tableName); + globals::async->resumeAsync(); + return; + } + + if (propInfo->isSet) + { + RpcError( + openset::errors::Error{ + openset::errors::errorClass_e::config, + openset::errors::errorCode_e::general_config_error, + "prop_indexes: property '" + propName + "' cannot be a 'set' type" }, + message); + database->dropTable(tableName); + globals::async->resumeAsync(); + return; + } + + if (propInfo->type != PropertyTypes_e::intProp && propInfo->type != PropertyTypes_e::doubleProp) + { + RpcError( + openset::errors::Error{ + openset::errors::errorClass_e::config, + openset::errors::errorCode_e::general_config_error, + "prop_indexes: property '" + propName + "' must be an 'int' or 'double' type" }, + message); + database->dropTable(tableName); + globals::async->resumeAsync(); + return; + } + + props->push_back(propInfo->idx); + } + + table->propagateCustomerIndexes(); + + } + if 
(sourceSettings) { table->deserializeSettings(sourceSettings); diff --git a/src/table.cpp b/src/table.cpp index b3eb6aa..486a29f 100644 --- a/src/table.cpp +++ b/src/table.cpp @@ -100,6 +100,12 @@ void Table::releasePartitionObjects(const int32_t partition) } } +void Table::propagateCustomerIndexes() +{ + for (auto& part : partitions) + part.second->attributes.createCustomerPropIndexes(); +} + void Table::setSegmentRefresh( const std::string& segmentName, const openset::query::Macro_s& macros, diff --git a/src/table.h b/src/table.h index 2afceb5..16e34ae 100644 --- a/src/table.h +++ b/src/table.h @@ -9,6 +9,7 @@ #include "querycommon.h" #include "var/var.h" #include "property_mapping.h" +#include "robin_hood.h" using namespace std; @@ -22,6 +23,8 @@ namespace openset class TablePartitioned; class AttributeBlob; + using CustomerIndexProps = std::vector; + struct SegmentTtl_s { string segmentName; @@ -85,9 +88,15 @@ namespace openset // segmentRefresh maps CriticalSection segmentCS; // map of segments, their TTLs, last refresh times, etc - std::unordered_map segmentTTL; + using SegmentTtl = robin_hood::unordered_map>; + using SegmentRefresh = robin_hood::unordered_map>; + + SegmentTtl segmentTTL; // list of segments that auto update and the code to update them - std::unordered_map segmentRefresh; + SegmentRefresh segmentRefresh; + + // customer list ordering indexes + CustomerIndexProps indexedProps; // global variables CriticalSection globalVarCS; @@ -98,9 +107,9 @@ namespace openset PropertyMapping propertyMap; openset::revent::MessageBroker messages; - using EventOrderMapStr = std::unordered_map; - using EventOrderMapHash = std::unordered_map; - using PartitionMap = unordered_map; + using EventOrderMapStr = robin_hood::unordered_map>; + using EventOrderMapHash = robin_hood::unordered_map>; + using PartitionMap = robin_hood::unordered_map>; using ZombiePartitions = std::queue; EventOrderMapStr eventOrderStrings; @@ -142,6 +151,8 @@ namespace openset 
TablePartitioned* getPartitionObjects(const int32_t partition, const bool create); void releasePartitionObjects(const int32_t partition); + void propagateCustomerIndexes(); + int64_t getSessionTime() const { return sessionTime; @@ -162,6 +173,11 @@ namespace openset return &eventOrderInts; } + CustomerIndexProps* getCustomerIndexProps() + { + return &indexedProps; + } + EventOrderMapStr* getEventOrderStrings() { return &eventOrderStrings; @@ -199,12 +215,12 @@ namespace openset return &segmentCS; } - std::unordered_map* getSegmentTTL() + SegmentTtl* getSegmentTTL() { return &segmentTTL; } - std::unordered_map* getSegmentRefresh() + SegmentRefresh* getSegmentRefresh() { return &segmentRefresh; } From cd6b5ba059aa5ac9328120ae59008a88caceacad Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Fri, 22 Nov 2019 23:09:25 -0500 Subject: [PATCH 07/31] IndexBit caching to reduce index thrashing on insert --- lib/cjson/cjson.cpp | 28 ++++++ lib/cjson/cjson.h | 6 +- lib/mem/blhash.h | 25 +---- src/attributes.cpp | 106 ++++++++++++++++---- src/attributes.h | 8 +- src/customer_index.cpp | 5 +- src/customer_index.h | 12 +-- src/customers.cpp | 31 +++++- src/customers.h | 4 +- src/indexbits.cpp | 2 +- src/indexbits.h | 62 ++++++++++++ src/oloop_customer_list.cpp | 28 ++++-- src/oloop_customer_list.h | 7 ++ src/oloop_histogram.cpp | 3 +- src/oloop_insert.cpp | 13 +-- src/oloop_property.cpp | 17 ++-- src/oloop_query.cpp | 3 +- src/oloop_seg_refresh.cpp | 10 +- src/oloop_segment.cpp | 14 +-- src/queryindexing.cpp | 5 +- src/result.cpp | 94 +++++++++++++----- src/result.h | 2 +- src/rpc_insert.cpp | 18 +++- src/rpc_query.cpp | 186 +++++++++++++++++++++++++++++++++--- src/sidelog.h | 18 +++- src/tablepartitioned.cpp | 55 ++++------- src/tablepartitioned.h | 9 +- test/test_db.h | 9 +- test/test_helper.cpp | 1 + 29 files changed, 575 insertions(+), 206 deletions(-) diff --git a/lib/cjson/cjson.cpp b/lib/cjson/cjson.cpp index 7eb6999..4229162 100644 --- a/lib/cjson/cjson.cpp +++ 
b/lib/cjson/cjson.cpp @@ -134,6 +134,34 @@ cjson::cjson(HeapStack* mem) : scratchPad = mem->firstBlock()->data; } +cjson::cjson(const cjson& other) +{ + auto newNode = parse(stringify(const_cast(&other))); + + mem = newNode->mem; + nodeType = newNode->nodeType; + nodeName = newNode->nodeName; + nodeData = newNode->nodeData; + membersHead = newNode->membersHead; + membersTail = newNode->membersTail; + memberCount = newNode->memberCount; + scratchPad = newNode->scratchPad; + siblingPrev = newNode->siblingPrev; + siblingNext = newNode->siblingNext; + parentNode = newNode->parentNode; + selfConstructed = newNode->selfConstructed; + + newNode->selfConstructed = false; + newNode->mem = nullptr; + newNode->membersHead = nullptr; + newNode->membersTail = nullptr; + newNode->scratchPad = nullptr; + newNode->siblingNext = nullptr; + newNode->siblingPrev = nullptr; + newNode->parentNode = nullptr; + newNode->nodeType = Types_e::VOIDED; +} + cjson::cjson(cjson&& other) noexcept : mem(other.mem), nodeType(other.nodeType), diff --git a/lib/cjson/cjson.h b/lib/cjson/cjson.h index 3fcc2e0..04c50d8 100644 --- a/lib/cjson/cjson.h +++ b/lib/cjson/cjson.h @@ -68,10 +68,10 @@ class cjson private: - HeapStack* mem; + HeapStack* mem { nullptr }; Types_e nodeType; - char* nodeName; + char* nodeName { nullptr }; // dataUnion uses the often ignored but always awesome // union feature of C++ @@ -222,7 +222,7 @@ class cjson cjson(char* data, const size_t length); cjson(HeapStack* mem); - cjson(const cjson&) = delete; // can't copy - actually we could... but.. + cjson(const cjson&); // can't copy - actually we could... but.. 
cjson(cjson&& other) noexcept; // moveable ~cjson(); diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index 36cdf94..bb57f60 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -177,25 +177,6 @@ class BinaryListHash { return reinterpret_cast(words); } - - - bool operator >(const overlay &right) const - { - auto leftPtr = const_cast(words + elements - 1); - auto rightPtr = const_cast(right.words + elements - 1); - - while (leftPtr >= words) - { - if (*leftPtr > *rightPtr) - return true; - if (*leftPtr < *rightPtr) - return false; - --leftPtr; - --rightPtr; - } - return false; - } - }; #pragma pack(pop) @@ -206,7 +187,6 @@ class BinaryListHash // serialize variables (passing them as params is just really slow) overlay serializeOver; - overlay serializeStart; int serializeLimit; FilterCB serializeCB; @@ -379,11 +359,10 @@ class BinaryListHash }; - HashVector& serialize(tKey& start, int limit, FilterCB filterCallBack) + HashVector& serialize(int limit, FilterCB filterCallBack) { tKey key; serializeOver.set(&key); - serializeStart.set(&start); serializeList.clear(); serializeList.reserve(distinct); @@ -409,7 +388,7 @@ class BinaryListHash if (depth == serializeOver.elements - 1) { - if (serializeOver > serializeStart && + if (//serializeOver > serializeStart && serializeCB(serializeOver.getKeyPtr(), reinterpret_cast(&node->nodes[idx].next))) { serializeList.emplace_back(*serializeOver.getKeyPtr(), *reinterpret_cast(&node->nodes[idx].next)); diff --git a/src/attributes.cpp b/src/attributes.cpp index 183b6b4..68c16ff 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -6,20 +6,12 @@ using namespace openset::db; -IndexBits* Attr_s::getBits() -{ - auto bits = new IndexBits(); - - bits->mount(index, ints, ofs, len, linId); - - return bits; -} - Attributes::Attributes(const int partition, Table* table, AttributeBlob* attributeBlob, Properties* properties) : table(table), blob(attributeBlob), properties(properties), - partition(partition) + partition(partition), + 
indexCache(50) {} Attributes::~Attributes() @@ -31,8 +23,63 @@ Attributes::~Attributes() } } +IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) +{ + + if (const auto bits = indexCache.get(propIndex, value); bits) + return bits; + + const auto attribute = Attributes::getMake(propIndex, value); + + auto bits = new IndexBits(); + bits->mount(attribute->index, attribute->ints, attribute->ofs, attribute->len, attribute->linId); + + // cache these bits + const auto [evictPropIndex, evictValue, evictBits] = indexCache.set(propIndex, value, bits); + + // if anything got squeezed out compress it + if (evictBits) + { + const auto attrPair = propertyIndex.find({ evictPropIndex, evictValue }); + const auto evictAttribute = attrPair->second; + + int64_t compBytes = 0; // OUT value via reference + int64_t linId; + int32_t ofs, len; + + // compress the data, get it back in a pool ptr + const auto compData = bits->store(compBytes, linId, ofs, len, table->indexCompression); + const auto destAttr = recast(PoolMem::getPool().getPtr(sizeof(Attr_s) + compBytes)); + + // copy header + memcpy(destAttr, evictAttribute, sizeof(Attr_s)); + if (compData) + { + memcpy(destAttr->index, compData, compBytes); + // return work buffer from bits.store to the pool + PoolMem::getPool().freePtr(compData); + } + + destAttr->ints = bits->ints;//(isList) ? 
0 : bits.ints; + destAttr->comp = static_cast(compBytes); + destAttr->linId = linId; + destAttr->ofs = ofs; + destAttr->len = len; + + attrPair->second = destAttr; + PoolMem::getPool().freePtr(evictAttribute); + + delete evictBits; + } + + return bits; +} + void Attributes::addChange(const int64_t customerId, const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state) { + if (propIndex == PROP_STAMP || propIndex == PROP_UUID || propIndex == PROP_SESSION) + return; + const auto key = attr_key_s{ propIndex, value }; if (state) @@ -46,7 +93,7 @@ void Attributes::addChange(const int64_t customerId, const int32_t propIndex, co return; } - changeIndex.emplace(key, std::vector{Attr_changes_s{linearId, state}}); + changeIndex.emplace(key, std::vector{Attr_changes_s{linearId, state}}); } Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) @@ -121,15 +168,29 @@ void Attributes::setDirty(const int64_t customerId, const int32_t linId, const i void Attributes::clearDirty() { - IndexBits bits; + //IndexBits bits; for (auto& change : changeIndex) { - const auto attrPair = propertyIndex.find({ change.first.index, change.first.value }); + getMake(change.first.index, change.first.value); + + //if (attrPair == propertyIndex.end() || !attrPair->second) + // continue; + + auto bits = getBits(change.first.index, change.first.value); + + for (const auto& t : change.second) + { + if (t.state) + bits->bitSet(t.linId); + else + bits->bitClear(t.linId); + } + + // TODO - check for non-existent prop. 
- if (attrPair == propertyIndex.end() || !attrPair->second) - continue; + /* const auto attr = attrPair->second; bits.mount(attr->index, attr->ints, attr->ofs, attr->len, attr->linId); @@ -178,10 +239,12 @@ void Attributes::clearDirty() attrPair->second = destAttr; PoolMem::getPool().freePtr(attr); } + */ } changeIndex.clear(); } +/* void Attributes::swap(const int32_t propIndex, const int64_t value, IndexBits* newBits) { auto attrPair = propertyIndex.find(attr_key_s{ propIndex, value }); @@ -222,6 +285,7 @@ void Attributes::swap(const int32_t propIndex, const int64_t value, IndexBits* n // FIX - memory leak PoolMem::getPool().freePtr(attr); } +*/ AttributeBlob* Attributes::getBlob() const { @@ -250,7 +314,7 @@ Attributes::AttrList Attributes::getPropertyValues(const int32_t propIndex, cons case listMode_e::NEQ: case listMode_e::EQ: if (const auto tAttr = get(propIndex, value); tAttr) - result.push_back(tAttr); + result.emplace_back(propIndex, value); return result; //case listMode_e::PRESENT_FAST: // fast for reducing set in `!= nil` test // if (const auto tAttr = get(propIndex, NONE); tAttr) @@ -267,23 +331,23 @@ Attributes::AttrList Attributes::getPropertyValues(const int32_t propIndex, cons switch (mode) { case listMode_e::PRESENT: // sum of all indexes - slow but accurate for `== nil` test - result.push_back(kv.second); + result.push_back(kv.first); break; case listMode_e::GT: if (kv.first.value > value) - result.push_back(kv.second); + result.push_back(kv.first); break; case listMode_e::GTE: if (kv.first.value >= value) - result.push_back(kv.second); + result.push_back(kv.first); break; case listMode_e::LT: if (kv.first.value < value) - result.push_back(kv.second); + result.push_back(kv.first); break; case listMode_e::LTE: if (kv.first.value <= value) - result.push_back(kv.second); + result.push_back(kv.first); break; default: // never happens diff --git a/src/attributes.h b/src/attributes.h index 249d2cf..7ee90e7 100644 --- a/src/attributes.h +++ 
b/src/attributes.h @@ -92,7 +92,6 @@ namespace openset::db char index[1]{ 0 }; // char* (1st byte) of packed index bits struct Attr_s() = default; - IndexBits* getBits(); }; #pragma pack(pop) @@ -126,7 +125,7 @@ namespace openset::db }; using AttrListExpanded = vector>; // pair, value and bits - using AttrList = vector; + using AttrList = vector; // value and attribute info using ColumnIndex = robin_hood::unordered_map>; @@ -136,6 +135,7 @@ namespace openset::db ColumnIndex propertyIndex; // prop/value store ChangeIndex changeIndex; // cache for property changes CustomerIndexing customerIndexing; // indexes for customer_list sort ordering + IndexLRU indexCache; Table* table; AttributeBlob* blob; @@ -145,6 +145,8 @@ namespace openset::db explicit Attributes(const int partition, Table* table, AttributeBlob* attributeBlob, Properties* properties); ~Attributes(); + IndexBits* getBits(const int32_t propIndex, const int64_t value); + void addChange(const int64_t customerId, const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state); Attr_s* getMake(const int32_t propIndex, const int64_t value); @@ -159,7 +161,7 @@ namespace openset::db void clearDirty(); // replace an indexes bits with new ones, used when generating segments - void swap(const int32_t propIndex, const int64_t value, IndexBits* newBits); + //void swap(const int32_t propIndex, const int64_t value, IndexBits* newBits); AttributeBlob* getBlob() const; diff --git a/src/customer_index.cpp b/src/customer_index.cpp index 82efeb8..c4627e1 100644 --- a/src/customer_index.cpp +++ b/src/customer_index.cpp @@ -1,11 +1,8 @@ #include "customer_index.h" openset::db::CustomerIndexList openset::db::CustomerPropIndex::serialize( - int64_t startCustomer, - int64_t startValue, int limit, const std::function& filterCallback) { - SortKeyOneProp_s startKey(startCustomer, startValue); - return index.serialize(startKey, limit, filterCallback); + return index.serialize(limit, filterCallback); } diff 
--git a/src/customer_index.h b/src/customer_index.h index 2581f41..2b7405c 100644 --- a/src/customer_index.h +++ b/src/customer_index.h @@ -42,8 +42,6 @@ namespace openset } CustomerIndexList serialize( - int64_t startCustomer, - int64_t startValue, int limit, const std::function& filterCallback); }; @@ -71,25 +69,23 @@ namespace openset if (value == NONE) return; - if (auto& iter = indexes.find(propIndex); iter != indexes.end()) + if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) iter->second->insert(customerId, linId, value); } void erase(int propIndex, int64_t customerId, int64_t value) { - if (auto& iter = indexes.find(propIndex); iter != indexes.end()) + if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) iter->second->erase(customerId, value); } CustomerIndexList getListAscending( int propIndex, - int64_t startCustomer, - int64_t startValue, int limit, const std::function& filterCallback) { - if (auto& iter = indexes.find(propIndex); iter != indexes.end()) - return iter->second->serialize(startCustomer, startValue, limit, filterCallback); + if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) + return iter->second->serialize(limit, filterCallback); return {}; } }; diff --git a/src/customers.cpp b/src/customers.cpp index b4d4fc3..6a8da62 100644 --- a/src/customers.cpp +++ b/src/customers.cpp @@ -19,8 +19,11 @@ PersonData_s* Customers::getCustomerByID(int64_t userId) { int32_t linId; - if (const auto entry = customerMap.find(userId); entry != customerMap.end()) - return getCustomerByLIN(entry->second); + if (customerMap.get(userId, linId)) + return getCustomerByLIN(linId); + + //if (const auto entry = customerMap.find(userId); entry != customerMap.end()) + // return getCustomerByLIN(entry->second); return nullptr; } @@ -55,6 +58,25 @@ PersonData_s* Customers::getCustomerByLIN(const int64_t linId) PersonData_s* Customers::createCustomer(int64_t userId) { + int linId; + if (customerMap.get(userId, linId)) 
+ { + return customerLinear.at(linId); + } + + const auto newUser = recast(PoolMem::getPool().getPtr(sizeof(PersonData_s))); + newUser->id = userId; + newUser->linId = static_cast(customerLinear.size());; + newUser->idBytes = 0; + newUser->bytes = 0; + newUser->comp = 0; + newUser->props = nullptr; + + customerMap.set(userId, newUser->linId); + customerLinear.emplace_back(newUser); + return newUser; + + /* if (auto& res = customerMap.emplace(userId, 0); res.second == true) { const auto newUser = recast(PoolMem::getPool().getPtr(sizeof(PersonData_s))); @@ -73,6 +95,7 @@ PersonData_s* Customers::createCustomer(int64_t userId) { return customerLinear.at(res.first->second); } + */ } PersonData_s* Customers::createCustomer(string userIdString) @@ -103,7 +126,7 @@ PersonData_s* Customers::createCustomer(string userIdString) newUser->props = nullptr; newUser->setIdStr(userIdString); - customerMap[hashId] = newUser->linId; + customerMap.set(hashId, newUser->linId); customerLinear.emplace_back(newUser); return newUser; @@ -204,7 +227,7 @@ int64_t Customers::deserialize(char* mem) // index this customer customerLinear[customer->linId] = customer; - customerMap[customer->id] = customer->linId; + customerMap.set(customer->id, customer->linId); // next block please read += size; diff --git a/src/customers.h b/src/customers.h index 6e3b506..04d38da 100644 --- a/src/customers.h +++ b/src/customers.h @@ -3,8 +3,6 @@ #include "common.h" #include "logger.h" #include "customer.h" -//#include "mem/bigring.h" -#include "robin_hood.h" #include "mem/blhash.h" #include "grid.h" @@ -21,7 +19,7 @@ namespace openset class Customers { public: - robin_hood::unordered_map> customerMap; + BinaryListHash customerMap; vector customerLinear; int partition; diff --git a/src/indexbits.cpp b/src/indexbits.cpp index e334987..e0bf916 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -216,7 +216,7 @@ void IndexBits::grow(int64_t required, bool exact) return; if (!exact) - required += 32; + 
required += ints > 100 ? (ints > 1000 ? 128 : 64) : 32; const auto bytes = required * sizeof(uint64_t); const auto write = cast(PoolMem::getPool().getPtr(bytes)); diff --git a/src/indexbits.h b/src/indexbits.h index 975730f..5e77614 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -102,5 +102,67 @@ namespace openset return os; } }; + + class IndexLRU + { + using Key = std::pair; + using Value = std::pair::iterator>; + + list items; + unordered_map keyValuesMap; + int cacheSize; + + public: + IndexLRU(int cacheSize) : + cacheSize(cacheSize) + {} + + std::tuple set(int propIndex, int64_t value, IndexBits* bits) + { + const Key key(propIndex, value); + + if (const auto iter = keyValuesMap.find(key); iter == keyValuesMap.end()) + { + items.push_front(key); + + const Value listMap(bits, items.begin()); + keyValuesMap[key] = listMap; + + if (keyValuesMap.size() > cacheSize) { + const auto evicted = keyValuesMap[items.back()].first; + keyValuesMap.erase(items.back()); + items.pop_back(); + return {key.first, key.second, evicted}; + } + } + else + { + items.erase(iter->second.second); + items.push_front(key); + const Value listMap(bits, items.begin()); + keyValuesMap[key] = listMap; + } + + return {0,0,0}; + } + + IndexBits* get(int propIndex, int64_t value) + { + + const Key key(propIndex, value); + + if (auto iter = keyValuesMap.find(key); iter == keyValuesMap.end()) + { + return nullptr; + } + else + { + items.erase(iter->second.second); + items.push_front(key); + keyValuesMap[key] = { iter->second.first, items.begin() }; + return iter->second.first; + } + } + }; }; }; diff --git a/src/oloop_customer_list.cpp b/src/oloop_customer_list.cpp index 25682fc..b4ca022 100644 --- a/src/oloop_customer_list.cpp +++ b/src/oloop_customer_list.cpp @@ -14,6 +14,9 @@ OpenLoopCustomerList::OpenLoopCustomerList( Database::TablePtr table, Macro_s macros, openset::result::ResultSet* result, + const std::vector &sortOrderProperties, + const std::vector &cursor, + const int limit, int 
instance) : OpenLoop(table->getName(), oloopPriority_e::realtime), // queries are high priority and will preempt other running cells @@ -29,7 +32,10 @@ OpenLoopCustomerList::OpenLoopCustomerList( startTime(0), population(0), index(nullptr), - result(result) + result(result), + cursor(cursor), + sortOrderProperties(sortOrderProperties), + limit(limit) {} OpenLoopCustomerList::~OpenLoopCustomerList() @@ -98,7 +104,7 @@ void OpenLoopCustomerList::prepare() return; } - segments.push_back(parts->segments[segmentName].bits); + segments.push_back(parts->segments[segmentName].getBits()); } } @@ -117,8 +123,16 @@ void OpenLoopCustomerList::prepare() person.setSessionTime(macros.sessionTime); - const auto filter = [&](SortKeyOneProp_s* key, int* value) -> bool { - return true; + + + const auto filterAscending = [&](SortKeyOneProp_s* key, int* value) -> bool { + if (key->value == cursor[0] && key->customerId == cursor[1]) + return false; + if (key->value < cursor[0]) + return false; + if (key->value > cursor[0] || key->customerId >= cursor[1]) + return true; + return false; }; @@ -126,10 +140,8 @@ void OpenLoopCustomerList::prepare() indexedList = std::move(parts->attributes.customerIndexing.getListAscending( propIndex, - 10000, - 4, - 1000, - filter + limit, + filterAscending )); iter = indexedList.begin(); diff --git a/src/oloop_customer_list.h b/src/oloop_customer_list.h index ad6a24d..9bbe787 100644 --- a/src/oloop_customer_list.h +++ b/src/oloop_customer_list.h @@ -37,6 +37,10 @@ namespace openset openset::db::IndexBits* index; openset::result::ResultSet* result; + std::vector sortOrderProperties; + std::vector cursor; + int limit; + CustomerIndexList indexedList; CustomerIndexList::iterator iter; @@ -45,6 +49,9 @@ namespace openset openset::db::Database::TablePtr table, openset::query::Macro_s macros, openset::result::ResultSet* result, + const std::vector& indexProperties, + const std::vector& cursor, + const int limit, int instance); ~OpenLoopCustomerList() final; 
diff --git a/src/oloop_histogram.cpp b/src/oloop_histogram.cpp index 2bed2f2..2e38e61 100644 --- a/src/oloop_histogram.cpp +++ b/src/oloop_histogram.cpp @@ -173,11 +173,10 @@ void OpenLoopHistogram::prepare() return; } - segments.push_back(parts->segments[segmentName].bits); + segments.push_back(parts->segments[segmentName].getBits()); } } - interpreter->setCompareSegments(index, segments); } diff --git a/src/oloop_insert.cpp b/src/oloop_insert.cpp index 7d5ff1f..ae95e41 100644 --- a/src/oloop_insert.cpp +++ b/src/oloop_insert.cpp @@ -2,6 +2,8 @@ #include "cjson/cjson.h" #include "str/strtools.h" +#include "robin_hood.h" + #include "customers.h" #include "customer.h" #include "database.h" @@ -83,8 +85,7 @@ bool OpenLoopInsert::run() // if we are not in owner or clone state we are just going to backlog // the inserts until our state changes, then we will perform inserts Logger::get().info("skipping partition " + to_string(tablePartitioned->partition) + " not active or clone."); - this->scheduleFuture(1000); - sleepCounter = 0; + this->scheduleFuture(250); tablePartitioned->attributes.clearDirty(); @@ -97,16 +98,12 @@ bool OpenLoopInsert::run() if (inserts.empty()) { SideLog::getSideLog().updateReadHead(table.get(), loop->partition, readHandle); - scheduleFuture((sleepCounter > 10 ? 10 : sleepCounter) * 100); // lazy back-off function - ++sleepCounter; // inc after, this will make it run one more time before sleeping - + scheduleFuture(250); // lazy back-off function tablePartitioned->attributes.clearDirty(); return false; } - sleepCounter = 0; - // reusable object representing a customer Customer person; @@ -126,7 +123,7 @@ bool OpenLoopInsert::run() // pass. 
This can greatly reduce redundant calls to Mount and Commit // which can be expensive as they both call LZ4 (which is fast, but still // has it's overhead) - std::unordered_map < std::string, std::vector> evtByPerson; + robin_hood::unordered_map < std::string, std::vector, robin_hood::hash> evtByPerson; auto insertIter = inserts.begin(); for (; insertIter != inserts.end(); ++insertIter) diff --git a/src/oloop_property.cpp b/src/oloop_property.cpp index 2c8807f..bc3c59d 100644 --- a/src/oloop_property.cpp +++ b/src/oloop_property.cpp @@ -74,15 +74,15 @@ void OpenLoopProperty::prepare() return; } - segments.push_back(parts->segments[segmentName].bits); + segments.push_back(parts->segments[segmentName].getBits()); } } } // get the root value - const auto all = parts->attributes.get(config.propIndex, NONE); + const auto allBits = parts->attributes.getBits(config.propIndex, NONE); - if (!all) + if (!allBits) { shuttle->reply( 0, @@ -127,7 +127,7 @@ void OpenLoopProperty::prepare() auto idx = 0; for (auto s : segments) { - auto bits = all->getBits(); + auto bits = allBits; bits->opAnd(*s); aggs->columns[idx].value = bits->population(stopBit); delete bits; @@ -136,7 +136,7 @@ void OpenLoopProperty::prepare() } // turn ints and doubles into their bucketed name - auto toBucket = [&](const int64_t value)->int64_t + const auto toBucket = [&](const int64_t value)->int64_t { if (config.bucket == 0) return value; @@ -257,15 +257,12 @@ bool OpenLoopProperty::run() for (auto value : groupsIter->second) { + const auto bits = parts->attributes.getBits(config.propIndex, value); - auto attr = parts->attributes.get(config.propIndex, value); - - if (!attr) + if (!bits) continue; - const auto bits = attr->getBits(); sumBits->opOr(*bits); - delete bits; } // remove bits not in the segment diff --git a/src/oloop_query.cpp b/src/oloop_query.cpp index 928dee8..737c7ab 100644 --- a/src/oloop_query.cpp +++ b/src/oloop_query.cpp @@ -98,8 +98,7 @@ void OpenLoopQuery::prepare() return; } - 
segments.push_back(parts->segments[segmentName].bits); - + segments.push_back(parts->segments[segmentName].getBits()); } } diff --git a/src/oloop_seg_refresh.cpp b/src/oloop_seg_refresh.cpp index fd37d4a..4b47461 100644 --- a/src/oloop_seg_refresh.cpp +++ b/src/oloop_seg_refresh.cpp @@ -28,17 +28,12 @@ OpenLoopSegmentRefresh::~OpenLoopSegmentRefresh() { if (prepared) --parts->segmentUsageCount; - - parts->storeAllChangedSegments(); parts->flushMessageMessages(); } } void OpenLoopSegmentRefresh::storeSegment() const { - // store any changes we've made to the segments - parts->storeAllChangedSegments(); - const auto delta = bits->population(maxLinearId) - startPopulation; // update the segment refresh @@ -219,6 +214,11 @@ bool OpenLoopSegmentRefresh::run() openset::db::PersonData_s* personData; + // get a fresh pointer to bits on each entry in case they left the LRU + maxLinearId = parts->people.customerCount(); + segmentName = segmentsIter->first; + interpreter->setBits(parts->getBits(segmentName), maxLinearId); + while (true) { if (sliceComplete()) diff --git a/src/oloop_segment.cpp b/src/oloop_segment.cpp index 6bba7ce..85c31ee 100644 --- a/src/oloop_segment.cpp +++ b/src/oloop_segment.cpp @@ -39,7 +39,6 @@ OpenLoopSegment::~OpenLoopSegment() { if (prepared) --parts->segmentUsageCount; - parts->storeAllChangedSegments(); parts->flushMessageMessages(); } } @@ -78,9 +77,6 @@ void OpenLoopSegment::storeSegments() * are local to the partition */ - // store any changes we've made to the segments - parts->storeAllChangedSegments(); - for (auto& macro : macrosList) { const auto &segmentName = macro.first; @@ -269,8 +265,13 @@ void OpenLoopSegment::prepare() bool OpenLoopSegment::run() { - openset::db::PersonData_s* personData; + + // get a fresh pointer to bits on each entry in case they left the LRU + maxLinearId = parts->people.customerCount(); + segmentName = macroIter->first; + interpreter->setBits(parts->getBits(segmentName), maxLinearId); + while (true) { if 
(sliceComplete()) @@ -326,8 +327,7 @@ bool OpenLoopSegment::run() if (interpreter->error.inError()) { - openset::errors::Error error; - error = interpreter->error; + const openset::errors::Error error = interpreter->error; interpreter = nullptr; storeSegments(); diff --git a/src/queryindexing.cpp b/src/queryindexing.cpp index 97b1ed2..aa19b99 100644 --- a/src/queryindexing.cpp +++ b/src/queryindexing.cpp @@ -87,7 +87,7 @@ openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) for (auto attr: attrList) { // get the bits - const auto workBits = attr->getBits(); + const auto workBits = parts->attributes.getBits(attr.index, attr.value); if (initialized) { @@ -98,9 +98,6 @@ openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) resultBits.opCopy(*workBits); initialized = true; } - - // clean up them bits - delete workBits; } if (!initialized) diff --git a/src/result.cpp b/src/result.cpp index a82353f..5b5d6e7 100644 --- a/src/result.cpp +++ b/src/result.cpp @@ -1088,31 +1088,81 @@ void ResultMuxDemux::jsonResultHistogramFill( } } -void ResultMuxDemux::flatColumnMultiSort(cjson* doc, const ResultSortOrder_e sort, const int column) +void ResultMuxDemux::flatColumnMultiSort(cjson* doc, const ResultSortOrder_e sort, std::vector sortProps) { - doc->recurseSort( - "_", - [&](const cjson* left, const cjson* right) -> bool - { - switch (left->at(column)->type()) + if (sortProps.size() == 1) + { + const auto column = sortProps[0]; + doc->recurseSort( + "_", + [&](const cjson* left, const cjson* right) -> bool { - case cjson::Types_e::BOOL: - case cjson::Types_e::INT: - if (sort == ResultSortOrder_e::Asc) - return (left->at(column)->getInt() < right->at(column)->getInt()); - return (left->at(column)->getInt() > right->at(column)->getInt()); - case cjson::Types_e::DBL: - if (sort == ResultSortOrder_e::Asc) - return (left->at(column)->getDouble() < right->at(column)->getDouble()); - return (left->at(column)->getDouble() > 
right->at(column)->getDouble()); - case cjson::Types_e::STR: - if (sort == ResultSortOrder_e::Asc) - return (left->at(column)->getString() < right->at(column)->getString()); - return (left->at(column)->getString() > right->at(column)->getString()); - default: - return false; + switch (left->at(column)->type()) + { + case cjson::Types_e::BOOL: + case cjson::Types_e::INT: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getInt() < right->at(column)->getInt()); + return (left->at(column)->getInt() > right->at(column)->getInt()); + case cjson::Types_e::DBL: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getDouble() < right->at(column)->getDouble()); + return (left->at(column)->getDouble() > right->at(column)->getDouble()); + case cjson::Types_e::STR: + if (sort == ResultSortOrder_e::Asc) + return (left->at(column)->getString() < right->at(column)->getString()); + return (left->at(column)->getString() > right->at(column)->getString()); + default: + return false; + } } - }); + ); + } + else if (sortProps.size() == 2) + { + const auto firstColumn = sortProps[0]; + const auto secondColumn = sortProps[1]; + + doc->recurseSort( + "_", + [&](const cjson* left, const cjson* right) -> bool + { + switch (left->at(firstColumn)->type()) + { + case cjson::Types_e::BOOL: + case cjson::Types_e::INT: + if (sort == ResultSortOrder_e::Asc) + return ((left->at(firstColumn)->getInt() < right->at(firstColumn)->getInt()) || + (left->at(firstColumn)->getInt() == right->at(firstColumn)->getInt() && + left->at(secondColumn)->getInt() < right->at(secondColumn)->getInt())); + + return ((left->at(firstColumn)->getInt() > right->at(firstColumn)->getInt()) || + (left->at(firstColumn)->getInt() == right->at(firstColumn)->getInt() && + left->at(secondColumn)->getInt() > right->at(secondColumn)->getInt())); + case cjson::Types_e::DBL: + if (sort == ResultSortOrder_e::Asc) + return ((left->at(firstColumn)->getDouble() < right->at(firstColumn)->getDouble()) || + 
(left->at(firstColumn)->getDouble() == right->at(firstColumn)->getDouble() && + left->at(secondColumn)->getDouble() < right->at(secondColumn)->getDouble())); + + return ((left->at(firstColumn)->getDouble() > right->at(firstColumn)->getDouble()) || + (left->at(firstColumn)->getDouble() == right->at(firstColumn)->getDouble() && + left->at(secondColumn)->getDouble() > right->at(secondColumn)->getDouble())); + case cjson::Types_e::STR: + if (sort == ResultSortOrder_e::Asc) + return ((left->at(firstColumn)->getString() < right->at(firstColumn)->getString()) || + (left->at(firstColumn)->getString() == right->at(firstColumn)->getString() && + left->at(secondColumn)->getString() < right->at(secondColumn)->getString())); + + return ((left->at(firstColumn)->getString() > right->at(firstColumn)->getString()) || + (left->at(firstColumn)->getString() == right->at(firstColumn)->getString() && + left->at(secondColumn)->getString() > right->at(secondColumn)->getString())); + default: + return false; + } + } + ); + } } void ResultMuxDemux::jsonResultSortByColumn(cjson* doc, const ResultSortOrder_e sort, const int column) diff --git a/src/result.h b/src/result.h index d41d41b..2527c5b 100644 --- a/src/result.h +++ b/src/result.h @@ -372,7 +372,7 @@ namespace openset int64_t bucket, int64_t forceMin = std::numeric_limits::min(), int64_t forceMax = std::numeric_limits::min()); - static void flatColumnMultiSort(cjson* doc, ResultSortOrder_e sort, int column); + static void flatColumnMultiSort(cjson* doc, ResultSortOrder_e sort, std::vector sortProps); static void jsonResultSortByColumn(cjson* doc, ResultSortOrder_e sort, int column); static void jsonResultSortByGroup(cjson* doc, ResultSortOrder_e sort); diff --git a/src/rpc_insert.cpp b/src/rpc_insert.cpp index 7cf5f59..cb20661 100644 --- a/src/rpc_insert.cpp +++ b/src/rpc_insert.cpp @@ -154,7 +154,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa const auto destination = cast((std::abs(uuid) % 
13337) % partitions->getPartitionMax()); int64_t len; - SideLog::getSideLog().add(table.get(), destination, cjson::stringifyCstr(row, len)); + auto logSize = SideLog::getSideLog().add(table.get(), destination, cjson::stringifyCstr(row, len)); } SideLog::getSideLog().unlock(); @@ -216,7 +216,21 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa } } - message->reply(http::StatusCode::success_ok, response); + if (SideLog::getSideLog().getLogSize() < 25000) + { + message->reply(http::StatusCode::success_ok, response); + } + else + { + thread work([=]() + { + while (SideLog::getSideLog().getLogSize() > 25000) + ThreadSleep(55); + + message->reply(http::StatusCode::success_ok, response); + }); + work.detach(); + } } void RpcInsert::insert(const openset::web::MessagePtr& message, const RpcMapping& matches) diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index eccb28a..de21fc3 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -202,22 +202,47 @@ shared_ptr forkQuery( ResultMuxDemux::resultFlatColumnsToJson(resultColumnCount, setCount, resultSets, resultJson.get()); const auto toJsonEndTime = Now(); + const auto resultNode = resultJson.get()->find("_"); + const auto rowsInResult = resultNode ? 
resultNode->memberCount : 0; + // free up the responses openset::globals::mapper->releaseResponses(result); // clean up all those resultSet* for (auto res : resultSets) delete res; + const auto sortStartTime = Now(); + ResultMuxDemux::flatColumnMultiSort(resultJson.get(), sortOrder, sortColumn); + const auto sortEndTime = Now(); + const auto trimStartTime = Now(); - //ResultMuxDemux::flatColumnMultiSort(resultJson.get(), sortOrder, sortColumn[0]); ResultMuxDemux::jsonResultTrim(resultJson.get(), trim); // local function to fill Meta data in result JSON const auto trimEndTime = Now(); cout << "dispatch: " << (dispatchEndTime - dispatchStartTime) << " gather: " << (gatherEndTime - gatherStartTime) << " json: " << (toJsonEndTime - toJsonStartTime) << + " sort: " << (sortEndTime - sortStartTime) << " trim: " << (trimEndTime - trimStartTime) << endl; + const auto rowsAfterTrim = resultNode ? resultNode->memberCount : 0; + + const auto info = resultJson.get()->setObject("info"); + + if (rowsAfterTrim != 0 && rowsInResult == rowsAfterTrim) + { + info->set("more", true); + + std::string cursor = + to_string(resultNode->membersTail->at(sortColumn[0])->getInt()) + "," + + to_string(resultNode->membersTail->at(sortColumn[1])->getInt()); + info->set("cursor", cursor); + } + else + { + info->set("more", false); + } + return resultJson; } @@ -691,6 +716,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM ? 
ResultSortOrder_e::Asc : ResultSortOrder_e::Desc; auto sortKeyString = message->getParamString("sort", ""); + auto cursorString = message->getParamString("cursor", ""); if (!sortKeyString.length()) sortKeyString = "id"; @@ -766,11 +792,14 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM return; } - // validate that sortKeys are in the select statement - const auto sortKeyParts = split(sortKeyString, ','); + // Ordering keys (at this point we only use one) + std::vector sortOrderProperties; + - std::vector sortOrders; + int customerIdIndex = -1; + // validate that sortKeys are in the select statement + const auto sortKeyParts = split(sortKeyString, ','); for (auto key : sortKeyParts) { key = trim(key); @@ -778,15 +807,55 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM if (key.length()) { + + auto propInfo = table->getProperties()->getProperty(key); + + if (!propInfo) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'sort': property '" + key + "' not found" + }, + message); + return; + } + + if (propInfo->isSet) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'sort': property '" + key + "' cannot be a 'set' type" + }, + message); + return; + } + + if (propInfo->type != PropertyTypes_e::intProp && propInfo->type != PropertyTypes_e::doubleProp) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'sort': property '" + key + "' must be an 'int' or 'double' type" + }, + message); + return; + } + auto index = 0; for (auto& column : queryMacros.vars.columnVars) { + if (column.alias == "id") + customerIdIndex = index; + if (column.alias == key) { found = true; - - queryMacros.vars.autoGrouping.push_back(index); - sortOrders.push_back(index); + sortOrderProperties.push_back(index); break; } @@ -800,13 +869,100 @@ void 
RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM errors::Error { errors::errorClass_e::query, errors::errorCode_e::general_error, - "sort key in query string not found in query script select statement" + "param 'sort': sort property must be part of query 'select' statement" }, message); return; } } + if (customerIdIndex == -1) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'sort': sorting requires that customer 'id' is part of a 'select' statement" + }, + message); + return; + } + + if (sortOrderProperties.size() > 1) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'sort': currently only 1 sort property can be specified" + }, + message); + return; + } + + // add customerId as secondary sort + if (sortOrderProperties.size() == 1) + sortOrderProperties.push_back(customerIdIndex); + + for (const auto propIndex: sortOrderProperties) + queryMacros.vars.autoGrouping.push_back(propIndex); + + std::vector cursorValues; + + // validate that sortKeys are in the select statement + const auto cursorParts = split(cursorString, ','); + for (auto key : cursorParts) + { + key = trim(key); + auto found = false; + + if (key.length()) + { + try + { + cursorValues.push_back(stoll(key)); + } + catch (const std::runtime_error&) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'cursor': expecting a numeric value" + }, + message); + return; + } + catch (...) 
+ { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'cursor': expecting a numeric value" + }, + message); + return; + } + } + } + + if (cursorValues.size() == 0) + { + cursorValues = { LLONG_MIN, LLONG_MIN }; + } + else if (cursorValues.size() != 2) + { + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'cursor': expecting two numeric values (separated by a comma)" + }, + message); + return; + } + if (message->isParam("segments")) { const auto segmentText = message->getParamString("segments"); @@ -865,7 +1021,7 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM queryMacros.scriptMode, sortMode, sortOrder, - sortOrders, + sortOrderProperties, trimSize); if (json) // if null/empty we had an error message->reply(http::StatusCode::success_ok, *json); @@ -991,10 +1147,18 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM // pass factory function (as lambda) to create new cell objects partitions->cellFactory( activeList, - [shuttle, table, queryMacros, resultSets, &instance](AsyncLoop* loop) -> OpenLoop* + [shuttle, table, queryMacros, resultSets, &instance, sortOrderProperties, cursorValues, trimSize](AsyncLoop* loop) -> OpenLoop* { instance++; - return new OpenLoopCustomerList(shuttle, table, queryMacros, resultSets[loop->getWorkerId()], instance); + return new OpenLoopCustomerList( + shuttle, + table, + queryMacros, + resultSets[loop->getWorkerId()], + sortOrderProperties, + cursorValues, + trimSize, + instance); }); } diff --git a/src/sidelog.h b/src/sidelog.h index b6cf3a2..9321a65 100644 --- a/src/sidelog.h +++ b/src/sidelog.h @@ -2,6 +2,7 @@ #include #include +#include #include "sba/sba.h" #include "threads/locks.h" @@ -72,10 +73,10 @@ namespace openset::db class SideLog { - const int64_t LOG_MAX_AGE = 15'000; + //const int64_t LOG_MAX_AGE = 1'000; const int64_t MIN_LOG_SIZE = 1'000; 
- int64_t logSize{ 0 }; + atomic logSize{ 0 }; int64_t lastLogSize{ 0 }; SideLogCursor_s* head { nullptr }; @@ -137,7 +138,7 @@ namespace openset::db lastLogSize = logSize; } - const auto keepStamp = Now() - LOG_MAX_AGE; + //const auto keepStamp = Now() - LOG_MAX_AGE; const auto referencedEntries = getReferencedEntries(); if (referencedEntries.count(nullptr)) @@ -149,7 +150,7 @@ namespace openset::db while (cursor && logSize > MIN_LOG_SIZE && - cursor->stamp < keepStamp && + //cursor->stamp < keepStamp && referencedEntries.count(cursor) == 0) { const auto nextEntry = cursor->next; @@ -198,8 +199,13 @@ namespace openset::db cs.unlock(); } + int64_t getLogSize() const + { + return logSize; + } + // lock/unlock from caller using lock() and unlock() to accelerate inserts - void add(const Table* table, const int32_t partition, char* json) + int add(const Table* table, const int32_t partition, char* json) { const auto tableHash = table->getTableHash(); @@ -218,6 +224,8 @@ namespace openset::db tail->next = newEntry; tail = newEntry; + + return logSize; } JsonList read(const Table* table, const int32_t partition, const int limit, int64_t& readPosition) diff --git a/src/tablepartitioned.cpp b/src/tablepartitioned.cpp index c2f27ea..f3f9b57 100644 --- a/src/tablepartitioned.cpp +++ b/src/tablepartitioned.cpp @@ -10,46 +10,33 @@ using namespace openset::db; SegmentPartitioned_s::~SegmentPartitioned_s() { - if (bits) - delete bits; - if (interpreter) delete interpreter; } -openset::db::IndexBits* openset::db::SegmentPartitioned_s::prepare(Attributes& attributes) +void openset::db::SegmentPartitioned_s::prepare(Attributes& attr) { - if (bits) - return bits; - - changeCount = 0; - const auto attr = attributes.getMake(PROP_SEGMENT, segmentName); - bits = new IndexBits(); - bits->mount(attr->index, attr->ints, attr->ofs, attr->len, attr->linId); - - return bits; + attributes = &attr; + attributes->getMake(PROP_SEGMENT, segmentName); } -void 
openset::db::SegmentPartitioned_s::commit(Attributes& attributes) +openset::db::IndexBits* openset::db::SegmentPartitioned_s::getBits() { - if (changeCount) - attributes.swap(PROP_SEGMENT, MakeHash(segmentName), bits); - changeCount = 0; + return attributes->getBits(PROP_SEGMENT, MakeHash(segmentName)); } openset::db::SegmentPartitioned_s::SegmentChange_e openset::db::SegmentPartitioned_s::setBit(int64_t linearId, bool state) { + const auto bits = getBits(); const auto currentState = bits->bitState(linearId); if (state && !currentState) { - ++changeCount; bits->bitSet(linearId); return SegmentChange_e::enter; } if (!state && currentState) { - ++changeCount; bits->bitClear(linearId); return SegmentChange_e::exit; } @@ -62,11 +49,11 @@ openset::query::Interpreter * openset::db::SegmentPartitioned_s::getInterpreter( if (!interpreter) interpreter = new openset::query::Interpreter(macros, openset::query::InterpretMode_e::count); + const auto bits = getBits(); + if (!bits) throw std::runtime_error("call prepare before calling getInterpreter"); - interpreter->setBits(bits, maxLinearId); - return interpreter; } @@ -81,7 +68,6 @@ TablePartitioned::TablePartitioned( attributeBlob(attributeBlob), people(partition), asyncLoop(openset::globals::async->getPartition(partition)), - //triggers(new openset::revent::ReventManager(this)), insertBacklog(0) { // this will stop any translog purging until the insertCell (below) @@ -101,7 +87,6 @@ TablePartitioned::TablePartitioned( async::OpenLoop* cleanerCell = new async::OpenLoopCleaner(sharedTablePtr); cleanerCell->scheduleFuture(table->maintInterval); asyncLoop->queueCell(cleanerCell); - } TablePartitioned::~TablePartitioned() @@ -135,8 +120,6 @@ void TablePartitioned::checkForSegmentChanges() if (segmentUsageCount) return; - storeAllChangedSegments(); - std::vector orphanedSegments; InterpreterList onInsertList; @@ -212,32 +195,26 @@ std::function TablePartitioned::g if (this->segments.count(segmentName)) { deleteAfterUsing = false; 
- return this->segments[segmentName].prepare(this->attributes); + this->segments[segmentName].prepare(this->attributes); + return this->segments[segmentName].getBits(); } // if there are no bits with this name created in this query // then look in the index - auto attr = this->attributes.get(PROP_SEGMENT, segmentName); - - if (!attr) - return nullptr; - + const auto bits = this->attributes.getBits(PROP_SEGMENT, MakeHash(segmentName)); deleteAfterUsing = true; - return attr->getBits(); + return bits; }; } -void TablePartitioned::storeAllChangedSegments() -{ - for (auto& seg: segments) - seg.second.commit(attributes); -} - openset::db::IndexBits* TablePartitioned::getBits(std::string& segmentName) { if (this->segments.count(segmentName)) - return this->segments[segmentName].prepare(attributes); + { + this->segments[segmentName].prepare(attributes); + return this->segments[segmentName].getBits(); + } return nullptr; } diff --git a/src/tablepartitioned.h b/src/tablepartitioned.h index 37ca352..6d89eb1 100644 --- a/src/tablepartitioned.h +++ b/src/tablepartitioned.h @@ -45,9 +45,8 @@ namespace openset int64_t lastModified {0}; bool onInsert {false}; query::Interpreter* interpreter { nullptr }; - IndexBits* bits { nullptr }; - int changeCount {0}; + Attributes* attributes; SegmentPartitioned_s( const std::string& segmentName, @@ -75,8 +74,8 @@ namespace openset * * setBit - flips a bit to the desired state and returns the state change that took place */ - IndexBits* prepare(Attributes& attributes); // mounts bits, if they are not already - void commit(Attributes& attributes); // commits changed bits, if any + void prepare(Attributes& attributes); // mounts bits, if they are not already + IndexBits* getBits(); SegmentChange_e setBit(int64_t linearId, bool state); // flip bits by persion linear id // returns a new or cached interpreter. 
Call prepare before calling get Interpreter @@ -192,8 +191,6 @@ namespace openset // The Interpreter needs this callback to operate when performing segment math std::function getSegmentCallback(); - void storeAllChangedSegments(); - openset::db::IndexBits* getBits(std::string& segmentName); void pushMessage(const int64_t segmentHash, const SegmentPartitioned_s::SegmentChange_e state, std::string uuid); diff --git a/test/test_db.h b/test/test_db.h index 7c9bca9..147a2de 100644 --- a/test/test_db.h +++ b/test/test_db.h @@ -214,7 +214,7 @@ inline Tests test_db() const auto attr = parts->attributes.get(4000, "huge"); ASSERT(attr != nullptr); - const auto bits = attr->getBits(); + const auto bits = parts->attributes.getBits(4000, MakeHash("huge")); ASSERT(bits != nullptr); const auto pop = bits->population(parts->people.customerCount()); ASSERT(pop == 1); @@ -330,13 +330,14 @@ inline Tests test_db() auto attr = interpreter->interpreter->attrs->get(4000, "hello"); ASSERT(attr != nullptr); - auto bits = attr->getBits(); + auto bits = interpreter->interpreter->attrs->getBits(4000, MakeHash("hello")); ASSERT(bits != nullptr); auto pop = bits->population(parts->people.customerCount()); ASSERT(pop == 1); - attr = interpreter->interpreter->attrs->get(4000, "huge"); - ASSERT(attr == nullptr); + //attr = interpreter->interpreter->attrs->get(4000, "huge"); + //ASSERT(attr == nullptr); + // TODO - re-implement this test auto& debug = interpreter->debugLog(); ASSERT(debug.size() == 5); diff --git a/test/test_helper.cpp b/test/test_helper.cpp index 8acf280..27ab41f 100644 --- a/test/test_helper.cpp +++ b/test/test_helper.cpp @@ -42,6 +42,7 @@ TestEngineContainer_s* TestScriptRunner(const std::string& tableName, const std: // this mounts the now decompressed data (in the customer overlay) // into the interpreter + engine->interpreter->setBits(new IndexBits(), parts->people.customerCount()); engine->interpreter->mount(&person); // run it engine->interpreter->exec(); From 
cc97316273e278b8c545eee5f66834cbda187e1b Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 02:53:06 -0500 Subject: [PATCH 08/31] new memory management for index bits, LRU for bits --- README.md | 14 +- lib/sba/sba.cpp | 136 ++++++------ lib/sba/sba.h | 160 ++++++------- src/attributes.cpp | 48 +--- src/attributes.h | 8 +- src/indexbits.cpp | 469 ++++++++++++++------------------------- src/indexbits.h | 327 ++++++++++++++++++++------- src/oloop_property.cpp | 1 - src/oloop_segment.cpp | 4 +- src/queryindexing.cpp | 8 +- src/queryinterpreter.cpp | 2 +- src/rpc_insert.cpp | 2 +- src/tablepartitioned.cpp | 2 +- src/ver.h | 2 +- 14 files changed, 587 insertions(+), 596 deletions(-) diff --git a/README.md b/README.md index cac4dbc..4c3ce9f 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,10 @@ | Platform | Version | Info | Status | | :---------- | :-----: | :------------------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Linux x64 | 0.4.4 | gcc 7.2, release, debug | [![Build Status](https://travis-ci.org/opset/openset.svg?branch=master)](https://travis-ci.org/opset/openset) | -| Windows x64 | 0.4.4 | Visual C++ 2017, release, debug | [![Build status](https://ci.appveyor.com/api/projects/status/pr8jrhfth2bt7j6r/branch/master?svg=true)](https://ci.appveyor.com/project/SethHamilton/openset/branch/master) | +| Linux x64 | 0.4.5 | gcc 7.2, release, debug | [![Build Status](https://travis-ci.org/opset/openset.svg?branch=master)](https://travis-ci.org/opset/openset) | +| Windows x64 | 0.4.5 | Visual C++ 2017, release, debug | [![Build status](https://ci.appveyor.com/api/projects/status/pr8jrhfth2bt7j6r/branch/master?svg=true)](https://ci.appveyor.com/project/SethHamilton/openset/branch/master) | -:coffee: OpenSet is currently in alpha. Please see v0.4.4 release notes below. +:coffee: OpenSet is currently in alpha. 
Please see v0.4.5 release notes below. # What's it do? @@ -62,7 +62,7 @@ git clone https://github.com/opset/openset_samples.git **2. Install [Docker](https://www.docker.com/) and start OpenSet (in interactive mode).** ```bash -docker run -p 8080:8080 -e OS_HOST=127.0.0.1 -e OS_PORT=8080 --rm=true -it opset/openset_x64_rel:0.4.4 +docker run -p 8080:8080 -e OS_HOST=127.0.0.1 -e OS_PORT=8080 --rm=true -it opset/openset_x64_rel:0.4.5 ``` > **Note** The OpenSet images can always be found on [dockerhub](https://cloud.docker.com/u/opset/repository/docker/opset/openset_x64_rel). @@ -146,7 +146,7 @@ response: > :bulb: view the event data [here](https://github.com/opset/openset_samples/blob/master/data/highstreet_events.json) -**7. Let's perform an `event` query.** +**7. Let's generate a report.** This query searches through each customer looking for matching events in a customers history. @@ -156,7 +156,7 @@ A cool feature of OpenSet grouping is that all branches of the result set will b ```ruby curl \ --X POST http://127.0.0.1:8080/v1/query/highstreet/event \ +-X POST http://127.0.0.1:8080/v1/query/highstreet/report \ --data-binary @- << EOF | json_pp # define which properties we want to aggregate @@ -527,7 +527,7 @@ The query then searches for the next subsequent `purchase` event and records the ```ruby curl \ --X POST http://127.0.0.1:8080/v1/query/highstreet/event \ +-X POST http://127.0.0.1:8080/v1/query/highstreet/report \ --data-binary @- << EOF | json_pp # our osl script diff --git a/lib/sba/sba.cpp b/lib/sba/sba.cpp index eca694d..631017d 100644 --- a/lib/sba/sba.cpp +++ b/lib/sba/sba.cpp @@ -5,70 +5,70 @@ using namespace std; PoolMem::PoolMem() { - // set indexes in bucket objects - auto idx = 0; - for (auto &b : breakPoints) - { - b.index = idx; - ++idx; - } - - // build the reverse lookup - once - auto bits = 0; - while (true) - { - const auto size = pow(bits, 2); - auto bucket = -1; - for (auto &b : breakPoints) - if (b.maxSize >= size) - { - bucket = b.index; 
- break; - } - bucketLookup.push_back(bucket == 0 ? 1 : bucket); - ++bits; - - if (size >= breakPoints.back().maxSize) - break; - } + // set indexes in bucket objects + auto idx = 0; + for (auto &b : breakPoints) + { + b.index = idx; + ++idx; + } + + // build the reverse lookup - once + auto bits = 0; + while (true) + { + const auto size = pow(bits, 2); + auto bucket = -1; + for (auto &b : breakPoints) + if (b.maxSize >= size) + { + bucket = b.index; + break; + } + bucketLookup.push_back(bucket == 0 ? 1 : bucket); + ++bits; + + if (size >= breakPoints.back().maxSize) + break; + } } void* PoolMem::getPtr(int64_t size) { - // give us the starting bucket for iteration - int64_t bucket = std::sqrt(size); + // give us the starting bucket for iteration + int64_t bucket = std::sqrt(size); - // will iterate through bucekts of matching sqrt until one fits or we hit the end. + // will iterate through bucekts of matching sqrt until one fits or we hit the end. // this will iteratate once or twice - while (bucket < bucketLookup.size() && size > breakPoints[bucketLookup[bucket]].maxSize) - ++bucket; - - // bucket index beyond lookup, so this is a non-pooled allocation - if (bucket >= bucketLookup.size()) - { - // this is a big allocation (outside our bucket sizes), so grab it from heap - const auto alloc = reinterpret_cast(new char[size + MemConstants::PoolMemHeaderSize]); - alloc->poolIndex = -1; // -1 = non-pooled - return alloc->data; - } - - // figure out which bucket size (if any) this allocation will fit - auto &mem = breakPoints[bucketLookup[bucket]]; - - csLock lock(mem.memLock); - - if (!mem.freed.empty()) - { - const auto alloc = mem.freed.back(); - mem.freed.pop_back(); - alloc->poolIndex = mem.index; - return alloc->data; - } + while (bucket < bucketLookup.size() && size > breakPoints[bucketLookup[bucket]].maxSize) + ++bucket; + + // bucket index beyond lookup, so this is a non-pooled allocation + if (bucket >= bucketLookup.size()) + { + // this is a big allocation 
(outside our bucket sizes), so grab it from heap + const auto alloc = reinterpret_cast(new char[size + MemConstants::PoolMemHeaderSize]); + alloc->poolIndex = -1; // -1 = non-pooled + return alloc->data; + } + + // figure out which bucket size (if any) this allocation will fit + auto &mem = breakPoints[bucketLookup[bucket]]; + + csLock lock(mem.memLock); + + if (!mem.freed.empty()) + { + const auto alloc = mem.freed.back(); + mem.freed.pop_back(); + alloc->poolIndex = mem.index; + return alloc->data; + } //reinterpret_cast(mem.heap.newPtr(mem.maxSize + MemConstants::PoolMemHeaderSize)); - const auto alloc = reinterpret_cast(new char[mem.maxSize + MemConstants::PoolMemHeaderSize]); - alloc->poolIndex = mem.index; - return alloc->data; + const auto alloc = reinterpret_cast(new char[mem.maxSize + MemConstants::PoolMemHeaderSize]); + alloc->poolIndex = mem.index; + return alloc->data; } void PoolMem::freePtr(void* ptr) @@ -79,18 +79,18 @@ void PoolMem::freePtr(void* ptr) return; // nice place for a breakpoint in debug // -1 means this was non-pooled so just delete it - if (alloc->poolIndex == -1) - { - delete[](static_cast(ptr) - MemConstants::PoolMemHeaderSize); - return; - } + if (alloc->poolIndex == -1) + { + delete[](static_cast(ptr) - MemConstants::PoolMemHeaderSize); + return; + } - auto& mem = breakPoints[alloc->poolIndex]; + auto& mem = breakPoints[alloc->poolIndex]; - csLock lock(mem.memLock); - - alloc->poolIndex = -2; - mem.freed.push_back(alloc); + csLock lock(mem.memLock); + + alloc->poolIndex = -2; + mem.freed.push_back(alloc); // if a pool gets to large, trim it back if (mem.freed.size() > MemConstants::CullSize) @@ -98,8 +98,8 @@ void PoolMem::freePtr(void* ptr) const auto cullTo = MemConstants::CullSize / 5; while (mem.freed.size() > cullTo) { - delete [] reinterpret_cast(mem.freed.back()); - mem.freed.pop_back(); + delete [] reinterpret_cast(mem.freed.back()); + mem.freed.pop_back(); } } } diff --git a/lib/sba/sba.h b/lib/sba/sba.h index 
4980fca..41d4c5f 100644 --- a/lib/sba/sba.h +++ b/lib/sba/sba.h @@ -5,10 +5,10 @@ namespace MemConstants { - const int64_t PoolMemHeaderSize = 4; - const int PoolBuckets = 257; - const int PoolBucketOffset = 4; - const int PoolBucketAlign = 8; + const int64_t PoolMemHeaderSize = 4; + const int PoolBuckets = 257; + const int PoolBucketOffset = 4; + const int PoolBucketAlign = 8; const int CullSize = 10; } @@ -17,93 +17,93 @@ class PoolMem private: #pragma pack(push,1) - struct alloc_s - { - int32_t poolIndex; - char data[1]; - }; + struct alloc_s + { + int32_t poolIndex; + char data[1]; + }; #pragma pack(pop) - struct memory_s - { - CriticalSection memLock; - int32_t index{ 0 }; - const int64_t maxSize; - std::vector freed; + struct memory_s + { + CriticalSection memLock; + int32_t index{ 0 }; + const int64_t maxSize; + std::vector freed; - memory_s(const int64_t maxSize) : - maxSize(maxSize) - {} - }; + memory_s(const int64_t maxSize) : + maxSize(maxSize) + {} + }; - std::vector breakPoints = { - { 16 }, - { 20 }, - { 24 }, - { 28 }, - { 36 }, - { 52 }, - { 64 }, - { 100 }, - { 144 }, - { 256 }, - { 400 }, - { 576 }, - { 784 }, - { 1024 }, - { 1296 }, - { 1600 }, - { 1936 }, - { 2304 }, - { 2704 }, - { 3136 }, - { 3600 }, - { 4096 }, - { 4624 }, - { 5184 }, - { 5776 }, - { 6400 }, - { 7056 }, - { 7744 }, - { 9216 }, - { 10816 }, - { 12544 }, - { 14400 }, - { 16384 }, + std::vector breakPoints = { + { 16 }, + { 20 }, + { 24 }, + { 28 }, + { 36 }, + { 52 }, + { 64 }, + { 100 }, + { 144 }, + { 256 - MemConstants::PoolMemHeaderSize }, + { 400 }, + { 576 }, + { 784 }, + { 1024 - MemConstants::PoolMemHeaderSize }, + { 1296 }, + { 1600 }, + { 1936 }, + { 2304 }, + { 2704 }, + { 3136 }, + { 3600 }, + { 4096 - MemConstants::PoolMemHeaderSize }, + { 4624 }, + { 5184 }, + { 5776 }, + { 6400 }, + { 7056 }, + { 7744 }, + { 9216 }, + { 10816 }, + { 12544 }, + { 14400 }, + { 16384 }, /* { 18496 }, - { 20736 }, - { 23104 }, - { 25600 }, - { 28224 }, - { 30976 }, - { 33856 }, - { 
36864 }, - { 40000 }, - { 43264 }, - { 46656 }, - { 50176 }, - { 53824 }, - { 57600 }, - { 61504 }, - { 65536 }, */ - }; + { 20736 }, + { 23104 }, + { 25600 }, + { 28224 }, + { 30976 }, + { 33856 }, + { 36864 }, + { 40000 }, + { 43264 }, + { 46656 }, + { 50176 }, + { 53824 }, + { 57600 }, + { 61504 }, + { 65536 }, */ + }; - std::vector bucketLookup; + std::vector bucketLookup; - PoolMem(); - ~PoolMem() = default; // we never clean anything up, this is forever. + PoolMem(); + ~PoolMem() = default; // we never clean anything up, this is forever. public: - // singleton - static PoolMem& getPool() - { - static PoolMem pool; - return pool; - } + // singleton + static PoolMem& getPool() + { + static PoolMem pool; + return pool; + } - void* getPtr(int64_t size); - void freePtr(void* ptr); + void* getPtr(int64_t size); + void freePtr(void* ptr); }; //extern PoolMem* POOL; diff --git a/src/attributes.cpp b/src/attributes.cpp index 68c16ff..3321670 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -32,7 +32,7 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) const auto attribute = Attributes::getMake(propIndex, value); auto bits = new IndexBits(); - bits->mount(attribute->index, attribute->ints, attribute->ofs, attribute->len, attribute->linId); + bits->mount(attribute->data); // cache these bits const auto [evictPropIndex, evictValue, evictBits] = indexCache.set(propIndex, value, bits); @@ -40,34 +40,11 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) // if anything got squeezed out compress it if (evictBits) { - const auto attrPair = propertyIndex.find({ evictPropIndex, evictValue }); - const auto evictAttribute = attrPair->second; - - int64_t compBytes = 0; // OUT value via reference - int64_t linId; - int32_t ofs, len; + const auto& attrPair = propertyIndex.find({ evictPropIndex, evictValue }); + const auto& evictAttribute = attrPair->second; // compress the data, get it back in a pool ptr - const 
auto compData = bits->store(compBytes, linId, ofs, len, table->indexCompression); - const auto destAttr = recast(PoolMem::getPool().getPtr(sizeof(Attr_s) + compBytes)); - - // copy header - memcpy(destAttr, evictAttribute, sizeof(Attr_s)); - if (compData) - { - memcpy(destAttr->index, compData, compBytes); - // return work buffer from bits.store to the pool - PoolMem::getPool().freePtr(compData); - } - - destAttr->ints = bits->ints;//(isList) ? 0 : bits.ints; - destAttr->comp = static_cast(compBytes); - destAttr->linId = linId; - destAttr->ofs = ofs; - destAttr->len = len; - - attrPair->second = destAttr; - PoolMem::getPool().freePtr(evictAttribute); + evictAttribute->data = bits->store(); delete evictBits; } @@ -391,7 +368,10 @@ void Attributes::serialize(HeapStack* mem) const auto blockHeader = recast(mem->newPtr(sizeof(serializedAttr_s))); // fill in the header - blockHeader->column = kv.first.index; + // + // TODO - copy the shizzle + +/* blockHeader->column = kv.first.index; blockHeader->hashValue = kv.first.value; blockHeader->ints = kv.second->ints; blockHeader->ofs = kv.second->ofs; @@ -421,7 +401,9 @@ void Attributes::serialize(HeapStack* mem) sizeof(serializedAttr_s) + blockHeader->textSize + blockHeader->compSize; + */ } + } int64_t Attributes::deserialize(char* mem) @@ -465,14 +447,8 @@ int64_t Attributes::deserialize(char* mem) // create an attr_s object const auto attr = recast(PoolMem::getPool().getPtr(sizeof(Attr_s) + blockHeader->compSize)); attr->text = blobPtr; - attr->ints = blockHeader->ints; - attr->ofs = blockHeader->ofs; - attr->len = blockHeader->len; - attr->comp = blockHeader->compSize; - attr->linId = blockHeader->linId; - - // copy the data in - memcpy(attr->index, dataPtr, blockHeader->compSize); + + // TODO - copy the data // add it to the index propertyIndex.emplace(attr_key_s{ blockHeader->column, blockHeader->hashValue }, attr); diff --git a/src/attributes.h b/src/attributes.h index 7ee90e7..1a951bf 100644 --- a/src/attributes.h 
+++ b/src/attributes.h @@ -82,14 +82,8 @@ namespace openset::db * each int32_t is a linear_id (linear user id). * */ - //Attr_changes_s* changeTail{ nullptr }; char* text{ nullptr }; - int32_t ints{ 0 }; // number of unsigned int64 integers uncompressed data uses - int32_t ofs{ 0 }; - int32_t len{ 0 }; - int32_t comp{ 0 }; // compressed size in bytes - int32_t linId{ -1 }; - char index[1]{ 0 }; // char* (1st byte) of packed index bits struct + char* data{ nullptr }; Attr_s() = default; }; diff --git a/src/indexbits.cpp b/src/indexbits.cpp index e0bf916..1856dd3 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -7,35 +7,115 @@ using namespace std; using namespace openset::db; +void IndexMemory::decompress(char* compressedData) +{ + if (!compressedData) + return; + + reset(); + + auto rawPage = reinterpret_cast(compressedData); + + while (rawPage) + { + const auto indexPage = getPageByPageIndex(rawPage->index, false); + LZ4_decompress_fast( + rawPage->compressedData, + reinterpret_cast(indexPage->bitArray), + IndexPageDataSize); + + // next block + rawPage = rawPage->next; + } +} + +char* IndexMemory::compress() +{ + const auto compBuffer = static_cast(PoolMem::getPool().getPtr(IndexPageDataSize + Overflow)); + + RawPageList newRawPages; + + auto pageIdx = -1; + for (auto indexPage : indexPages) + { + ++pageIdx; + + const auto rawPage = getRawPage(pageIdx); + + // we have no bits in this page (skip, and cleanup the old page) + if (!pagePopulation(indexPage)) + { + if (rawPage) + PoolMem::getPool().freePtr(rawPage); + continue; + } + + // use existing if we already have compressed version of this and nothing changed + if (rawPage) + { + if (!indexPage->dirty) + { + newRawPages.push_back(rawPage); + continue; + } + PoolMem::getPool().freePtr(rawPage); + } + + indexPage->dirty = false; + + const auto compressedSize = LZ4_compress_fast( + reinterpret_cast(indexPage->bitArray), + compBuffer, + IndexPageDataSize, + IndexPageDataSize + Overflow, + 5 + ); + + const 
auto newRawPage = static_cast(PoolMem::getPool().getPtr(CompPageHeaderSize + compressedSize)); + + newRawPage->index = pageIdx; + newRawPage->next = nullptr; + memcpy(newRawPage->compressedData, compBuffer, compressedSize); + + if (newRawPages.size()) + newRawPages.back()->next = newRawPage; + + newRawPages.push_back(newRawPage); + } + + PoolMem::getPool().freePtr(compBuffer); + + rawPages = std::move(newRawPages); + + if (rawPages.size()) + return reinterpret_cast(newRawPages.front()); + + return nullptr; +} + IndexBits::IndexBits() - : bits(nullptr), - ints(0), + : data(), placeHolder(false) {} // move constructor IndexBits::IndexBits(IndexBits&& source) noexcept + : data(std::move(source.data)) { - bits = source.bits; - ints = source.ints; placeHolder = source.placeHolder; - source.bits = nullptr; - source.ints = 0; source.placeHolder = false; } // copy constructor IndexBits::IndexBits(const IndexBits& source) - : bits(nullptr), - ints(0), + : data(data), placeHolder(false) { opCopy(source); } IndexBits::IndexBits(IndexBits* source) - : bits(nullptr), - ints(0), + : data(), placeHolder(false) { opCopy(*source); @@ -52,11 +132,8 @@ IndexBits& IndexBits::operator=(IndexBits&& other) noexcept if (this != &other) { reset(); - bits = other.bits; - ints = other.ints; + data = std::move(other.data); placeHolder = other.placeHolder; - other.bits = nullptr; - other.ints = 0; other.placeHolder = false; } return *this; @@ -66,16 +143,16 @@ IndexBits& IndexBits::operator=(IndexBits&& other) noexcept IndexBits& IndexBits::operator=(const IndexBits& other) { if (this != &other) - opCopy(other); + { + data = other.data; + placeHolder = other.placeHolder; + } return *this; } void IndexBits::reset() { - if (bits) - PoolMem::getPool().freePtr(bits); - bits = nullptr; - ints = 0; + data.reset(); placeHolder = false; } @@ -83,198 +160,54 @@ void IndexBits::makeBits(const int64_t index, const int state) { reset(); - const auto pos = index >> 6ULL; // divide by 8 + const auto lastInt 
= index / 64LL; - if (pos >= ints) // is our buffer big enough? - grow(pos + 1); + for (auto i = 0; i <= lastInt; ++i) + *data.getInt(i) = state ? 0xFFFFFFFFFFFFFFFF : 0x0; - memset(bits, (state) ? 0xff : 0x00, ints * 8); - - // if we are 1 filling these bits, we must - // set every bit after index to zero if (state) { // zero the rest of the bits in the last int64 - const auto lastBit = (pos + 1) * 64LL; + const auto lastBit = data.intCount() * 64LL; for (auto i = index; i < lastBit; i++) - this->bitClear(i); + bitClear(i); } } -void IndexBits::mount( - char* compressedData, - const int32_t integers, - const int32_t offset, - const int32_t length, - const int32_t linId) +void IndexBits::mount(char* compressedData) { reset(); - - if (!integers || linId >= 0) - { - ints = 1; // LZ4 compressor uses 9 bytes with a bit set with one INT - bits = cast(PoolMem::getPool().getPtr(8)); - - *bits = 0; - - if (linId >= 0) - bitSet(linId); - - return; - } - - const auto bytes = integers * sizeof(int64_t); - const auto output = cast(PoolMem::getPool().getPtr(bytes)); - memset(output, 0, bytes); - - assert(bytes); - - const int64_t offsetPtr = offset * 8; - const int32_t byteLength = length * 8; - - // TODO - check for int overflow here - const auto code = LZ4_decompress_fast(compressedData, output + offsetPtr, byteLength); - - assert(code > 0); - - ints = integers; - bits = recast(output); - - if (linId >= 0) - bitSet(linId); + data.decompress(compressedData); } -int64_t IndexBits::getSizeBytes() const +char* IndexBits::store() { - return ints * sizeof(int64_t); -} - -char* IndexBits::store(int64_t& compressedBytes, int64_t& linId, int32_t& offset, int32_t& length, const int compRatio) -{ - if (!ints) - grow(1); - - if (const auto pop = population(ints * 64); pop == 0) - { - linId = -1; - compressedBytes = 0; - offset = 0; - length = 0; - return nullptr; - } - else if (pop == 1) - { - linId = -1; - compressedBytes = 0; - offset = 0; - length = 0; - - linearIter(linId, ints * 
64); - return nullptr; - } - - // find start - - auto idx = 0; - auto firstIdx = -1; - auto lastIdx = -1; - - while (idx < ints) - { - if (bits[idx]) - { - if (firstIdx == -1) - firstIdx = idx; - lastIdx = idx; - } - ++idx; - } - - offset = firstIdx; - length = (lastIdx - firstIdx) + 1; - - const auto maxBytes = LZ4_compressBound(length * sizeof(int64_t)); - const auto compressionBuffer = cast(PoolMem::getPool().getPtr(maxBytes)); - - //memset(compressionBuffer, 0, maxBytes); - - compressedBytes = LZ4_compress_fast( - recast(bits + offset), - compressionBuffer, - length * sizeof(int64_t), - maxBytes, - compRatio); - - linId = -1; - - return compressionBuffer; -} - -void IndexBits::grow(int64_t required, bool exact) -{ - if (ints >= required) - return; - - if (!exact) - required += ints > 100 ? (ints > 1000 ? 128 : 64) : 32; - - const auto bytes = required * sizeof(uint64_t); - const auto write = cast(PoolMem::getPool().getPtr(bytes)); - - memset(write, 0, bytes); - - if (bits) - { - const auto read = recast(bits); - - // copy the old bytes over - memcpy(write, read, ints * sizeof(uint64_t)); - - // release the old buffer - PoolMem::getPool().freePtr(read); - } - - // make active - bits = recast(write); - ints = required; + return data.compress(); } void IndexBits::bitSet(const int64_t index) { - const int64_t pos = index >> 6ULL; // divide by 8 - - if (pos >= ints) // is our buffer big enough? - grow(pos + 1, false); - - bits[pos] |= BITMASK[index & 63ULL]; // mod 64 + const auto bits = data.getBitInt(index); + *bits |= BITMASK[index & 63ULL]; // mod 64 + data.setDirty(); } -void IndexBits::lastBit(const int64_t index) +void IndexBits::setSizeByBit(const int64_t index) { - const int64_t pos = index >> 6ULL; // divide by 8 - - if (pos >= ints) // is our buffer big enough? 
- grow(pos + 1, false); + data.getBitInt(index); } void IndexBits::bitClear(const int64_t index) { - const int64_t pos = index >> 6ULL; // divide by 8 - - if (pos >= ints) // is our buffer big enough? - grow(pos + 1, false); - - bits[pos] &= ~(BITMASK[index & 63ULL]); // mod 64 + const auto bits = data.getBitInt(index); + *bits &= ~(BITMASK[index & 63ULL]); // mod 64 + data.setDirty(); } -bool IndexBits::bitState(const int64_t index) const +bool IndexBits::bitState(const int64_t index) { - const int64_t pos = index >> 6ULL; // divide by 8 - - if (pos >= ints) // is our buffer big enough? - return false; - - return (bits[pos] & BITMASK[index & 63ULL]); + const auto bits = data.getBitInt(index); + return ((*bits) & BITMASK[index & 63ULL]); } /* @@ -290,44 +223,27 @@ bool IndexBits::bitState(const int64_t index) const but NOT operations will not the whole buffer, and this will result in incorrect counts. */ -int64_t IndexBits::population(int stopBit) const +int64_t IndexBits::population(const int64_t stopBit) { - if (!bits || !ints) - return 0; - int64_t count = 0; - auto pSource = bits; // truncates to the one we want - int64_t lastInt = stopBit / 64LL; - - // The stopBit might be beyond the end - // if the 'ints' buffer. 
In which case - // we will set lastInt to the size of ints - // and stopBit to the very last bit (which - // will stop it from entering the dangling - // bits loop) - - if (static_cast(stopBit / 64) > ints) - { - lastInt = ints; - stopBit = lastInt * 64; - } - - const auto pEnd = pSource + lastInt; + const auto lastInt = stopBit / 64LL; + int64_t idx = 0; - while (pSource < pEnd) + while (idx < lastInt) { + const auto value = data.getInt(idx); #ifdef _MSC_VER - count += __popcnt64(*pSource); + count += __popcnt64(*value); #else - count += __builtin_popcountll(*pSource); + count += __builtin_popcountll(*value); #endif - ++pSource; + ++idx; } // count any dangling single bits - for (auto idx = lastInt * 64; idx < stopBit; ++idx) + for (idx = lastInt * 64LL; idx < stopBit; ++idx) count += bitState(idx) ? 1 : 0; return count; @@ -336,12 +252,9 @@ int64_t IndexBits::population(int stopBit) const void IndexBits::opCopy(const IndexBits& source) { reset(); - grow(source.ints); - - if (source.ints && source.bits) - memcpy(bits, source.bits, source.ints * sizeof(int64_t)); - + data = source.data; placeHolder = source.placeHolder; + data.setDirtyAllPages(); } void IndexBits::opCopyNot(IndexBits& source) @@ -355,23 +268,15 @@ void IndexBits::opAnd(IndexBits& source) if (placeHolder || source.placeHolder) return; - if (!source.ints) - return; - - if (source.ints > ints) - grow(source.ints); - else if (source.ints < ints) - source.grow(ints); - - volatile auto pSource = source.bits; - volatile auto pDest = bits; - const volatile auto pEnd = source.bits + source.ints; + auto index = 0; + const auto end = source.data.intCount(); - while (pSource < pEnd) + while (index < end) { - *pDest = ((*pDest) & (*pSource)); - ++pSource; - ++pDest; + const auto dest = data.getInt(index); + *dest &= *source.data.getInt(index); + data.setDirty(); + ++index; } } @@ -380,23 +285,15 @@ void IndexBits::opOr(IndexBits& source) if (placeHolder || source.placeHolder) return; - if (!source.ints) - 
return; - - if (source.ints > ints) - grow(source.ints); - else if (source.ints < ints) - source.grow(ints); + auto index = 0; + const auto end = source.data.intCount(); - volatile auto pSource = source.bits; - volatile auto pDest = bits; - const volatile auto pEnd = source.bits + source.ints; - - while (pSource < pEnd) + while (index < end) { - *pDest = ((*pDest) | (*pSource)); - ++pSource; - ++pDest; + const auto dest = data.getInt(index); + *dest |= *source.data.getInt(index); + data.setDirty(); + ++index; } } @@ -405,71 +302,35 @@ void IndexBits::opAndNot(IndexBits& source) if (placeHolder || source.placeHolder) return; - if (!source.ints) - return; + auto index = 0; + const auto end = source.data.intCount(); - if (source.ints > ints) - grow(source.ints); - else if (source.ints < ints) - source.grow(ints); - - volatile auto pSource = source.bits; - volatile auto pDest = bits; - const volatile auto pEnd = source.bits + source.ints; - - while (pSource < pEnd) + while (index < end) { - *pDest = ((*pDest) & (~(*pSource))); - ++pSource; - ++pDest; + const auto dest = data.getInt(index); + *dest = *dest & ~(*source.data.getInt(index)); + data.setDirty(); + ++index; } } -void IndexBits::opNot() const +void IndexBits::opNot() { if (placeHolder) return; - if (!ints || !bits) - return; - - volatile auto pSource = bits; - const volatile auto pEnd = bits + ints; + auto index = 0; + const auto end = data.intCount(); - while (pSource < pEnd) + while (index < end) { - *pSource = (~(*pSource)); - ++pSource; + const auto dest = data.getInt(index); + *dest = ~(*dest); + data.setDirty(); + ++index; } } -string IndexBits::debugBits(const IndexBits& bits, int limit) -{ - string result; - auto counter = 0; - for (auto i = 0; i < bits.ints; i++) - { - auto i64 = bits.bits[i]; - for (auto b = 0; b < 64; b++) - { - if (i64 & 1) - result += '1'; - else - result += '0'; - - if (b % 8 == 7) - result += ' '; - - i64 = i64 >> 1; - - ++counter; - if (counter == limit) - return result; - } - 
} - return result; -} - /* linearIter(int64_t &linId, int stopBit) @@ -484,32 +345,34 @@ return true if a new linear id is found. recommend using in a while loop. */ -bool IndexBits::linearIter(int64_t& linId, const int64_t stopBit) const +bool IndexBits::linearIter(int64_t& linId, const int64_t stopBit) { ++linId; + const auto count = data.intCount(); auto currentInt = linId / 64LL; - while (currentInt < ints) + while (currentInt < count) { - if (bits[currentInt]) + const auto value = data.getInt(currentInt); + + if (*value) { - const int64_t bitNumber = linId % 64; + const auto bitNumber = linId % 64LL; - //if (bitIndex >= stopBit) if (linId >= stopBit) return false; - for (auto i = bitNumber; i < 64LL; i++) + for (auto i = bitNumber; i < 64LL; ++i) { - if (bits[currentInt] & BITMASK[i]) + if (*value & BITMASK[i]) { linId = (currentInt * 64LL) + i; return true; } } } - currentInt++; + ++currentInt; linId = (currentInt * 64); } diff --git a/src/indexbits.h b/src/indexbits.h index 5e77614..42eab49 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -1,16 +1,234 @@ #pragma once +#include + #include "common.h" +#include "sba/sba.h" +#include namespace openset { namespace db { + const int64_t BitArraySize = 510; + + struct IndexPageMemory_s + { + bool dirty { false }; + bool empty { true }; + // 4096 bytes + int64_t bitArray[BitArraySize]; + }; + + const int64_t IndexPageRecordSize = sizeof(IndexPageMemory_s); + const int64_t IndexPageDataSize = sizeof(uint64_t) * BitArraySize; + const int64_t IndexBitsPerPage = BitArraySize * 64; + const int64_t Overflow = 64; + +#pragma pack(push,1) + struct CompPageMemory_s + { + int index { 0 }; + CompPageMemory_s* next { nullptr }; + char compressedData[IndexPageDataSize]; + }; +#pragma pack(pop) + + const int64_t CompPageHeaderSize = sizeof(int) + sizeof(CompPageMemory_s*); + + class IndexMemory + { + using IndexPageList = std::vector; + using RawPageList = std::vector; + + IndexPageList indexPages; + RawPageList rawPages; + + 
IndexPageMemory_s* lastIndex { nullptr }; + + public: + + IndexMemory() = default; + + IndexMemory(IndexMemory&& source) noexcept + { + lastIndex = source.lastIndex; + indexPages = std::move(source.indexPages); + rawPages = std::move(source.rawPages); + + source.indexPages.clear(); + source.rawPages.clear(); + } + + IndexMemory(const IndexMemory& source) + { + // raw pages are not copied + lastIndex = source.lastIndex; + + for (auto sourcePage : source.indexPages) + { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(IndexPageRecordSize)); + memcpy(page, sourcePage, IndexPageRecordSize); + indexPages.push_back(page); + } + } + + IndexMemory(IndexMemory* source) + { + // raw pages are not copied + lastIndex = source->lastIndex; + + for (auto sourcePage : source->indexPages) + { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(IndexPageRecordSize)); + memcpy(page, sourcePage, IndexPageRecordSize); + indexPages.push_back(page); + } + } + + IndexMemory& operator=(IndexMemory&& source) noexcept + { + lastIndex = source.lastIndex; + indexPages = std::move(source.indexPages); + rawPages = std::move(source.rawPages); + + return *this; + } + + IndexMemory& operator=(const IndexMemory& source) + { + // raw pages are not copied + reset(); + lastIndex = source.lastIndex; + + for (auto sourcePage : source.indexPages) + { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(IndexPageRecordSize)); + memcpy(page, sourcePage, IndexPageRecordSize); + indexPages.push_back(page); + } + + return *this; + } + + ~IndexMemory() + { + reset(); + } + + void reset() + { + for (auto page : indexPages) + PoolMem::getPool().freePtr(page); + indexPages.clear(); + rawPages.clear(); + lastIndex = nullptr; + } + + int64_t intCount() const + { + return BitArraySize * static_cast(indexPages.size()); + } + + int64_t* getBitInt(const int64_t bitIndex) + { + const auto page = getPage(bitIndex); + lastIndex = page; + const auto intIndex = (bitIndex / 64LL) % 
BitArraySize; // convert bit index into int64 index + + return page->bitArray + intIndex; + } + + int64_t* getInt(const int64_t intIndex) + { + const auto page = getPage(intIndex * 64LL); + lastIndex = page; + const auto indexInPage = intIndex % BitArraySize; + + return page->bitArray + indexInPage; + } + + void setDirty() const + { + if (lastIndex) + lastIndex->dirty = true; + } + + void setDirtyAllPages() + { + for (const auto page : indexPages) + page->dirty = true; + } + + IndexPageMemory_s* getPage(const int64_t bitIndex) + { + const auto pageIndex = bitIndex / IndexBitsPerPage; // convert bit index into page in dex + + while (pageIndex >= static_cast(indexPages.size())) + { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(IndexPageRecordSize)); + memset(page->bitArray, 0, IndexPageDataSize); + indexPages.push_back(page); + } + + return indexPages.at(pageIndex); + } + + IndexPageMemory_s* getPageByPageIndex(const int64_t pageIndex, const bool clean = true) + { + while (pageIndex >= static_cast(indexPages.size())) + { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(IndexPageRecordSize)); + if (clean) + memset(page->bitArray, 0, IndexPageDataSize); + indexPages.push_back(page); + } + + return indexPages.at(pageIndex); + } + + CompPageMemory_s* getRawPage(const int pageIndex) + { + for (auto page : rawPages) + { + if (page->index > pageIndex) + break; + if (page->index == pageIndex) + return page; + } + + return nullptr; + } + + static int pagePopulation(IndexPageMemory_s* page) + { + auto source = static_cast(page->bitArray); + const auto end = source + BitArraySize; + + int64_t pop = 0; + + while (source < end) + { + #ifdef _MSC_VER + pop += __popcnt64(*source); + #else + pop += __builtin_popcountll(*source); + #endif + ++source; + } + + return static_cast(pop); + } + + void decompress(char* compressedData); + char* compress(); + }; + + class IndexBits { public: - uint64_t* bits; - int32_t ints; // length in int64's + IndexMemory 
data; bool placeHolder; IndexBits(); @@ -30,85 +248,34 @@ namespace openset // takes buffer to compressed data and actual size as parameters // note: actual size is number of long longs (in64_t) - void mount(char* compressedData, int32_t integers, int32_t offset, int32_t length, int32_t linId); - - int64_t getSizeBytes() const; + void mount(char* compressedData); // returns a POOL buffer ptr, and the number of bytes - char* store(int64_t& compressedBytes, int64_t& linId, int32_t& offset, int32_t& length, int compRatio = 1); + char* store(); - void grow(int64_t required, bool exact = true); - - void lastBit(int64_t index); + void setSizeByBit(int64_t index); void bitSet(int64_t index); void bitClear(int64_t index); - bool bitState(int64_t index) const; + bool bitState(int64_t index); - int64_t population(int stopBit) const; + int64_t population(const int64_t stopBit); void opCopy(const IndexBits& source); void opCopyNot(IndexBits& source); void opAnd(IndexBits& source); void opOr(IndexBits& source); void opAndNot(IndexBits& source); - void opNot() const; - - bool linearIter(int64_t& linId, int64_t stopBit) const; - - class BitProxy - { - public: - IndexBits* bits; - int idx; - int value; - - BitProxy(IndexBits* bits, const int idx) - : bits(bits), - idx(idx) - { - value = bits->bitState(idx); - } - - ~BitProxy() - { - cout << "destroyed" << endl; - } - - void operator=(const int rhs) - { - value = rhs; - if (rhs) - bits->bitSet(idx); - else - bits->bitClear(idx); - } - - operator int() const - { - return value; - } - }; - - static string debugBits(const IndexBits& bits, int limit = 64); - - BitProxy operator[](const int idx) - { - return BitProxy(this, idx); - } + void opNot(); - friend std::ostream& operator<<(std::ostream& os, const IndexBits& source) - { - os << debugBits(source, static_cast(os.width() ? 
os.width() : 128)); - return os; - } + bool linearIter(int64_t& linId, int64_t stopBit); }; class IndexLRU { using Key = std::pair; - using Value = std::pair::iterator>; + using Value = std::pair::iterator>; - list items; + std::list items; unordered_map keyValuesMap; int cacheSize; @@ -121,26 +288,17 @@ namespace openset { const Key key(propIndex, value); - if (const auto iter = keyValuesMap.find(key); iter == keyValuesMap.end()) - { - items.push_front(key); + items.push_front(key); - const Value listMap(bits, items.begin()); - keyValuesMap[key] = listMap; + const Value listMap(bits, items.begin()); + keyValuesMap[key] = listMap; - if (keyValuesMap.size() > cacheSize) { - const auto evicted = keyValuesMap[items.back()].first; - keyValuesMap.erase(items.back()); - items.pop_back(); - return {key.first, key.second, evicted}; - } - } - else - { - items.erase(iter->second.second); - items.push_front(key); - const Value listMap(bits, items.begin()); - keyValuesMap[key] = listMap; + if (keyValuesMap.size() > cacheSize) { + const auto evictedKey = items.back(); + const auto evicted = keyValuesMap[items.back()].first; + keyValuesMap.erase(items.back()); + items.pop_back(); + return {evictedKey.first, evictedKey.second, evicted}; } return {0,0,0}; @@ -151,17 +309,14 @@ namespace openset const Key key(propIndex, value); - if (auto iter = keyValuesMap.find(key); iter == keyValuesMap.end()) - { - return nullptr; - } - else + if (auto &iter = keyValuesMap.find(key); iter != keyValuesMap.end()) { items.erase(iter->second.second); items.push_front(key); - keyValuesMap[key] = { iter->second.first, items.begin() }; + iter->second.second = items.begin(); return iter->second.first; } + return nullptr; } }; }; diff --git a/src/oloop_property.cpp b/src/oloop_property.cpp index bc3c59d..d338fa0 100644 --- a/src/oloop_property.cpp +++ b/src/oloop_property.cpp @@ -130,7 +130,6 @@ void OpenLoopProperty::prepare() auto bits = allBits; bits->opAnd(*s); aggs->columns[idx].value = 
bits->population(stopBit); - delete bits; ++idx; } diff --git a/src/oloop_segment.cpp b/src/oloop_segment.cpp index 85c31ee..0e4a301 100644 --- a/src/oloop_segment.cpp +++ b/src/oloop_segment.cpp @@ -269,7 +269,6 @@ bool OpenLoopSegment::run() // get a fresh pointer to bits on each entry in case they left the LRU maxLinearId = parts->people.customerCount(); - segmentName = macroIter->first; interpreter->setBits(parts->getBits(segmentName), maxLinearId); while (true) @@ -278,7 +277,10 @@ bool OpenLoopSegment::run() return true; // let some other cells run if (!interpreter) + { + suicide(); return false; + } // if there was an error, exit if (interpreter->error.inError()) diff --git a/src/queryindexing.cpp b/src/queryindexing.cpp index aa19b99..19ab076 100644 --- a/src/queryindexing.cpp +++ b/src/queryindexing.cpp @@ -105,7 +105,7 @@ openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) if (negate) { - resultBits.grow((stopBit / 64) + 1); // grow it to it's fullest size before we flip them all + resultBits.setSizeByBit(stopBit); // grow it to it's fullest size before we flip them all resultBits.opNot(); } @@ -239,12 +239,14 @@ IndexBits Indexing::buildIndex(HintOpList &index, bool countable) { IndexBits bits; bits.makeBits(maxLinId, 1); + cout << bits.population(maxLinId) << ":" << maxLinId << ":" << stopBit << endl; + countable = false; return bits; } auto res = stack.back().bits; - res.grow((stopBit / 64) + 1); + cout << res.population(stopBit) << ":" << stopBit << endl; + res.setSizeByBit(stopBit); return res; - } diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index c5fe8c1..52f4c2e 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -3067,7 +3067,7 @@ void openset::query::Interpreter::setBits(IndexBits* indexBits, const int maxPop { bits = indexBits; maxBitPop = maxPopulation; - bits->lastBit(maxBitPop); + bits->setSizeByBit(maxBitPop); } void openset::query::Interpreter::setCompareSegments(IndexBits* 
querySegment, std::vector segments) diff --git a/src/rpc_insert.cpp b/src/rpc_insert.cpp index cb20661..38620a0 100644 --- a/src/rpc_insert.cpp +++ b/src/rpc_insert.cpp @@ -225,7 +225,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa thread work([=]() { while (SideLog::getSideLog().getLogSize() > 25000) - ThreadSleep(55); + ThreadSleep(5); message->reply(http::StatusCode::success_ok, response); }); diff --git a/src/tablepartitioned.cpp b/src/tablepartitioned.cpp index f3f9b57..4e36833 100644 --- a/src/tablepartitioned.cpp +++ b/src/tablepartitioned.cpp @@ -202,7 +202,7 @@ std::function TablePartitioned::g // if there are no bits with this name created in this query // then look in the index const auto bits = this->attributes.getBits(PROP_SEGMENT, MakeHash(segmentName)); - deleteAfterUsing = true; + deleteAfterUsing = false; return bits; }; diff --git a/src/ver.h b/src/ver.h index ba0ef9a..5838e9d 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.4" +"0.4.5" ; \ No newline at end of file From cf8214e61257f0fc05e0af98107737baa45b8835 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 03:00:40 -0500 Subject: [PATCH 09/31] new memory management for index bits, LRU for bits --- src/indexbits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/indexbits.h b/src/indexbits.h index 42eab49..86a377c 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -309,7 +309,7 @@ namespace openset const Key key(propIndex, value); - if (auto &iter = keyValuesMap.find(key); iter != keyValuesMap.end()) + if (auto iter = keyValuesMap.find(key); iter != keyValuesMap.end()) { items.erase(iter->second.second); items.push_front(key); From f32b56ccf84cb3b3352bc744d725e9bc6561d92d Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 03:17:24 -0500 Subject: [PATCH 10/31] added includes for linux/gcc --- lib/mem/blhash.h | 2 ++ src/attributes.cpp | 9 
+++++---- src/attributes.h | 1 + src/common.h | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index bb57f60..a2bac8a 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -24,8 +24,10 @@ THE SOFTWARE. #include #include +#include #include #include +#include #include "../heapstack/heapstack.h" typedef uint16_t tBranch; diff --git a/src/attributes.cpp b/src/attributes.cpp index 3321670..f53c501 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -57,7 +57,7 @@ void Attributes::addChange(const int64_t customerId, const int32_t propIndex, co if (propIndex == PROP_STAMP || propIndex == PROP_UUID || propIndex == PROP_SESSION) return; - const auto key = attr_key_s{ propIndex, value }; + const auto key = attr_key_s( propIndex, value ); if (state) customerIndexing.insert(propIndex, customerId, linearId, value); @@ -76,7 +76,8 @@ void Attributes::addChange(const int64_t customerId, const int32_t propIndex, co Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { - if (auto& res = propertyIndex.emplace(attr_key_s{ propIndex, value }, nullptr); res.second == true) + auto key = attr_key_s( propIndex, value ); + if (auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); res.first->second = attr; @@ -102,9 +103,9 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) Attr_s* Attributes::getMake(const int32_t propIndex, const string& value) { - const auto valueHash = MakeHash(value); + auto key = attr_key_s( propIndex, MakeHash(value) ); - if (auto& res = propertyIndex.emplace(attr_key_s{ propIndex, valueHash }, nullptr); res.second == true) + if (auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); attr->text = blob->storeValue(propIndex, value); diff --git a/src/attributes.h 
b/src/attributes.h index 1a951bf..dce35e9 100644 --- a/src/attributes.h +++ b/src/attributes.h @@ -1,5 +1,6 @@ #pragma once +#include "common.h" #include //#include "mem/bigring.h" #include "mem/blhash.h" diff --git a/src/common.h b/src/common.h index fd0bd88..b85b562 100644 --- a/src/common.h +++ b/src/common.h @@ -1,7 +1,7 @@ #pragma once #include "logger.h" - +#include #include #include #include From e6e7d682903e4c74858d35d9ea5b88147f02b2e7 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 22:34:39 -0500 Subject: [PATCH 11/31] std::pair hash fix, asc/desc sorts by values and basic sort --- CMakeLists.txt | 2 + lib/mem/blhash.h | 39 ++++++- numericCustomerIds | 0 src/attributes.cpp | 188 ++---------------------------- src/common.cpp | 2 - src/common.h | 28 ++++- src/customer_index.cpp | 3 +- src/customer_index.h | 8 +- src/indexbits.cpp | 8 +- src/indexbits.h | 33 +++--- src/oloop_customer_basic.cpp | 214 +++++++++++++++++++++++++++++++++++ src/oloop_customer_basic.h | 66 +++++++++++ src/oloop_customer_list.cpp | 95 ++++++++++------ src/oloop_customer_list.h | 2 + src/oloop_histogram.cpp | 2 +- src/oloop_insert.cpp | 12 +- src/oloop_property.cpp | 20 ++-- src/oloop_property.h | 4 +- src/oloop_query.cpp | 2 +- src/oloop_seg_refresh.cpp | 10 +- src/oloop_seg_refresh.h | 1 - src/oloop_segment.cpp | 23 ++-- src/queryindexing.cpp | 3 - src/queryinterpreter.cpp | 1 + src/queryparserosl.h | 2 +- src/rpc_query.cpp | 99 +++++++++++----- src/tablepartitioned.cpp | 31 ++--- src/tablepartitioned.h | 12 +- table- | 0 29 files changed, 569 insertions(+), 341 deletions(-) create mode 100644 numericCustomerIds create mode 100644 src/oloop_customer_basic.cpp create mode 100644 src/oloop_customer_basic.h create mode 100644 table- diff --git a/CMakeLists.txt b/CMakeLists.txt index e1a46c7..566d3d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,8 @@ set(SOURCE_FILES src/oloop_cleaner.h src/oloop_customer.cpp src/oloop_customer.h + 
src/oloop_customer_basic.cpp + src/oloop_customer_basic.h src/oloop_customer_list.cpp src/oloop_customer_list.h src/oloop_histogram.cpp diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index a2bac8a..bc51b17 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -361,7 +361,7 @@ class BinaryListHash }; - HashVector& serialize(int limit, FilterCB filterCallBack) + HashVector& serialize(bool descending, int limit, FilterCB filterCallBack) { tKey key; serializeOver.set(&key); @@ -372,14 +372,17 @@ class BinaryListHash serializeLimit = limit; serializeCB = filterCallBack; - serializeRecurse(root, 0); + if (descending) + serializeRecurseDescending(root, 0); + else + serializeRecurseAscending(root, 0); return serializeList; } private: - void serializeRecurse(bl_array_s* node, int depth) + void serializeRecurseAscending(bl_array_s* node, int depth) { for (auto idx = 0; idx < node->used; ++idx) { @@ -403,10 +406,38 @@ class BinaryListHash } else { - serializeRecurse(reinterpret_cast(node->nodes[idx].next), depth + 1); + serializeRecurseAscending(reinterpret_cast(node->nodes[idx].next), depth + 1); } } + } + void serializeRecurseDescending(bl_array_s* node, int depth) + { + for (auto idx = node->used - 1; idx >= 0; --idx) + { + if (serializeLimit == -1) + return; + + serializeOver.words[serializeOver.elements - 1 - depth] = node->nodes[idx].valueWord; + + if (depth == serializeOver.elements - 1) + { + if (//serializeOver > serializeStart && + serializeCB(serializeOver.getKeyPtr(), reinterpret_cast(&node->nodes[idx].next))) + { + serializeList.emplace_back(*serializeOver.getKeyPtr(), *reinterpret_cast(&node->nodes[idx].next)); + if (serializeList.size() == serializeLimit) + { + serializeLimit = -1; + return; + } + } + } + else + { + serializeRecurseDescending(reinterpret_cast(node->nodes[idx].next), depth + 1); + } + } } // this is a fairly common binary search. 
Google will find you serveral diff --git a/numericCustomerIds b/numericCustomerIds new file mode 100644 index 0000000..e69de29 diff --git a/src/attributes.cpp b/src/attributes.cpp index f53c501..4a952e4 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -40,7 +40,7 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) // if anything got squeezed out compress it if (evictBits) { - const auto& attrPair = propertyIndex.find({ evictPropIndex, evictValue }); + const auto& attrPair = propertyIndex.find({ static_cast(evictPropIndex), evictValue }); const auto& evictAttribute = attrPair->second; // compress the data, get it back in a pool ptr @@ -75,9 +75,9 @@ void Attributes::addChange(const int64_t customerId, const int32_t propIndex, co Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { - auto key = attr_key_s( propIndex, value ); - if (auto& res = propertyIndex.emplace(key, nullptr); res.second == true) + + if (auto res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); res.first->second = attr; @@ -87,25 +87,13 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { return res.first->second; } - /* - if (auto attrPair = propertyIndex.find({ propIndex, value }); attrPair == propertyIndex.end()) - { - const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); - propertyIndex.emplace(attr_key_s{ propIndex, value }, attr); - return attr; - } - else - { - return attrPair->second; - } - */ } Attr_s* Attributes::getMake(const int32_t propIndex, const string& value) { auto key = attr_key_s( propIndex, MakeHash(value) ); - if (auto& res = propertyIndex.emplace(key, nullptr); res.second == true) + if (auto res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); attr->text = blob->storeValue(propIndex, value); @@ 
-146,124 +134,21 @@ void Attributes::setDirty(const int64_t customerId, const int32_t linId, const i void Attributes::clearDirty() { - //IndexBits bits; - for (auto& change : changeIndex) { - getMake(change.first.index, change.first.value); - - //if (attrPair == propertyIndex.end() || !attrPair->second) - // continue; - - auto bits = getBits(change.first.index, change.first.value); + const auto bits = getBits(change.first.index, change.first.value); - for (const auto& t : change.second) + for (const auto t : change.second) { if (t.state) bits->bitSet(t.linId); else bits->bitClear(t.linId); } - - // TODO - check for non-existent prop. - - - /* - const auto attr = attrPair->second; - - bits.mount(attr->index, attr->ints, attr->ofs, attr->len, attr->linId); - - for (const auto& t : change.second) - { - if (t.state) - bits.bitSet(t.linId); - else - bits.bitClear(t.linId); - } - - if (!bits.population(bits.ints * 64)) //pop count zero? remove this - { - drop(change.first.index, change.first.value ); - PoolMem::getPool().freePtr(attr); - } - else - { - int64_t compBytes = 0; // OUT value via reference - int64_t linId; - int32_t ofs, len; - - // compress the data, get it back in a pool ptr - const auto compData = bits.store(compBytes, linId, ofs, len, table->indexCompression); - const auto destAttr = recast(PoolMem::getPool().getPtr(sizeof(Attr_s) + compBytes)); - - // copy header - memcpy(destAttr, attr, sizeof(Attr_s)); - if (compData) - { - memcpy(destAttr->index, compData, compBytes); - // return work buffer from bits.store to the pool - PoolMem::getPool().freePtr(compData); - } - - destAttr->ints = bits.ints;//(isList) ? 0 : bits.ints; - destAttr->comp = static_cast(compBytes); - destAttr->linId = linId; - destAttr->ofs = ofs; - destAttr->len = len; - - // if we made a new destination, we have to update the - // index to point to it, and free the old one up. 
- // update the Attr pointer directly in the index - attrPair->second = destAttr; - PoolMem::getPool().freePtr(attr); - } - */ - } - changeIndex.clear(); -} - -/* -void Attributes::swap(const int32_t propIndex, const int64_t value, IndexBits* newBits) -{ - auto attrPair = propertyIndex.find(attr_key_s{ propIndex, value }); - - if (attrPair == propertyIndex.end()) - return; - - const auto attr = attrPair->second; - - int64_t compBytes = 0; // OUT value - int64_t linId = -1; - int32_t len, ofs; - - // compress the data, get it back in a pool ptr, size returned in compBytes - const auto compData = newBits->store(compBytes, linId, ofs, len); - auto destAttr = recast(PoolMem::getPool().getPtr(sizeof(Attr_s) + compBytes)); - - // copy header - memcpy(destAttr, attr, sizeof(Attr_s)); - if (compData) - { - memcpy(destAttr->index, compData, compBytes); - // return work buffer from bits.store to the pool - PoolMem::getPool().freePtr(compData); } - destAttr->text = attr->text; - destAttr->ints = (compBytes) ? newBits->ints: 0;//asList ? 0 : newBits->ints; - destAttr->comp = static_cast(compBytes); // TODO - check for overflow - destAttr->linId = linId; - destAttr->ofs = ofs; - destAttr->len = len; - - // if we made a new destination, we have to update the - // index to point to it, and free the old one up. 
- propertyIndex.insert({attr_key_s{ propIndex, value }, destAttr}); - - // FIX - memory leak - PoolMem::getPool().freePtr(attr); + changeIndex.clear(); } -*/ AttributeBlob* Attributes::getBlob() const { @@ -294,10 +179,6 @@ Attributes::AttrList Attributes::getPropertyValues(const int32_t propIndex, cons if (const auto tAttr = get(propIndex, value); tAttr) result.emplace_back(propIndex, value); return result; - //case listMode_e::PRESENT_FAST: // fast for reducing set in `!= nil` test - // if (const auto tAttr = get(propIndex, NONE); tAttr) - // result.push_back(tAttr); - // return result; default: ; } @@ -352,58 +233,11 @@ void Attributes::serialize(HeapStack* mem) const auto sectionLength = recast(mem->newPtr(sizeof(int64_t))); (*sectionLength) = 0; - for (auto& kv : propertyIndex) - { - /* STL ugliness - I wish they let you alias these names somehow - * - * kv.first is property and value - * kv.second is Attr_s* - * - * so - * - * kv.first.first is property - * kv.first.second is value - */ - + //for (auto& kv : propertyIndex) + //{ // add a header to the HeapStack - const auto blockHeader = recast(mem->newPtr(sizeof(serializedAttr_s))); - - // fill in the header - // - // TODO - copy the shizzle - -/* blockHeader->column = kv.first.index; - blockHeader->hashValue = kv.first.value; - blockHeader->ints = kv.second->ints; - blockHeader->ofs = kv.second->ofs; - blockHeader->len = kv.second->len; - blockHeader->linId = kv.second->linId; - const auto text = this->blob->getValue(kv.first.index, kv.first.value); - blockHeader->textSize = text ? strlen(text) : 0; - //blockHeader->textSize = item.second->text ? 
strlen(item.second->text) : 0; - blockHeader->compSize = kv.second->comp; - - // copy a text/blob value if any - if (blockHeader->textSize) - { - const auto textData = recast(mem->newPtr(blockHeader->textSize)); - memcpy(textData, text, blockHeader->textSize); - //memcpy(textData, item.second->text, blockHeader->textSize); - } - - // copy the compressed data - if (blockHeader->compSize) - { - const auto blockData = recast(mem->newPtr(blockHeader->compSize)); - memcpy(blockData, kv.second->index, blockHeader->compSize); - } - - (*sectionLength) += - sizeof(serializedAttr_s) + - blockHeader->textSize + - blockHeader->compSize; - */ - } + //const auto blockHeader = recast(mem->newPtr(sizeof(serializedAttr_s))); + //} } diff --git a/src/common.cpp b/src/common.cpp index a902550..e87cac5 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -12,8 +12,6 @@ int64_t Now() (std::chrono::system_clock::now().time_since_epoch()).count(); } -static const int64_t HASH_SEED = 0xFACEFEEDDEADBEEFLL; - int64_t MakeHash(const char* buffer, const int64_t len) { return XXH64(buffer, len, HASH_SEED); diff --git a/src/common.h b/src/common.h index b85b562..c46117a 100644 --- a/src/common.h +++ b/src/common.h @@ -5,10 +5,11 @@ #include #include #include +#include -const int32_t PARTITION_MAX = 1024; // hard limit, not operating limit -const int32_t MAX_PROPERTIES = 4096; - +static const int32_t PARTITION_MAX = 1024; // hard limit, not operating limit +static const int32_t MAX_PROPERTIES = 4096; +static const int64_t HASH_SEED = 0xFACEFEEDDEADBEEFLL; /* Because the full names a just do damn long and ugly turning what could usually fit on one line of code into two @@ -68,7 +69,14 @@ namespace std { size_t operator()(const std::pair& v) const { - return static_cast(MakeHash(recast(&v), sizeof(v))); + return static_cast(XXH64( + reinterpret_cast(&v.first), + 4, + XXH64( + reinterpret_cast(&v.second), + 8, + HASH_SEED) + )); } }; @@ -76,9 +84,17 @@ namespace std template <> struct hash> { - 
size_t operator()(const std::pair& v) const + size_t operator()(const std::pair& v) const { - return static_cast(MakeHash(recast(&v), sizeof(v))); + return static_cast(XXH64( + reinterpret_cast(&v.first), + 8, + XXH64( + reinterpret_cast(&v.second), + 4, + HASH_SEED) + )); + } }; diff --git a/src/customer_index.cpp b/src/customer_index.cpp index c4627e1..ec7e8f7 100644 --- a/src/customer_index.cpp +++ b/src/customer_index.cpp @@ -1,8 +1,9 @@ #include "customer_index.h" openset::db::CustomerIndexList openset::db::CustomerPropIndex::serialize( + bool descending, int limit, const std::function& filterCallback) { - return index.serialize(limit, filterCallback); + return index.serialize(descending, limit, filterCallback); } diff --git a/src/customer_index.h b/src/customer_index.h index 2b7405c..3d3d61d 100644 --- a/src/customer_index.h +++ b/src/customer_index.h @@ -31,7 +31,7 @@ namespace openset CustomerPropIndex() = default; ~CustomerPropIndex() = default; - void insert(int64_t customerId, int linId, int64_t value) + void insert(const int64_t customerId, const int linId, const int64_t value) { index.set(SortKeyOneProp_s{ customerId, value}, linId); } @@ -42,6 +42,7 @@ namespace openset } CustomerIndexList serialize( + bool descending, int limit, const std::function& filterCallback); }; @@ -79,13 +80,14 @@ namespace openset iter->second->erase(customerId, value); } - CustomerIndexList getListAscending( + CustomerIndexList getList( int propIndex, + bool descending, int limit, const std::function& filterCallback) { if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) - return iter->second->serialize(limit, filterCallback); + return iter->second->serialize(descending, limit, filterCallback); return {}; } }; diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 1856dd3..35bdeb2 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -100,23 +100,21 @@ IndexBits::IndexBits() // move constructor IndexBits::IndexBits(IndexBits&& source) noexcept - : 
data(std::move(source.data)) { + data = std::move(source.data); placeHolder = source.placeHolder; source.placeHolder = false; } // copy constructor IndexBits::IndexBits(const IndexBits& source) - : data(data), - placeHolder(false) + : placeHolder(false) { opCopy(source); } IndexBits::IndexBits(IndexBits* source) - : data(), - placeHolder(false) + : placeHolder(false) { opCopy(*source); } diff --git a/src/indexbits.h b/src/indexbits.h index 86a377c..c976af7 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -10,12 +10,11 @@ namespace openset { namespace db { - const int64_t BitArraySize = 510; + const int64_t BitArraySize = 128; struct IndexPageMemory_s { bool dirty { false }; - bool empty { true }; // 4096 bytes int64_t bitArray[BitArraySize]; }; @@ -52,18 +51,15 @@ namespace openset IndexMemory(IndexMemory&& source) noexcept { - lastIndex = source.lastIndex; + lastIndex = nullptr; indexPages = std::move(source.indexPages); rawPages = std::move(source.rawPages); - - source.indexPages.clear(); - source.rawPages.clear(); } IndexMemory(const IndexMemory& source) { // raw pages are not copied - lastIndex = source.lastIndex; + lastIndex = nullptr; for (auto sourcePage : source.indexPages) { @@ -76,7 +72,7 @@ namespace openset IndexMemory(IndexMemory* source) { // raw pages are not copied - lastIndex = source->lastIndex; + lastIndex = nullptr; for (auto sourcePage : source->indexPages) { @@ -88,7 +84,7 @@ namespace openset IndexMemory& operator=(IndexMemory&& source) noexcept { - lastIndex = source.lastIndex; + lastIndex = nullptr; indexPages = std::move(source.indexPages); rawPages = std::move(source.rawPages); @@ -99,7 +95,7 @@ namespace openset { // raw pages are not copied reset(); - lastIndex = source.lastIndex; + lastIndex = nullptr; for (auto sourcePage : source.indexPages) { @@ -272,7 +268,7 @@ namespace openset class IndexLRU { - using Key = std::pair; + using Key = std::pair; using Value = std::pair::iterator>; std::list items; @@ -284,7 +280,7 @@ namespace 
openset cacheSize(cacheSize) {} - std::tuple set(int propIndex, int64_t value, IndexBits* bits) + std::tuple set(const int64_t propIndex, const int64_t value, IndexBits* bits) { const Key key(propIndex, value); @@ -293,20 +289,20 @@ namespace openset const Value listMap(bits, items.begin()); keyValuesMap[key] = listMap; - if (keyValuesMap.size() > cacheSize) { + if (keyValuesMap.size() > cacheSize) + { const auto evictedKey = items.back(); - const auto evicted = keyValuesMap[items.back()].first; + const auto evicted = keyValuesMap[items.back()].first; keyValuesMap.erase(items.back()); items.pop_back(); - return {evictedKey.first, evictedKey.second, evicted}; + return { evictedKey.first, evictedKey.second, evicted }; } - return {0,0,0}; + return { 0, 0, nullptr }; } - IndexBits* get(int propIndex, int64_t value) + IndexBits* get(const int64_t propIndex, const int64_t value) { - const Key key(propIndex, value); if (auto iter = keyValuesMap.find(key); iter != keyValuesMap.end()) @@ -318,6 +314,7 @@ namespace openset } return nullptr; } + }; }; }; diff --git a/src/oloop_customer_basic.cpp b/src/oloop_customer_basic.cpp new file mode 100644 index 0000000..2b8018c --- /dev/null +++ b/src/oloop_customer_basic.cpp @@ -0,0 +1,214 @@ +#include "oloop_customer_basic.h" +#include "indexbits.h" +#include "asyncpool.h" +#include "tablepartitioned.h" +#include "internoderouter.h" + +using namespace openset::async; +using namespace openset::query; +using namespace openset::result; + +// yes, we are passing queryMacros by value to get a copy +OpenLoopCustomerBasicList::OpenLoopCustomerBasicList( + ShuttleLambda* shuttle, + Database::TablePtr table, + Macro_s macros, + openset::result::ResultSet* result, + const std::vector &cursor, + const bool descending, + const int limit, + int instance) : + OpenLoop(table->getName(), oloopPriority_e::realtime), + macros(std::move(macros)), + shuttle(shuttle), + table(table), + parts(nullptr), + maxLinearId(0), + currentLinId(-1), + 
interpreter(nullptr), + instance(instance), + runCount(0), + startTime(0), + population(0), + index(nullptr), + result(result), + cursor(cursor), + descending(descending), + limit(limit) +{} + +OpenLoopCustomerBasicList::~OpenLoopCustomerBasicList() +{ + if (interpreter) + delete interpreter; +} + +void OpenLoopCustomerBasicList::prepare() +{ + parts = table->getPartitionObjects(loop->partition, false); + + if (!parts) + { + suicide(); + return; + } + + maxLinearId = parts->people.customerCount(); + + // generate the index for this query + indexing.mount(table.get(), macros, loop->partition, maxLinearId); + bool countable; + index = indexing.getIndex("_", countable); + population = index->population(maxLinearId); + + interpreter = new Interpreter(macros); + interpreter->setResultObject(result); + + IndexBits testIndex; + + // if we are in segment compare mode: + if (macros.segments.size()) + { + std::vector segments; + + for (const auto& segmentName : macros.segments) + { + if (segmentName == "*"s) + { + auto tBits = new IndexBits(); + tBits->makeBits(maxLinearId, 1); + segments.push_back(tBits); + } + else + { + if (!parts->segments.count(segmentName)) + { + shuttle->reply( + 0, + result::CellQueryResult_s{ + instance, + {}, + openset::errors::Error{ + openset::errors::errorClass_e::run_time, + openset::errors::errorCode_e::item_not_found, + "missing segment '" + segmentName + "'" + } + } + ); + suicide(); + return; + } + + segments.push_back(parts->segments[segmentName].getBits(parts->attributes)); + + } + } + + //interpreter->setCompareSegments(index, segments); + testIndex.opCopy(*index); + testIndex.opAnd(*segments[0]); + } + else + { + testIndex.opCopy(*index); + } + + // map table, partition and select schema properties to the Customer object + auto mappedColumns = interpreter->getReferencedColumns(); + if (!person.mapTable(table.get(), loop->partition, mappedColumns)) + { + partitionRemoved(); + suicide(); + return; + } + + 
person.setSessionTime(macros.sessionTime); + + const auto filterAscending = [&](int64_t* key, int* value) -> bool { + if (!testIndex.bitState(*value)) + return false; + if (*key > cursor[0]) + return true; + return false; + }; + + const auto filterDescending = [&](int64_t* key, int* value) -> bool { + if (!testIndex.bitState(*value)) + return false; + if (*key < cursor[0]) + return true; + return false; + }; + + if (descending) + indexedList = parts->people.customerMap.serialize( + true, + limit, + filterDescending + ); + else + indexedList = parts->people.customerMap.serialize( + false, + limit, + filterAscending + ); + + iter = indexedList.begin(); + + startTime = Now(); +} + +bool OpenLoopCustomerBasicList::run() +{ + while (true) + { + if (sliceComplete()) + return true; + + // are we done? This will return the index of the + // next set bit until there are no more, or maxLinId is met + if (interpreter->error.inError() || iter == indexedList.end()) + { + result->setAccTypesFromMacros(macros); + + shuttle->reply( + 0, + CellQueryResult_s { + instance, + {}, + interpreter->error, + }); + + parts->attributes.clearDirty(); + + suicide(); + return false; + } + + if (const auto personData = parts->people.getCustomerByLIN(iter->second); personData != nullptr) + { + ++runCount; + person.mount(personData); + person.prepare(); + interpreter->mount(&person); + interpreter->exec(); // run the script on this customer - do some magic + } + + ++iter; + } +} + +void OpenLoopCustomerBasicList::partitionRemoved() +{ + shuttle->reply( + 0, + CellQueryResult_s { + instance, + {}, + openset::errors::Error { + openset::errors::errorClass_e::run_time, + openset::errors::errorCode_e::partition_migrated, + "please retry query" + } + }); +} diff --git a/src/oloop_customer_basic.h b/src/oloop_customer_basic.h new file mode 100644 index 0000000..adc7461 --- /dev/null +++ b/src/oloop_customer_basic.h @@ -0,0 +1,66 @@ +#pragma once +#include "common.h" +#include "database.h" +#include 
"oloop.h" +#include "shuttle.h" +#include "querycommon.h" +#include "queryindexing.h" +#include "queryinterpreter.h" +#include "result.h" + +namespace openset +{ + namespace db + { + class Table; + class TablePartitioned; + }; + + namespace async + { + class OpenLoopCustomerBasicList : public OpenLoop + { + public: + openset::query::Macro_s macros; + ShuttleLambda* shuttle; + openset::db::Database::TablePtr table; + openset::db::TablePartitioned* parts; + int64_t maxLinearId; + int64_t currentLinId; + Customer person; + openset::query::Interpreter* interpreter; + int instance; + int runCount; + int64_t startTime; + int population; + openset::query::Indexing indexing; + openset::db::IndexBits* index; + openset::result::ResultSet* result; + + std::vector cursor; + bool descending; + int limit; + + using BasicCustomerList = std::vector>; + + BasicCustomerList indexedList; + BasicCustomerList::iterator iter; + + explicit OpenLoopCustomerBasicList( + ShuttleLambda* shuttle, + openset::db::Database::TablePtr table, + openset::query::Macro_s macros, + openset::result::ResultSet* result, + const std::vector& cursor, + const bool descending, + const int limit, + int instance); + + ~OpenLoopCustomerBasicList() final; + + void prepare() final; + bool run() final; + void partitionRemoved() final; + }; + } +} diff --git a/src/oloop_customer_list.cpp b/src/oloop_customer_list.cpp index b4ca022..35f50be 100644 --- a/src/oloop_customer_list.cpp +++ b/src/oloop_customer_list.cpp @@ -16,38 +16,33 @@ OpenLoopCustomerList::OpenLoopCustomerList( openset::result::ResultSet* result, const std::vector &sortOrderProperties, const std::vector &cursor, + const bool descending, const int limit, - int instance) - : OpenLoop(table->getName(), oloopPriority_e::realtime), - // queries are high priority and will preempt other running cells - macros(std::move(macros)), - shuttle(shuttle), - table(table), - parts(nullptr), - maxLinearId(0), - currentLinId(-1), - interpreter(nullptr), - 
instance(instance), - runCount(0), - startTime(0), - population(0), - index(nullptr), - result(result), - cursor(cursor), - sortOrderProperties(sortOrderProperties), - limit(limit) + int instance) : + OpenLoop(table->getName(), oloopPriority_e::realtime), + macros(std::move(macros)), + shuttle(shuttle), + table(table), + parts(nullptr), + maxLinearId(0), + currentLinId(-1), + interpreter(nullptr), + instance(instance), + runCount(0), + startTime(0), + population(0), + index(nullptr), + result(result), + cursor(cursor), + sortOrderProperties(sortOrderProperties), + descending(descending), + limit(limit) {} OpenLoopCustomerList::~OpenLoopCustomerList() { if (interpreter) - { - // free up any segment bits we may have made - //for (auto bits : interpreter->segmentIndexes) - // delete bits; - delete interpreter; - } } void OpenLoopCustomerList::prepare() @@ -71,6 +66,8 @@ void OpenLoopCustomerList::prepare() interpreter = new Interpreter(macros); interpreter->setResultObject(result); + IndexBits testIndex; + // if we are in segment compare mode: if (macros.segments.size()) { @@ -104,12 +101,18 @@ void OpenLoopCustomerList::prepare() return; } - segments.push_back(parts->segments[segmentName].getBits()); + segments.push_back(parts->segments[segmentName].getBits(parts->attributes)); } } - interpreter->setCompareSegments(index, segments); + //interpreter->setCompareSegments(index, segments); + testIndex.opCopy(*index); + testIndex.opAnd(*segments[0]); + } + else + { + testIndex.opCopy(*index); } // map table, partition and select schema properties to the Customer object @@ -123,9 +126,9 @@ void OpenLoopCustomerList::prepare() person.setSessionTime(macros.sessionTime); - - const auto filterAscending = [&](SortKeyOneProp_s* key, int* value) -> bool { + if (!testIndex.bitState(*value)) + return false; if (key->value == cursor[0] && key->customerId == cursor[1]) return false; if (key->value < cursor[0]) @@ -135,14 +138,34 @@ void OpenLoopCustomerList::prepare() return false; 
}; + const auto filterDescending = [&](SortKeyOneProp_s* key, int* value) -> bool { + if (!testIndex.bitState(*value)) + return false; + if (key->value == cursor[0] && key->customerId == cursor[1]) + return false; + if (key->value > cursor[0]) + return false; + if (key->value < cursor[0] || key->customerId <= cursor[1]) + return true; + return false; + }; - auto propIndex = parts->table->getProperties()->getProperty("score")->idx; - - indexedList = std::move(parts->attributes.customerIndexing.getListAscending( - propIndex, - limit, - filterAscending - )); + const auto propIndex = macros.vars.columnVars[sortOrderProperties[0]].schemaColumn; + + if (descending) + indexedList = parts->attributes.customerIndexing.getList( + propIndex, + true, + limit, + filterDescending + ); + else + indexedList = parts->attributes.customerIndexing.getList( + propIndex, + false, + limit, + filterAscending + ); iter = indexedList.begin(); diff --git a/src/oloop_customer_list.h b/src/oloop_customer_list.h index 9bbe787..772c9e1 100644 --- a/src/oloop_customer_list.h +++ b/src/oloop_customer_list.h @@ -39,6 +39,7 @@ namespace openset std::vector sortOrderProperties; std::vector cursor; + bool descending; int limit; CustomerIndexList indexedList; @@ -51,6 +52,7 @@ namespace openset openset::result::ResultSet* result, const std::vector& indexProperties, const std::vector& cursor, + const bool descending, const int limit, int instance); diff --git a/src/oloop_histogram.cpp b/src/oloop_histogram.cpp index 2e38e61..12e0645 100644 --- a/src/oloop_histogram.cpp +++ b/src/oloop_histogram.cpp @@ -173,7 +173,7 @@ void OpenLoopHistogram::prepare() return; } - segments.push_back(parts->segments[segmentName].getBits()); + segments.push_back(parts->segments[segmentName].getBits(parts->attributes)); } } diff --git a/src/oloop_insert.cpp b/src/oloop_insert.cpp index ae95e41..e72b05a 100644 --- a/src/oloop_insert.cpp +++ b/src/oloop_insert.cpp @@ -54,7 +54,10 @@ void OpenLoopInsert::OnInsert(const 
std::string& uuid, SegmentPartitioned_s* seg return; // mount the customer - const auto personData = tablePartitioned->people.createCustomer(uuid); + const auto personData = tablePartitioned->table->numericCustomerIds ? + tablePartitioned->people.createCustomer(stoll(uuid)) : + tablePartitioned->people.createCustomer(uuid); + person.mount(personData); person.prepare(); @@ -65,7 +68,8 @@ void OpenLoopInsert::OnInsert(const std::string& uuid, SegmentPartitioned_s* seg auto returns = segment->interpreter->getLastReturn(); // set bit according to interpreter results - const auto stateChange = segment->setBit(personData->linId, returns.size() && returns[0].getBool() == true); + auto bits = segment->getBits(tablePartitioned->attributes); + const auto stateChange = segment->setBit(bits, personData->linId, returns.size() && returns[0].getBool() == true); if (stateChange != SegmentPartitioned_s::SegmentChange_e::noChange) { tablePartitioned->pushMessage(segment->segmentHash, stateChange, personData->getIdStr()); @@ -164,10 +168,8 @@ bool OpenLoopInsert::run() const auto insertSegments = tablePartitioned->getOnInsertSegments(); for (auto segment : insertSegments) { - // ensure we have bits mounted for this segment - segment->prepare(tablePartitioned->attributes); // get a cached interpreter (or make one) and set the bits - const auto interpreter = segment->getInterpreter(tablePartitioned->people.customerCount()); + const auto interpreter = segment->getInterpreter(tablePartitioned->attributes, tablePartitioned->people.customerCount()); // we can't crunch segment math on refresh, but we can expire it, so it crunches the next time it's used if (interpreter->macros.isSegmentMath) diff --git a/src/oloop_property.cpp b/src/oloop_property.cpp index d338fa0..0914df6 100644 --- a/src/oloop_property.cpp +++ b/src/oloop_property.cpp @@ -50,9 +50,8 @@ void OpenLoopProperty::prepare() { if (segmentName == "*") { - auto bits = new db::IndexBits(); - bits->makeBits(stopBit, 1); // make an 
index of all ones. - segments.push_back(bits); + all.makeBits(stopBit, 1); // make an index of all ones. + segments.push_back(segmentName); } else { @@ -74,7 +73,7 @@ void OpenLoopProperty::prepare() return; } - segments.push_back(parts->segments[segmentName].getBits()); + segments.push_back(segmentName); } } } @@ -128,7 +127,7 @@ void OpenLoopProperty::prepare() for (auto s : segments) { auto bits = allBits; - bits->opAnd(*s); + bits->opAnd(*parts->getSegmentBits(s)); aggs->columns[idx].value = bits->population(stopBit); ++idx; @@ -242,13 +241,20 @@ bool OpenLoopProperty::run() } auto columnIndex = 0; - for (auto s : segments) + for (auto segmentName : segments) { // here we are setting the key for the bucket, // this is under our root which is the property name rowKey.key[1] = bucket; // value hash (or value) + db::IndexBits* sourceBits; + + if (segmentName == "*") + sourceBits = &all; + else + sourceBits = parts->getSegmentBits(segmentName); + const auto aggs = result->getMakeAccumulator(rowKey); @@ -265,7 +271,7 @@ bool OpenLoopProperty::run() } // remove bits not in the segment - sumBits->opAnd(*s); + sumBits->opAnd(*sourceBits); aggs->columns[columnIndex].value = sumBits->population(stopBit); delete sumBits; diff --git a/src/oloop_property.h b/src/oloop_property.h index f544734..9983a36 100644 --- a/src/oloop_property.h +++ b/src/oloop_property.h @@ -73,10 +73,12 @@ namespace openset db::TablePartitioned* parts; result::ResultSet* result; + db::IndexBits all; + int64_t stopBit{ 0 }; int64_t instance{ 0 }; - std::vector segments; + std::vector segments; // loop locals result::RowKey rowKey; diff --git a/src/oloop_query.cpp b/src/oloop_query.cpp index 737c7ab..2a993b6 100644 --- a/src/oloop_query.cpp +++ b/src/oloop_query.cpp @@ -98,7 +98,7 @@ void OpenLoopQuery::prepare() return; } - segments.push_back(parts->segments[segmentName].getBits()); + segments.push_back(parts->segments[segmentName].getBits(parts->attributes)); } } diff --git 
a/src/oloop_seg_refresh.cpp b/src/oloop_seg_refresh.cpp index 4b47461..2de86d2 100644 --- a/src/oloop_seg_refresh.cpp +++ b/src/oloop_seg_refresh.cpp @@ -34,14 +34,14 @@ OpenLoopSegmentRefresh::~OpenLoopSegmentRefresh() void OpenLoopSegmentRefresh::storeSegment() const { - const auto delta = bits->population(maxLinearId) - startPopulation; + const auto delta = parts->getSegmentBits(segmentName)->population(maxLinearId) - startPopulation; // update the segment refresh parts->setSegmentRefresh(segmentName, macros.segmentRefresh); parts->setSegmentTTL(segmentName, macros.segmentTTL); if (delta != 0) - Logger::get().info("segment refresh on " + table->getName() + "/" + segmentName + ". (delta " + to_string(delta) + ")"); + Logger::get().info("segment refresh on " + table->getName() + "/" + segmentName ); } void OpenLoopSegmentRefresh::emitSegmentDifferences(openset::db::IndexBits* before, openset::db::IndexBits* after) const @@ -101,7 +101,7 @@ bool OpenLoopSegmentRefresh::nextExpired() index = indexing.getIndex("_", countable); // get bits for this segment - bits = parts->getBits(segmentName); + auto bits = parts->getSegmentBits(segmentName); startPopulation = bits->population(maxLinearId); auto getSegmentCB = parts->getSegmentCallback(); @@ -217,7 +217,7 @@ bool OpenLoopSegmentRefresh::run() // get a fresh pointer to bits on each entry in case they left the LRU maxLinearId = parts->people.customerCount(); segmentName = segmentsIter->first; - interpreter->setBits(parts->getBits(segmentName), maxLinearId); + interpreter->setBits(parts->getSegmentBits(segmentName), maxLinearId); while (true) { @@ -261,7 +261,7 @@ bool OpenLoopSegmentRefresh::run() auto returns = interpreter->getLastReturn(); // any returns, are they true? 
- const auto stateChange = segmentInfo->setBit(currentLinId, returns.size() && returns[0].getBool() == true); + const auto stateChange = segmentInfo->setBit(interpreter->bits, currentLinId, returns.size() && returns[0].getBool() == true); if (stateChange != SegmentPartitioned_s::SegmentChange_e::noChange) parts->pushMessage(segmentHash, stateChange, personData->getIdStr()); } diff --git a/src/oloop_seg_refresh.h b/src/oloop_seg_refresh.h index 9383244..17a31dd 100644 --- a/src/oloop_seg_refresh.h +++ b/src/oloop_seg_refresh.h @@ -35,7 +35,6 @@ namespace openset openset::query::Indexing indexing; openset::db::IndexBits* index {nullptr}; - openset::db::IndexBits* bits {nullptr}; std::unordered_map::iterator segmentsIter; diff --git a/src/oloop_segment.cpp b/src/oloop_segment.cpp index 0e4a301..d016312 100644 --- a/src/oloop_segment.cpp +++ b/src/oloop_segment.cpp @@ -4,6 +4,7 @@ #include "tablepartitioned.h" #include "queryparserosl.h" #include "internoderouter.h" +#include "queryinterpreter.h" using namespace openset::async; using namespace openset::query; @@ -45,7 +46,7 @@ OpenLoopSegment::~OpenLoopSegment() void OpenLoopSegment::storeResult(std::string& name, int64_t count) const { - const auto nameHash = MakeHash(name); + const auto nameHash = result->addLocalTextAndHash(name); const auto set_cb = [count](openset::result::Accumulator* resultColumns) { @@ -60,7 +61,6 @@ void OpenLoopSegment::storeResult(std::string& name, int64_t count) const rowKey.clear(); rowKey.key[0] = nameHash; rowKey.types[0] = ResultTypes_e::Text; - result->addLocalText(nameHash, name); auto aggs = result->getMakeAccumulator(rowKey); set_cb(aggs); @@ -80,12 +80,6 @@ void OpenLoopSegment::storeSegments() for (auto& macro : macrosList) { const auto &segmentName = macro.first; - - if (macro.second.segmentRefresh != -1) - parts->setSegmentRefresh(segmentName, macro.second.segmentRefresh); - - if (macro.second.segmentTTL != -1) - parts->setSegmentTTL(segmentName, macro.second.segmentTTL); } } 
@@ -160,7 +154,7 @@ bool OpenLoopSegment::nextMacro() index = indexing.getIndex("_", countable); // get the bits for this segment - auto bits = parts->getBits(segmentName); + auto bits = parts->getSegmentBits(segmentName); beforeBits.opCopy(*bits); // should we return these bits, as a cached copy? @@ -175,6 +169,10 @@ bool OpenLoopSegment::nextMacro() // cached copy not found... carry on! } + // we will refresh now, so we will move the refresh time down + if (macroIter->second.segmentRefresh != -1) + parts->setSegmentRefresh(segmentName, macroIter->second.segmentRefresh); + // is this something we can calculate using purely // indexes? (nifty) if (countable && !macros.isSegmentMath) @@ -185,7 +183,7 @@ bool OpenLoopSegment::nextMacro() bits->opCopy(*index); // add to resultBits upon query completion - storeResult(segmentName, index->population(maxLinearId)); + storeResult(segmentName, bits->population(maxLinearId)); ++macroIter; continue; // try another index @@ -194,6 +192,7 @@ bool OpenLoopSegment::nextMacro() interpreter = parts->getInterpreter(segmentName, maxLinearId); auto getSegmentCB = parts->getSegmentCallback(); interpreter->setGetSegmentCB(getSegmentCB); + interpreter->setBits(bits, maxLinearId); auto mappedColumns = interpreter->getReferencedColumns(); @@ -269,7 +268,7 @@ bool OpenLoopSegment::run() // get a fresh pointer to bits on each entry in case they left the LRU maxLinearId = parts->people.customerCount(); - interpreter->setBits(parts->getBits(segmentName), maxLinearId); + interpreter->setBits(parts->getSegmentBits(segmentName), maxLinearId); while (true) { @@ -355,7 +354,7 @@ bool OpenLoopSegment::run() auto returns = interpreter->getLastReturn(); // any returns, are they true? 
- const auto stateChange = segmentInfo->setBit(currentLinId, returns.size() && returns[0].getBool() == true); + const auto stateChange = segmentInfo->setBit(interpreter->bits, currentLinId, returns.size() && returns[0].getBool() == true); if (stateChange != SegmentPartitioned_s::SegmentChange_e::noChange) parts->pushMessage(segmentHash, stateChange, personData->getIdStr()); } diff --git a/src/queryindexing.cpp b/src/queryindexing.cpp index 19ab076..1dc845e 100644 --- a/src/queryindexing.cpp +++ b/src/queryindexing.cpp @@ -239,14 +239,11 @@ IndexBits Indexing::buildIndex(HintOpList &index, bool countable) { IndexBits bits; bits.makeBits(maxLinId, 1); - cout << bits.population(maxLinId) << ":" << maxLinId << ":" << stopBit << endl; - countable = false; return bits; } auto res = stack.back().bits; - cout << res.population(stopBit) << ":" << stopBit << endl; res.setSizeByBit(stopBit); return res; } diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 52f4c2e..a0a0a90 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -28,6 +28,7 @@ openset::query::Interpreter::~Interpreter() void openset::query::Interpreter::setResultObject(result::ResultSet* resultSet) { result = resultSet; + result->addLocalText(NONE, "n/a"); } void openset::query::Interpreter::configure() diff --git a/src/queryparserosl.h b/src/queryparserosl.h index 8d10093..138c909 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -4259,7 +4259,7 @@ namespace openset::query if (keyVal[0] == "ttl" || keyVal[0] == "refresh") // these are special and allow for time appends like 's' or 'm', or 'd' - flags[keyVal[0]] = expandTime(keyVal[1], lastDebug) * 1000; + flags[keyVal[0]] = expandTime(keyVal[1], lastDebug); else if (keyVal[0] == "use_cached") flags["use_cached"] = (keyVal[1].length() == 0 || keyVal[1][0] == 'T' || keyVal[1][0] == diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index de21fc3..2010772 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ 
-12,6 +12,7 @@ #include "oloop_segment.h" #include "oloop_customer.h" #include "oloop_customer_list.h" +#include "oloop_customer_basic.h" #include "oloop_property.h" #include "oloop_histogram.h" #include "asyncpool.h" @@ -796,9 +797,8 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM std::vector sortOrderProperties; - int customerIdIndex = -1; - // validate that sortKeys are in the select statement + int customerIdIndex = -1; const auto sortKeyParts = split(sortKeyString, ','); for (auto key : sortKeyParts) { @@ -854,11 +854,10 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM if (column.alias == key) { - found = true; sortOrderProperties.push_back(index); - - break; + found = true; } + ++index; } } @@ -900,6 +899,9 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM return; } + // if the sort is by 'id' we will use the 'OpenLoopCustomerBasicList' iterator + auto isBasic = sortKeyParts[0] == "id"; + // add customerId as secondary sort if (sortOrderProperties.size() == 1) sortOrderProperties.push_back(customerIdIndex); @@ -949,18 +951,21 @@ void RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM if (cursorValues.size() == 0) { - cursorValues = { LLONG_MIN, LLONG_MIN }; + if (sortOrder == ResultSortOrder_e::Desc) + cursorValues = { LLONG_MAX, LLONG_MAX }; + else + cursorValues = { LLONG_MIN, LLONG_MIN }; } - else if (cursorValues.size() != 2) + else if (!isBasic && cursorValues.size() != 2) { - RpcError( - errors::Error { - errors::errorClass_e::query, - errors::errorCode_e::general_error, - "param 'cursor': expecting two numeric values (separated by a comma)" - }, - message); - return; + RpcError( + errors::Error { + errors::errorClass_e::query, + errors::errorCode_e::general_error, + "param 'cursor': expecting two numeric values (separated by a comma)" + }, + message); + return; } if (message->isParam("segments")) @@ -1144,22 +1149,64 @@ void 
RpcQuery::customer_list(const openset::web::MessagePtr& message, const RpcM auto instance = 0; - // pass factory function (as lambda) to create new cell objects - partitions->cellFactory( - activeList, - [shuttle, table, queryMacros, resultSets, &instance, sortOrderProperties, cursorValues, trimSize](AsyncLoop* loop) -> OpenLoop* - { - instance++; - return new OpenLoopCustomerList( + if (isBasic) + { + // pass factory function (as lambda) to create new cell objects + partitions->cellFactory( + activeList, + [ shuttle, table, queryMacros, - resultSets[loop->getWorkerId()], + resultSets, + &instance, + cursorValues, + sortOrder, + trimSize](AsyncLoop* loop) -> OpenLoop* + { + instance++; + return new OpenLoopCustomerBasicList( + shuttle, + table, + queryMacros, + resultSets[loop->getWorkerId()], + cursorValues, + sortOrder == ResultSortOrder_e::Desc, + trimSize, + instance); + } + ); + } + else + { + // pass factory function (as lambda) to create new cell objects + partitions->cellFactory( + activeList, + [ + shuttle, + table, + queryMacros, + resultSets, + &instance, sortOrderProperties, cursorValues, - trimSize, - instance); - }); + sortOrder, + trimSize](AsyncLoop* loop) -> OpenLoop* + { + instance++; + return new OpenLoopCustomerList( + shuttle, + table, + queryMacros, + resultSets[loop->getWorkerId()], + sortOrderProperties, + cursorValues, + sortOrder == ResultSortOrder_e::Desc, + trimSize, + instance); + } + ); + } } void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping& matches) diff --git a/src/tablepartitioned.cpp b/src/tablepartitioned.cpp index 4e36833..b890db3 100644 --- a/src/tablepartitioned.cpp +++ b/src/tablepartitioned.cpp @@ -14,20 +14,19 @@ SegmentPartitioned_s::~SegmentPartitioned_s() delete interpreter; } -void openset::db::SegmentPartitioned_s::prepare(Attributes& attr) +/*void openset::db::SegmentPartitioned_s::prepare(Attributes& attr) { attributes = &attr; attributes->getMake(PROP_SEGMENT, segmentName); -} +}*/ 
-openset::db::IndexBits* openset::db::SegmentPartitioned_s::getBits() +openset::db::IndexBits* openset::db::SegmentPartitioned_s::getBits(Attributes& attributes) { - return attributes->getBits(PROP_SEGMENT, MakeHash(segmentName)); + return attributes.getBits(PROP_SEGMENT, MakeHash(segmentName)); } -openset::db::SegmentPartitioned_s::SegmentChange_e openset::db::SegmentPartitioned_s::setBit(int64_t linearId, bool state) +openset::db::SegmentPartitioned_s::SegmentChange_e openset::db::SegmentPartitioned_s::setBit(IndexBits* bits, int64_t linearId, bool state) { - const auto bits = getBits(); const auto currentState = bits->bitState(linearId); if (state && !currentState) { @@ -44,15 +43,11 @@ openset::db::SegmentPartitioned_s::SegmentChange_e openset::db::SegmentPartition return SegmentChange_e::noChange; } -openset::query::Interpreter * openset::db::SegmentPartitioned_s::getInterpreter(int64_t maxLinearId) +openset::query::Interpreter * openset::db::SegmentPartitioned_s::getInterpreter(Attributes& attributes, int64_t maxId) { if (!interpreter) interpreter = new openset::query::Interpreter(macros, openset::query::InterpretMode_e::count); - - const auto bits = getBits(); - - if (!bits) - throw std::runtime_error("call prepare before calling getInterpreter"); + interpreter->setBits(getBits(attributes), maxId); return interpreter; } @@ -107,7 +102,7 @@ openset::query::Interpreter* TablePartitioned::getInterpreter(const std::string& if (!segments.count(segmentName)) return nullptr; - return segments[segmentName].getInterpreter(maxLinearId); + return segments[segmentName].getInterpreter(attributes, people.customerCount()); } void TablePartitioned::checkForSegmentChanges() @@ -195,8 +190,7 @@ std::function TablePartitioned::g if (this->segments.count(segmentName)) { deleteAfterUsing = false; - this->segments[segmentName].prepare(this->attributes); - return this->segments[segmentName].getBits(); + return this->segments[segmentName].getBits(attributes); } // if there are no 
bits with this name created in this query @@ -208,13 +202,10 @@ std::function TablePartitioned::g } -openset::db::IndexBits* TablePartitioned::getBits(std::string& segmentName) +openset::db::IndexBits* TablePartitioned::getSegmentBits(const std::string& segmentName) { if (this->segments.count(segmentName)) - { - this->segments[segmentName].prepare(attributes); - return this->segments[segmentName].getBits(); - } + return this->segments[segmentName].getBits(attributes); return nullptr; } diff --git a/src/tablepartitioned.h b/src/tablepartitioned.h index 6d89eb1..fe0e0f7 100644 --- a/src/tablepartitioned.h +++ b/src/tablepartitioned.h @@ -46,7 +46,7 @@ namespace openset bool onInsert {false}; query::Interpreter* interpreter { nullptr }; - Attributes* attributes; + //Attributes* attributes; SegmentPartitioned_s( const std::string& segmentName, @@ -74,12 +74,12 @@ namespace openset * * setBit - flips a bit to the desired state and returns the state change that took place */ - void prepare(Attributes& attributes); // mounts bits, if they are not already - IndexBits* getBits(); - SegmentChange_e setBit(int64_t linearId, bool state); // flip bits by persion linear id + //void prepare(Attributes& attributes); // mounts bits, if they are not already + IndexBits* getBits(Attributes& attributes); + SegmentChange_e setBit(IndexBits* bits, int64_t linearId, bool state); // flip bits by persion linear id // returns a new or cached interpreter. 
Call prepare before calling get Interpreter - query::Interpreter* getInterpreter(int64_t maxLinearId); + query::Interpreter* getInterpreter(Attributes& attributes, int64_t maxId); }; @@ -191,7 +191,7 @@ namespace openset // The Interpreter needs this callback to operate when performing segment math std::function getSegmentCallback(); - openset::db::IndexBits* getBits(std::string& segmentName); + openset::db::IndexBits* getSegmentBits(const std::string& segmentName); void pushMessage(const int64_t segmentHash, const SegmentPartitioned_s::SegmentChange_e state, std::string uuid); diff --git a/table- b/table- new file mode 100644 index 0000000..e69de29 From f94349b67139a9d00b3f400f859bc6ee323e8f3e Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 22:42:31 -0500 Subject: [PATCH 12/31] changed to const reference --- src/customer_props.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/customer_props.cpp b/src/customer_props.cpp index cd95881..263e1ae 100644 --- a/src/customer_props.cpp +++ b/src/customer_props.cpp @@ -1,3 +1,5 @@ +#include + #include "customer_props.h" #include "table.h" #include "properties.h" @@ -329,7 +331,7 @@ void openset::db::CustomerProps::setProp(openset::db::Table* table, int propInde if (propInfo->isSet) listFix(value); - if (auto& iter = props.find(propIndex); iter != props.end()) + if (const auto& iter = props.find(propIndex); iter != props.end()) { if (propInfo->isSet) { From 979fa68149a5a4302fc03dae1f29d5e1ff16cb4e Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 23:02:04 -0500 Subject: [PATCH 13/31] gcc compatiblity fix --- src/grid.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/grid.cpp b/src/grid.cpp index e48dfcd..69d6e4a 100644 --- a/src/grid.cpp +++ b/src/grid.cpp @@ -1004,19 +1004,25 @@ Grid::RowType_e Grid::insertParse(Properties* properties, cjson* doc, Col_s* ins if (!propInfo->isCustomerProperty) continue; + cvar workVar; + 
switch (c->type()) { case cjson::Types_e::INT: - customerProps.setProp(table, propIndex, cvar(c->getInt())); + workVar = c->getInt(); + customerProps.setProp(table, propIndex, workVar); break; case cjson::Types_e::DBL: - customerProps.setProp(table, propIndex, cvar(c->getDouble())); + workVar = c->getDouble(); + customerProps.setProp(table, propIndex, workVar); break; case cjson::Types_e::STR: - customerProps.setProp(table, propIndex, cvar(c->getString())); + workVar = c->getString(); + customerProps.setProp(table, propIndex, workVar); break; case cjson::Types_e::BOOL: - customerProps.setProp(table, propIndex, cvar(c->getBool())); + workVar = c->getBool(); + customerProps.setProp(table, propIndex, workVar); break; case cjson::Types_e::ARRAY: { From dfa8a7b0e1e760c68c985a806d4f049d0c002ed5 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 25 Nov 2019 23:14:18 -0500 Subject: [PATCH 14/31] gcc compatiblity fix --- src/queryinterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index a0a0a90..432e1f4 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -348,7 +348,7 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum macros.vars.columnVars[varIndex].valueInt64 = macros.vars.columnVars[varIndex].propShortcut != -1 ? 
macros.vars.userVars[macros.vars.columnVars[varIndex].propShortcut].value.getBool() : - macros.vars.columnVars[varIndex].value = (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); + (*lambda(macros.vars.columnVars[varIndex].lambdaIndex, 0)).getBool(); break; default: macros.vars.columnVars[varIndex].valueInt64 = 0; From 63e50487ffaaeea23759d0aad0183a98e0ef0ba6 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 26 Nov 2019 00:07:24 -0500 Subject: [PATCH 15/31] gcc compatiblity fix --- src/result.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/result.cpp b/src/result.cpp index 5b5d6e7..6ef34cd 100644 --- a/src/result.cpp +++ b/src/result.cpp @@ -156,7 +156,7 @@ void ResultSet::setAccTypesFromMacros(const openset::query::Macro_s ¯os) Accumulator* ResultSet::getMakeAccumulator(RowKey& key) { - if (auto& res = results.emplace(key, nullptr); res.second == true) + if (const auto& res = results.emplace(key, nullptr); res.second == true) { const auto t = new(mem.newPtr(resultBytes)) openset::result::Accumulator(resultWidth); res.first->second = t; @@ -166,15 +166,6 @@ Accumulator* ResultSet::getMakeAccumulator(RowKey& key) { return res.first->second; } - - /*if (const auto tempPair = results.find(key); tempPair != results.end()) - return tempPair->second; - - const auto resultBytes = resultWidth * sizeof(Accumulation_s); - const auto t = new(mem.newPtr(resultBytes)) openset::result::Accumulator(resultWidth); - results.emplace(key, t); - - return t;*/ } void mergeResultTypes( From be98d1939a08a9e0624b4b9d81715f9b4cf1e70c Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Wed, 27 Nov 2019 17:05:42 -0500 Subject: [PATCH 16/31] documentation updates --- README.md | 8 ++++ docs/README.md | 15 ++++--- docs/rest/README.md | 95 ++++++++++++++++++++++++++------------------- 3 files changed, 71 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 4c3ce9f..d3c5d7a 100644 --- a/README.md +++ b/README.md @@ 
-680,6 +680,14 @@ Ultimately DeepMetrix had to say no to Bud, but that failure planted a seed. # Release Notes +### 0.4.5 + +- the `event` query endpoint has been renamed `report`. The new name expresses the purpose of the endpoint better, as events play a role in all queries. +- `id_type` is +- added `customers` query. The customer query returns a list of customer IDs and selected `customer properties` or computed values for each customer. The list can be paginated, and sorted on alternate indexes (defined when a table is created). +- faster smaller indexes. The old index caused lots of memory reallocation as indexes grew. An LRU was also added to the indexing system to keep hot indexes in an uncompressed state. +- added lambda functions in select statements. A lambda allows a select parameter to get its value from code. This makes it possible to select the value of a variable or inline aggregation. + ### 0.4.4 - added `id_type` to switch in create table. This is now required and allows you to specify `numeric` or `textual` customer ids. 
diff --git a/docs/README.md b/docs/README.md index 8a6fd2e..ea30da5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,15 +1,14 @@ # Documentation -  -**topics** +**Help** +* [Quick Overview](https://github.com/opset/openset/tree/master/docs/osl/README.md) +* [Scripting Language (OSL)](https://github.com/opset/openset/blob/master/docs/osl/language_reference.md) +* [API](https://github.com/opset/openset/tree/master/docs/rest/README.md) + +**Nerdier Matters** * [Docker Images](https://github.com/opset/openset/tree/master/docs/docker) (recommended - run anywhere) * [Building and Installing](https://github.com/opset/openset/tree/master/docs/build_install) (build release or debug on windows or linux) -* [OSL query language overview](https://github.com/opset/openset/tree/master/docs/osl/README.md) -* [OSL language reference](https://github.com/opset/openset/blob/master/docs/osl/language_reference.md) -* [REST API](https://github.com/opset/openset/tree/master/docs/rest/README.md) -* [Samples](https://github.com/opset/openset_samples) -* [Clustering](#) (coming soon) -:coffee: These documents are a work in progress. + diff --git a/docs/rest/README.md b/docs/rest/README.md index 4863c43..1a450b1 100644 --- a/docs/rest/README.md +++ b/docs/rest/README.md @@ -1,6 +1,8 @@ -# Cluster +# API -## PUT /v1/cluster/init?partitions={#} +## Cluster + +### PUT /v1/cluster/init?partitions={#} Initializes a cluster (a cluster with just **one** node will still need initializing). @@ -12,7 +14,7 @@ Returns a 200 or 400 status code. > :pushpin:the ideal partition size is the lowest possible number that will fit the size of your cluster in the long run. There is overhead incurred with each partition, but you also want to pick a number that will allow you to grow. Picking a number less than the number of processor cores in your cluster will **not** allow you to reach peak performance. 
-## PUT /v1/cluster/join?host={host|ip}&port={port} +### PUT /v1/cluster/join?host={host|ip}&port={port} **query_params:** @@ -25,15 +27,15 @@ Returns a 200 or 400 status code. ## Table -## POST /v1/table/{table} (create table) +### POST /v1/table/{table} (create table) Create a table by passing a JSON array of desired table properties and types. -### id_type +#### id_type (required) -The `id_type` key specifies whether this table uses `numeric` or `textual` customer ids. +The `id_type` determines whether this table uses `numeric` or `textual` customer ids. -### properties +#### properties (required) Properties you would like to track are defined as an array under the `properties` key. @@ -44,11 +46,12 @@ A property at minimum requires a name and type. - `is_set` - if provided and `true`, this property will be a collection of values, rather than single value (think product tags i.e. 'red', 'big', 'kitchen') - `is_customer` - If provided and `true` this is property is a special customer property. Customer Properties unlike regular properties are associated with the customer rather than events in their history. Facts about a customer. These might be values like `age` or `country` or created by an ML model. -### event_order + +#### event_order (optional) The `event_order` key allows you to specify insert sort order for event types. For example, if you want a `purchase` event to always precede `purchase_items` events you would specify `"event_order": ['purchase', 'purchase_items']`. This can make it easier to write queries as order is guaranteed on events that have the same timestamp. -### example +#### example ``` { @@ -78,7 +81,7 @@ The `event_order` key allows you to specify insert sort order for event types. F Returns a 200 or 400 status code. -## GET /v1/table/{table} (describe table) +### GET /v1/table/{table} (describe table) Returns JSON describing the table. @@ -136,11 +139,11 @@ Returns JSON describing the table. Returns a 200 or 400 status code. 
-## PUT /v1/table/{table}/property/{prop_name}?{property definition params} +### PUT /v1/table/{table}/property/{prop_name}?{property definition params} Adds a property to an existing table. -### params +#### params - `prop_name` can be any string consisting of lowercase letters `a-z`, numbers `0-9`, or the `_`. Properties cannot start with number. - `type` can be `text|int|double|bool`. @@ -149,7 +152,7 @@ Adds a property to an existing table. Returns a 200 or 400 status code. -## DELETE /v1/table/{table}/property/{prop_name} +### DELETE /v1/table/{table}/property/{prop_name} Removes a property from the table. @@ -157,7 +160,7 @@ Removes a property from the table. Returns a 200 or 400 status code. -## PUT /v1/subscription/{table}/{segment_name}/{sub_name} +### PUT /v1/subscription/{table}/{segment_name}/{sub_name} To subscribe to segment changes, the segment must already exist. @@ -209,13 +212,13 @@ Example body for web-hook call: } ``` -# DELETE /v1/subscription/{table}/{segment_name}/{sub_name} +### DELETE /v1/subscription/{table}/{segment_name}/{sub_name} Delete a segment subscription. # Queries -## POST /v1/query/{table}/event +### POST /v1/query/{table}/event Analytics are generated by calling the `event` endpoint. @@ -230,16 +233,12 @@ This will perform an event scanning query by executing the provided `OSL` script | `sort=` | `prop_name` | sort by `select` property name or `as name` if specified. specifying `sort=group`, will sort the result set by using grouping names. | | `order=` | `asc/desc` | default is descending order. | | `trim=` | `# limit` | clip long branches at a certain count. Root nodes will still include totals for the entire branch. 
| -| `str_{var_name}` | `text` | populates variable of the same name in the params block with a string value | -| `int_{var_name}` | `integer` | populates variable of the same name in the params block with a integer value | -| `dbl_{var_name}` | `double` | populates variable of the same name in the params block with a double value | -| `bool_{var_name}` | `true/false` | populates variable of the same name in the params block with a boolean value | **result** 200 or 400 status with JSON data or error. -## POST /v1/query/{table}/segment +### POST /v1/query/{table}/segment This will perform an index counting query by executing the provided `OSL` script in the POST body as `text/plain`. The result will be in JSON and contain results or any errors produced by the query. @@ -255,7 +254,7 @@ A single counts query can contain multiple sections to create multiple segments **post body:** -The post body can include multiple sections. The `@` decorator is used to define sections. The example below is using the sample `high_street` sample data to create two segments named `products_home` and `products_outdoor`. +The post body can include multiple segment definitions. The `@` decorator is used to define code blocks for each segment. The example below is using the sample `high_street` sample data to create two segments named `products_home` and `products_outdoor`. The `params` on the `@segment` definition tell OpenSet to not-recalculate the segment if it's within the TTL, and that it's ok to use a cached version. It also tells OpenSet to refresh this segment about every 300 seconds. @@ -293,7 +292,7 @@ end 200 or 400 status with JSON data or error. -## GET /v1/query/{table}/property/{prop_name} +### GET /v1/query/{table}/property/{prop_name} The property query allows you to query all the values within a named property in a table as well as perform searches and numeric grouping. 
@@ -318,24 +317,42 @@ The property query allows you to query all the values within a named property in 200 or 400 status with JSON data or error. -## GET /v1/query/{table}/customer +### GET /v1/query/{table}/customer Returns the event sequence for an individual customer. -> :pushpin: If events contain complex data (i.e. sub values), OpenSet will re-condense the data by folding up data permeations generated on insert. The folded row may be grouped differently than the one provided to `/insert` but will be logically identical. +**query parameters:** + +| param | values | note | +| ------ | ------------- | ------------ | +| `id=` | `number/text` | Customer ID | + +**result** + +200 or 400 status with JSON data or error. + +### POST /v1/query/{table}/customers + +Analytics are generated by calling the `event` endpoint. + +This will perform an event scanning query by executing the provided `OSL` script in the POST body as `text/plain`. The result will be in JSON and contain results or any errors produced by the query. **query parameters:** -| param | values | note | -| ------ | -------- | ----------------------------------------------------- | -| `sid=` | `string` | If you are using textual IDs use the `sid=` parameter | -| `id=` | `number` | If you are using numeric IDs use the `id=` parameter | +| param | values | note | +| ------------------ | ------------ | --------------------------------------------------------------------------------------------------------------------------------------- | +| `debug=` | `true/false` | will return the assembly for the query rather than the results | +| `segments=` | `segment` | comma separated segment list. Segment must be created with a `/segment` query (see next section). Default segment is `*` (all customers) | +| `sort=` | `prop_name` | Name of property to sort by. | +| `order=` | `asc/desc` | default is descending order. | +| `trim=` | `# limit` | clip long branches at a certain count. 
Root nodes will still include totals for the entire branch. | +| `cursor=` | `key,key` | a resume from cursor is provided with each query to allow for pagination. | **result** 200 or 400 status with JSON data or error. -## POST /v1/query/{table}/histogram/{name} +### POST /v1/query/{table}/histogram/{name} This will generate a histogram using`OSL` script in the POST body as `text/plain`. The result will be in JSON and contain results or any errors produced by the query. @@ -376,7 +393,7 @@ return( to_weeks(now - last_stamp) ) 200 or 400 status with JSON data or error. -## POST /v1/query/{table}/batch (experimental) +### POST /v1/query/{table}/batch (experimental) Run multiple segment, property and histogram queries at once, generate a single result. Including `foreach` on histograms. @@ -416,12 +433,12 @@ end ``` -# Internode (internode node chatter) +## Internode (internode node chatter) Don't call these from client code. The `/v1/internode` REST interface is used internally to maintain a proper functioning cluster. -## GET /v1/cluster/is_member +### GET /v1/cluster/is_member This will return a JSON object informing if the node is already part of a cluster @@ -431,23 +448,23 @@ This will return a JSON object informing if the node is already part of a cluste } ``` -## POST /v1/internode/join_to_cluster +### POST /v1/internode/join_to_cluster Joins an empty node to the cluster. This originates with the `/v1/cluster/join` endpoint. `/v1/cluster/join` will issue a `/v1/interndoe/is_cluster_member` and verify the certificate before this endpoint (`/v1/internode/join_to_cluster`) is called. This endpoint transfers information about tables, subscribers, and partition mapping. -## POST /v1/internode/add_node +### POST /v1/internode/add_node Dispatched to all nodes by `/v1/cluster/join` to inform all nodes in the cluster that a new node has been joined to the cluster. Nodes receiving `add_node` will adjust their node mapping. At this point the node will be empty. 
The `sentinel` for the elected node will start balancing to this node shortly after this dispatch. -## POST /v1/internode/map_change +### POST /v1/internode/map_change Dispatched by `sentinel` when node mapping and membership have changed. This is the basic mechanism that keeps cluster topology in sync. -## PUT /v1/internode/transfer?partition={partition_id}&node={dest_node_name} +### PUT /v1/internode/transfer?partition={partition_id}&node={dest_node_name} This initiates a partition transfer. The node containing the partition to transfer is contacted directly. It is provided the `partition_id` to transfer and the `dest_node_name` to send it to. @@ -455,13 +472,13 @@ This will result in potentially several transfers, one for each table using `POS After a successful transfer the `sentinel` will send a `POST /v1/internode/map_change` request to tell the cluster that the partition is available. -## POST /v1/internode/transfer?partition={partition_id}&table={table_name} +### POST /v1/internode/transfer?partition={partition_id}&table={table_name} Transfers packed `binary` data for partition. Partition is `partition_id` is passed in URL as an integer. 
# Other -## GET /ping +### GET /ping If the node is runing, this will respond with 200 OK and JSON: From beceea4aa3a1bdc3a27611ce28c2d1478ca98df2 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 28 Nov 2019 23:14:26 -0500 Subject: [PATCH 17/31] small block alloctor fix, always_fresh flag, index def fix --- CMakeLists.txt | 3 +- lib/mem/prequeues.cpp | 1 - lib/mem/prequeues.h | 77 ----------------- lib/mem/segmented_list.h | 64 ++++++++++++++ lib/sba/sba.cpp | 45 ++++------ lib/sba/sba.h | 23 ++--- src/asyncpool.cpp | 2 +- src/customer_props.cpp | 178 ++++++++++++++++++++++++-------------- src/customer_props.h | 4 + src/customers.cpp | 29 ++++--- src/customers.h | 3 +- src/oloop_insert.cpp | 6 +- src/oloop_seg_refresh.cpp | 10 ++- src/oloop_segment.cpp | 9 +- src/querycommon.h | 1 + src/rpc_query.cpp | 10 +++ src/tablepartitioned.cpp | 6 +- src/tablepartitioned.h | 5 +- src/ver.h | 2 +- 19 files changed, 260 insertions(+), 218 deletions(-) delete mode 100644 lib/mem/prequeues.cpp delete mode 100644 lib/mem/prequeues.h create mode 100644 lib/mem/segmented_list.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 566d3d1..beeaaf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,10 +83,9 @@ set(SOURCE_FILES lib/include/libcommon.h lib/mem/bloom.cpp lib/mem/bloom.h - lib/mem/prequeues.cpp - lib/mem/prequeues.h lib/mem/ssdict.h lib/mem/blhash.h + lib/mem/segmented_list.h lib/str/strtools.cpp lib/str/strtools.h lib/threads/spinlock.h diff --git a/lib/mem/prequeues.cpp b/lib/mem/prequeues.cpp deleted file mode 100644 index bf12baa..0000000 --- a/lib/mem/prequeues.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "prequeues.h" diff --git a/lib/mem/prequeues.h b/lib/mem/prequeues.h deleted file mode 100644 index f18ef8d..0000000 --- a/lib/mem/prequeues.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef RARELOGIC_CPPLIB_MEM_PREQUEUES_H_ -#define RARELOGIC_CPPLIB_MEM_PREQUEUES_H_ - -#include "../threads/locks.h" -#include - -/* - -Type T below may be an object or structure and 
must have "Clear" member -as well as a static "New" that returns a type T* - - -*/ - -template -class prequeue -{ -private: - - CriticalSection _CS; - - int32_t _Max; - - std::deque _LIFO; - -public: - - prequeue() - : _CS() - { - _Max = 10000; - } - - ~prequeue() - { } - - T* CheckOut() - { - _CS.lock(); - - T* Return; - - if (_LIFO.size() == 0) - { - Return = T::New(); - } - else - { - Return = _LIFO.back(); - _LIFO.pop_back(); - } - - _CS.unlock(); - - return Return; - } - - void CheckIn(T* ObjectPtr) - { - _CS.lock(); - - if (_LIFO.size() > _Max) - { - ObjectPtr->Clear(); - delete ObjectPtr; - } - else - { - ObjectPtr->Clear(); - _LIFO.push_back(ObjectPtr); - } - - _CS.unlock(); - } -}; - -#endif // RARELOGIC_CPPLIB_MEM_PREQUEUES_H_ diff --git a/lib/mem/segmented_list.h b/lib/mem/segmented_list.h new file mode 100644 index 0000000..e9e0a58 --- /dev/null +++ b/lib/mem/segmented_list.h @@ -0,0 +1,64 @@ +#pragma once +#include +#include "../sba/sba.h" + +template +class SegmentedList +{ + struct PageStruct_s + { + tEntry values[elements - 1]; + }; + + int64_t elementsPerPage {elements - 1}; + + using Pages = std::vector; + + Pages pages; + int64_t listSize{0}; +public: + SegmentedList() = default; + ~SegmentedList() + { + for (auto page: pages) + PoolMem::getPool().freePtr(page); + + pages.clear(); + } + + tEntry& at(int64_t index) + { + if (index < 0 || index > listSize) + throw std::runtime_error("segmented_list index out of range"); + return pages.at(index / elementsPerPage)->values[index % elementsPerPage]; + } + + void push_back(tEntry entry) + { + if (listSize / elementsPerPage == pages.size()) + pages.push_back(reinterpret_cast(PoolMem::getPool().getPtr(sizeof(PageStruct_s)))); + pages.at(listSize / elementsPerPage)->values[listSize % elementsPerPage] = entry; + ++listSize; + } + + int64_t size() const + { + return listSize; + } + +private: + PageStruct_s* getPage(int64_t index) + { + index /= elementsPerPage; + + while (index >= static_cast(pages.size())) 
+ { + const auto page = reinterpret_cast(PoolMem::getPool().getPtr(sizeof(PageStruct_s))); + pages.push_back(page); + } + + return pages.at(index); + } + + +}; \ No newline at end of file diff --git a/lib/sba/sba.cpp b/lib/sba/sba.cpp index 631017d..24b637d 100644 --- a/lib/sba/sba.cpp +++ b/lib/sba/sba.cpp @@ -14,37 +14,22 @@ PoolMem::PoolMem() } // build the reverse lookup - once - auto bits = 0; - while (true) - { - const auto size = pow(bits, 2); - auto bucket = -1; - for (auto &b : breakPoints) - if (b.maxSize >= size) - { - bucket = b.index; - break; - } - bucketLookup.push_back(bucket == 0 ? 1 : bucket); - ++bits; - - if (size >= breakPoints.back().maxSize) - break; - } + for (auto &b : breakPoints) + bucketLookup.push_back(b.maxSize); } -void* PoolMem::getPtr(int64_t size) -{ +void* PoolMem::getPtr(const int64_t size) +{ // give us the starting bucket for iteration - int64_t bucket = std::sqrt(size); + int64_t bucket = 0; - // will iterate through bucekts of matching sqrt until one fits or we hit the end. - // this will iteratate once or twice - while (bucket < bucketLookup.size() && size > breakPoints[bucketLookup[bucket]].maxSize) + // will iterate through buckets of matching sqrt until one fits or we hit the end. 
+ // this will iterate once or twice + while (bucket < 33 && size > bucketLookup[bucket]) ++bucket; // bucket index beyond lookup, so this is a non-pooled allocation - if (bucket >= bucketLookup.size()) + if (bucket >= 33) { // this is a big allocation (outside our bucket sizes), so grab it from heap const auto alloc = reinterpret_cast(new char[size + MemConstants::PoolMemHeaderSize]); @@ -53,7 +38,7 @@ void* PoolMem::getPtr(int64_t size) } // figure out which bucket size (if any) this allocation will fit - auto &mem = breakPoints[bucketLookup[bucket]]; + auto &mem = breakPoints[bucket]; csLock lock(mem.memLock); @@ -65,8 +50,8 @@ void* PoolMem::getPtr(int64_t size) return alloc->data; } - //reinterpret_cast(mem.heap.newPtr(mem.maxSize + MemConstants::PoolMemHeaderSize)); - const auto alloc = reinterpret_cast(new char[mem.maxSize + MemConstants::PoolMemHeaderSize]); + const auto alloc = reinterpret_cast(mem.heap->newPtr(mem.maxSize + MemConstants::PoolMemHeaderSize)); + //const auto alloc = reinterpret_cast(new char[mem.maxSize + MemConstants::PoolMemHeaderSize]); alloc->poolIndex = mem.index; return alloc->data; } @@ -75,7 +60,7 @@ void PoolMem::freePtr(void* ptr) { const auto alloc = reinterpret_cast(static_cast(ptr) - MemConstants::PoolMemHeaderSize); - if (alloc->poolIndex == -2) // already freed + if (alloc->poolIndex == -2) // already freed return; // nice place for a breakpoint in debug // -1 means this was non-pooled so just delete it @@ -88,11 +73,12 @@ void PoolMem::freePtr(void* ptr) auto& mem = breakPoints[alloc->poolIndex]; csLock lock(mem.memLock); - + alloc->poolIndex = -2; mem.freed.push_back(alloc); // if a pool gets to large, trim it back + /* if (mem.freed.size() > MemConstants::CullSize) { const auto cullTo = MemConstants::CullSize / 5; @@ -102,5 +88,6 @@ void PoolMem::freePtr(void* ptr) mem.freed.pop_back(); } } + */ } diff --git a/lib/sba/sba.h b/lib/sba/sba.h index 41d4c5f..5fb53a1 100644 --- a/lib/sba/sba.h +++ b/lib/sba/sba.h @@ -2,6 +2,7 
@@ #include #include #include "threads/locks.h" +#include "../heapstack/heapstack.h" namespace MemConstants { @@ -30,9 +31,11 @@ class PoolMem int32_t index{ 0 }; const int64_t maxSize; std::vector freed; + HeapStack* heap; memory_s(const int64_t maxSize) : - maxSize(maxSize) + maxSize(maxSize), + heap(new HeapStack()) {} }; @@ -70,22 +73,6 @@ class PoolMem { 12544 }, { 14400 }, { 16384 }, -/* { 18496 }, - { 20736 }, - { 23104 }, - { 25600 }, - { 28224 }, - { 30976 }, - { 33856 }, - { 36864 }, - { 40000 }, - { 43264 }, - { 46656 }, - { 50176 }, - { 53824 }, - { 57600 }, - { 61504 }, - { 65536 }, */ }; std::vector bucketLookup; @@ -102,7 +89,7 @@ class PoolMem return pool; } - void* getPtr(int64_t size); + void* getPtr(const int64_t size); void freePtr(void* ptr); }; diff --git a/src/asyncpool.cpp b/src/asyncpool.cpp index 95f4129..245fd01 100644 --- a/src/asyncpool.cpp +++ b/src/asyncpool.cpp @@ -84,7 +84,7 @@ void AsyncPool::resumeAsync() if (globalAsyncLockDepth == 0) globalAsyncInitSuspend = false; - while (globalAsyncSuspendedWorkerCount != 0) + while (globalAsyncLockDepth == 0 && globalAsyncSuspendedWorkerCount != 0) this_thread::sleep_for(chrono::milliseconds(1)); } diff --git a/src/customer_props.cpp b/src/customer_props.cpp index 263e1ae..5182307 100644 --- a/src/customer_props.cpp +++ b/src/customer_props.cpp @@ -1,10 +1,77 @@ #include +#include #include "customer_props.h" #include "table.h" #include "properties.h" #include "dbtypes.h" +enum PackingSize_e : int8_t +{ + bits8 = 0, + bits16 = 1, + bits32 = 2, + bits64 = 3 +}; + +void openset::db::CustomerProps::encodeValue(const int64_t value) +{ + if (value >= SCHAR_MIN && value <= SCHAR_MAX) + { + *mem.newInt8() = static_cast(PackingSize_e::bits8); + *mem.newInt8() = value; + return; + } + + if (value >= SHRT_MIN && value <= SHRT_MAX) + { + *mem.newInt8() = static_cast(PackingSize_e::bits16); + *mem.newInt16() = value; + return; + } + + if (value >= LONG_MIN && value <= LONG_MAX) + { + *mem.newInt8() = 
static_cast(PackingSize_e::bits32); + *mem.newInt32() = value; + return; + } + + *mem.newInt8() = static_cast(PackingSize_e::bits64); + *mem.newInt64() = value; +} + +int64_t openset::db::CustomerProps::decodeValue(char*& data) +{ + const auto size = *reinterpret_cast(data); + ++data; + + int64_t value; + + switch (size) + { + case bits8: + value = *reinterpret_cast(data); + data += sizeof(int8_t); + break; + case bits16: + value = *reinterpret_cast(data); + data += sizeof(int16_t); + break; + case bits32: + value = *reinterpret_cast(data); + data += sizeof(int32_t); + break; + case bits64: + default: + value = *reinterpret_cast(data); + data += sizeof(int64_t); + break; + } + + return value; +} + void openset::db::CustomerProps::reset() { mem.reset(); @@ -23,7 +90,7 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) auto tableProps = table->getProperties(); - const auto count = mem.newInt32(); + const auto count = mem.newInt16(); *count = 0; for (auto& prop : props) @@ -64,12 +131,10 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) } // store column index - *mem.newInt32() = static_cast(info->idx); - // store column type - *mem.newInt32() = static_cast(info->type); + *mem.newInt16() = static_cast(info->idx); // placeholder size - const auto size = mem.newInt32(); + //const auto size = mem.newInt32(); const auto startOffset = mem.getBytes(); @@ -79,50 +144,50 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) if (info->isSet) { // store number of elements - *mem.newInt32() = prop.second.len(); + *mem.newInt16() = prop.second.len(); for (auto& item : *var.getSet()) - *mem.newInt64() = item.getInt64(); + encodeValue(item.getInt64()); } else { - *mem.newInt64() = var.getInt64(); // copy the union in cvar + encodeValue(var.getInt64()); // copy the union in cvar } break; case openset::db::PropertyTypes_e::doubleProp: if (info->isSet) { // store number of elements - 
*mem.newInt32() = prop.second.len(); + *mem.newInt16() = prop.second.len(); for (auto& item : *var.getSet()) - *mem.newInt64() = round(item.getDouble() * 10000); + encodeValue(round(item.getDouble() * 10000)); } else { - *mem.newInt64() = round(var.getDouble() * 10000); // copy the union in cvar + encodeValue(round(var.getDouble() * 10000)); // copy the union in cvar } break; case openset::db::PropertyTypes_e::boolProp: if (info->isSet) { // store number of elements - *mem.newInt32() = prop.second.len(); + *mem.newInt16() = prop.second.len(); for (auto& item : *var.getSet()) - *mem.newInt64() = item.getBool() ? 1 : 0; + encodeValue(item.getBool() ? 1 : 0); } else { - *mem.newInt64() = var.getBool() ? 1 : 0; // copy the union in cvar + encodeValue(var.getBool() ? 1 : 0); // copy the union in cvar } break; case openset::db::PropertyTypes_e::textProp: if (info->isSet) { // store number of elements - *mem.newInt32() = prop.second.len(); + *mem.newInt16() = prop.second.len(); for (auto& item : *var.getSet()) { const auto text = item.getString(); - *mem.newInt32() = text.length(); + *mem.newInt16() = text.length(); const auto buffer = mem.newPtr(text.length()); memcpy(buffer, text.c_str(), text.length()); } @@ -130,7 +195,7 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) else { const auto text = var.getString(); - *mem.newInt32() = text.length(); + *mem.newInt16() = text.length(); const auto buffer = mem.newPtr(text.length()); memcpy(buffer, text.c_str(), text.length()); } @@ -138,7 +203,7 @@ char* openset::db::CustomerProps::encodeCustomerProps(openset::db::Table* table) } // update size of data - *size = mem.getBytes() - startOffset; + //ize = mem.getBytes() - startOffset; ++(*count); } @@ -154,111 +219,96 @@ void openset::db::CustomerProps::decodeCustomerProps(openset::db::Table* table, return; auto tableProps = table->getProperties(); - const auto count = static_cast(*data); - data += sizeof(int32_t); + const auto count = 
static_cast(*data); + data += sizeof(int16_t); for (auto i = 0; i < count; ++i) { - const auto propIndex = *reinterpret_cast(data); - data += sizeof(int32_t); - const auto propType = *reinterpret_cast(data); - data += sizeof(int32_t); - const auto recordSize = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto propIndex = *reinterpret_cast(data); + data += sizeof(int16_t); - const auto info = tableProps->getProperty(propIndex); + //const auto prop16 = *reinterpret_cast(data); - // skip if something has changed (dropped or redefined column?) - if (!info->isCustomerProperty || info->type != propType) - { - data += recordSize; - continue; - } + //const auto propType = static_cast(prop16); + //data += sizeof(int32_t); + + //const auto recordSize = *reinterpret_cast(data); + //data += sizeof(int32_t); + + const auto info = tableProps->getProperty(propIndex); - switch (propType) + switch (info->type) { case openset::db::PropertyTypes_e::intProp: if (info->isSet) { - const auto elements = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto elements = *reinterpret_cast(data); + data += sizeof(int16_t); cvar set; set.set(); for (auto e = 0; e < elements; ++e) - { - set += *reinterpret_cast(data); - data += sizeof(int64_t); - } + set += decodeValue(data); props[propIndex] = std::move(set); } else { - props[propIndex] = *reinterpret_cast(data); - data += sizeof(int64_t); + props[propIndex] = decodeValue(data); } break; case openset::db::PropertyTypes_e::doubleProp: if (info->isSet) { - const auto elements = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto elements = *reinterpret_cast(data); + data += sizeof(int16_t); cvar set; set.set(); for (auto e = 0; e < elements; ++e) - { - set += (static_cast(*reinterpret_cast(data)) / 10000.0); - data += sizeof(int64_t); - } + set += (static_cast(decodeValue(data)) / 10000.0); props[propIndex] = std::move(set); } else { - props[propIndex] = (static_cast(*reinterpret_cast(data)) / 10000.0); - 
data += sizeof(int64_t); + props[propIndex] = (static_cast(decodeValue(data)) / 10000.0); } break; case openset::db::PropertyTypes_e::boolProp: if (info->isSet) { - const auto elements = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto elements = *reinterpret_cast(data); + data += sizeof(int16_t); cvar set; set.set(); for (auto e = 0; e < elements; ++e) - { - set += *reinterpret_cast(data) ? true : false; - data += sizeof(int64_t); - } + set += decodeValue(data) ? true : false; props[propIndex] = std::move(set); } else { - props[propIndex] = *reinterpret_cast(data) ? true : false; - data += sizeof(int64_t); + props[propIndex] = decodeValue(data) ? true : false; } break; case openset::db::PropertyTypes_e::textProp: if (info->isSet) { - const auto elements = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto elements = *reinterpret_cast(data); + data += sizeof(int16_t); cvar set; set.set(); for (auto e = 0; e < elements; ++e) { - const auto textLength = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto textLength = *reinterpret_cast(data); + data += sizeof(int16_t); set += std::string(data, textLength); data += textLength; } @@ -267,8 +317,8 @@ void openset::db::CustomerProps::decodeCustomerProps(openset::db::Table* table, } else { - const auto textLength = *reinterpret_cast(data); - data += sizeof(int32_t); + const auto textLength = *reinterpret_cast(data); + data += sizeof(int16_t); props[propIndex] = std::string(data, textLength); data += textLength; } diff --git a/src/customer_props.h b/src/customer_props.h index a14a600..194f17c 100644 --- a/src/customer_props.h +++ b/src/customer_props.h @@ -30,6 +30,9 @@ namespace openset void reset(); + void encodeValue(int64_t value); + static int64_t decodeValue(char*& data); + char* encodeCustomerProps(openset::db::Table* table); void decodeCustomerProps(openset::db::Table* table, char* data); @@ -55,5 +58,6 @@ namespace openset CustomerPropMap* getCustomerProps(); }; + }; }; \ 
No newline at end of file diff --git a/src/customers.cpp b/src/customers.cpp index 6a8da62..0f66e89 100644 --- a/src/customers.cpp +++ b/src/customers.cpp @@ -11,8 +11,12 @@ Customers::Customers(const int partition) : Customers::~Customers() { - for (const auto &person: customerLinear) - PoolMem::getPool().freePtr(person); + for (auto i = 0; i < customerLinear.size(); ++i) + { + PoolMem::getPool().freePtr(customerLinear.at(i)); + } + //for (const auto &person: customerLinear) + //PoolMem::getPool().freePtr(person); } PersonData_s* Customers::getCustomerByID(int64_t userId) @@ -53,7 +57,7 @@ PersonData_s* Customers::getCustomerByLIN(const int64_t linId) if (linId < 0 || linId >= customerLinear.size()) return nullptr; - return customerLinear[linId]; + return customerLinear.at(linId); } PersonData_s* Customers::createCustomer(int64_t userId) @@ -73,7 +77,7 @@ PersonData_s* Customers::createCustomer(int64_t userId) newUser->props = nullptr; customerMap.set(userId, newUser->linId); - customerLinear.emplace_back(newUser); + customerLinear.push_back(newUser); return newUser; /* @@ -127,7 +131,7 @@ PersonData_s* Customers::createCustomer(string userIdString) newUser->setIdStr(userIdString); customerMap.set(hashId, newUser->linId); - customerLinear.emplace_back(newUser); + customerLinear.push_back(newUser); return newUser; } @@ -143,8 +147,8 @@ PersonData_s* Customers::createCustomer(string userIdString) void Customers::replaceCustomerRecord(PersonData_s* newRecord) { - if (newRecord && customerLinear[newRecord->linId] != newRecord) - customerLinear[newRecord->linId] = newRecord; + if (newRecord && customerLinear.at(newRecord->linId) != newRecord) + customerLinear.at(newRecord->linId) = newRecord; } int64_t Customers::customerCount() const @@ -159,9 +163,9 @@ void Customers::drop(const int64_t userId) if (!info) return; + // TODO - fix //customerMap.erase(userId); - - customerLinear[info->linId] = nullptr; + //customerLinear.at(info->linId) = nullptr; 
PoolMem::getPool().freePtr(info); } @@ -169,7 +173,8 @@ void Customers::drop(const int64_t userId) void Customers::serialize(HeapStack* mem) { // grab 8 bytes, and set the block type at that address - *recast(mem->newPtr(sizeof(int64_t))) = serializedBlockType_e::people; + /* + recast(mem->newPtr(sizeof(int64_t))) = serializedBlockType_e::people; // grab 8 more bytes, this will be the length of the attributes data within the block const auto sectionLength = recast(mem->newPtr(sizeof(int64_t))); @@ -186,10 +191,12 @@ void Customers::serialize(HeapStack* mem) memcpy(serializedPerson, person, size); *sectionLength += size; } + */ } int64_t Customers::deserialize(char* mem) { + /* auto read = mem; if (*recast(read) != serializedBlockType_e::people) @@ -234,4 +241,6 @@ int64_t Customers::deserialize(char* mem) } return sectionLength + 16; + */ + return 0; } diff --git a/src/customers.h b/src/customers.h index 04d38da..5932f9b 100644 --- a/src/customers.h +++ b/src/customers.h @@ -7,6 +7,7 @@ #include "grid.h" #include +#include "mem/segmented_list.h" using namespace std; @@ -20,7 +21,7 @@ namespace openset { public: BinaryListHash customerMap; - vector customerLinear; + SegmentedList customerLinear; int partition; explicit Customers(int partition); diff --git a/src/oloop_insert.cpp b/src/oloop_insert.cpp index e72b05a..4dbdd2b 100644 --- a/src/oloop_insert.cpp +++ b/src/oloop_insert.cpp @@ -41,7 +41,7 @@ void OpenLoopInsert::prepare() return; } - tablePartitioned->checkForSegmentChanges(); + tablePartitioned->syncPartitionSegmentsWithTableSegments(); } void OpenLoopInsert::OnInsert(const std::string& uuid, SegmentPartitioned_s* segment) @@ -68,7 +68,7 @@ void OpenLoopInsert::OnInsert(const std::string& uuid, SegmentPartitioned_s* seg auto returns = segment->interpreter->getLastReturn(); // set bit according to interpreter results - auto bits = segment->getBits(tablePartitioned->attributes); + const auto bits = segment->getBits(tablePartitioned->attributes); const auto 
stateChange = segment->setBit(bits, personData->linId, returns.size() && returns[0].getBool() == true); if (stateChange != SegmentPartitioned_s::SegmentChange_e::noChange) { @@ -81,7 +81,7 @@ bool OpenLoopInsert::run() const auto mapInfo = globals::mapper->partitionMap.getState(tablePartitioned->partition, globals::running->nodeId); // check partition segment data against master and update if necessary - tablePartitioned->checkForSegmentChanges(); + tablePartitioned->syncPartitionSegmentsWithTableSegments(); if (mapInfo != openset::mapping::NodeState_e::active_owner && mapInfo != openset::mapping::NodeState_e::active_clone) diff --git a/src/oloop_seg_refresh.cpp b/src/oloop_seg_refresh.cpp index 2de86d2..f5327b6 100644 --- a/src/oloop_seg_refresh.cpp +++ b/src/oloop_seg_refresh.cpp @@ -93,7 +93,13 @@ bool OpenLoopSegmentRefresh::nextExpired() macros = segmentsIter->second.macros; segmentInfo = &parts->segments[segmentName]; - //cout << "segment refresh: " << segmentName << endl; + if (macros.alwaysFresh) + { + parts->setSegmentRefresh(segmentName, macros.segmentRefresh); + parts->setSegmentTTL(segmentName, macros.segmentTTL); + ++segmentsIter; + continue; + } // generate the index for this query indexing.mount(table.get(), macros, loop->partition, maxLinearId); @@ -184,7 +190,7 @@ void OpenLoopSegmentRefresh::prepare() return; } - parts->checkForSegmentChanges(); + parts->syncPartitionSegmentsWithTableSegments(); ++parts->segmentUsageCount; segmentsIter = parts->segments.begin(); diff --git a/src/oloop_segment.cpp b/src/oloop_segment.cpp index d016312..28a3ea5 100644 --- a/src/oloop_segment.cpp +++ b/src/oloop_segment.cpp @@ -62,7 +62,7 @@ void OpenLoopSegment::storeResult(std::string& name, int64_t count) const rowKey.key[0] = nameHash; rowKey.types[0] = ResultTypes_e::Text; - auto aggs = result->getMakeAccumulator(rowKey); + const auto aggs = result->getMakeAccumulator(rowKey); set_cb(aggs); } @@ -134,7 +134,6 @@ bool OpenLoopSegment::nextMacro() ); 
parts->attributes.clearDirty(); - suicide(); return false; @@ -158,13 +157,13 @@ bool OpenLoopSegment::nextMacro() beforeBits.opCopy(*bits); // should we return these bits, as a cached copy? - if (macros.useCached && !parts->isRefreshDue(segmentName)) + if (macros.useCached && !macros.alwaysFresh && !parts->isRefreshDue(segmentName)) { if (bits) { storeResult(segmentName, bits->population(maxLinearId)); ++macroIter; - continue; // try another index + continue; // done, move to next index } // cached copy not found... carry on! } @@ -250,7 +249,7 @@ void OpenLoopSegment::prepare() return; } - parts->checkForSegmentChanges(); + parts->syncPartitionSegmentsWithTableSegments(); ++parts->segmentUsageCount; maxLinearId = parts->people.customerCount(); diff --git a/src/querycommon.h b/src/querycommon.h index 99ede12..b958080 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -928,6 +928,7 @@ namespace openset bool writesProps { true }; // script can change props bool useGlobals { false }; // uses global for table bool useCached { false }; // for segments allow use of cached values within TTL + bool alwaysFresh { false }; // cached, but always calculated fresh on query bool isSegmentMath { false }; // for segments, the index has the value, script execution not required bool useSessions { false }; // uses session functions, we can cache these bool useStampedRowIds { false }; // count using row stamp rather than row uniqueness diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index 2010772..c248b98 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -1289,10 +1289,20 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping table->setSegmentTtl(r.sectionName, r.flags["ttl"]); } + const auto alwaysFresh = r.flags.contains("always_fresh") ? 
r.flags["use_cached"].getBool() : false; + + // item is cached for subsequent queries, but generates a fresh copy when queried + if (alwaysFresh) + { + r.flags["use_cached"] = true; + r.flags["refresh"] = 86400000; + } + const auto zIndex = r.flags.contains("z_index") ? r.flags["z_index"].getInt32() : 100; const auto onInsert = r.flags.contains("on_insert") ? r.flags["on_insert"].getBool() : false; const auto useCached = r.flags.contains("use_cached") ? r.flags["use_cached"].getBool() : false; + queryMacros.alwaysFresh = alwaysFresh; queryMacros.useCached = useCached; queryMacros.isSegment = true; diff --git a/src/tablepartitioned.cpp b/src/tablepartitioned.cpp index b890db3..740cbf7 100644 --- a/src/tablepartitioned.cpp +++ b/src/tablepartitioned.cpp @@ -105,7 +105,7 @@ openset::query::Interpreter* TablePartitioned::getInterpreter(const std::string& return segments[segmentName].getInterpreter(attributes, people.customerCount()); } -void TablePartitioned::checkForSegmentChanges() +void TablePartitioned::syncPartitionSegmentsWithTableSegments() { // if segment calculations are taking place in an open-loop // we will not change or invalidate any segment records @@ -169,7 +169,11 @@ void TablePartitioned::checkForSegmentChanges() // delete any segments in the cleanup list for (auto &segName : orphanedSegments) + { segments.erase(segName); + segmentRefresh.erase(segName); + segmentTTL.erase(segName); + } std::sort( onInsertList.begin(), diff --git a/src/tablepartitioned.h b/src/tablepartitioned.h index fe0e0f7..71920d9 100644 --- a/src/tablepartitioned.h +++ b/src/tablepartitioned.h @@ -93,7 +93,6 @@ namespace openset AttributeBlob* attributeBlob; Customers people; openset::async::AsyncLoop* asyncLoop; - //openset::revent::ReventManager* triggers; // map of segment names to expire times std::unordered_map segmentRefresh; @@ -116,7 +115,7 @@ namespace openset // when an open-loop is using segments it will increment this value // when it is done it will decrement this 
value. // - // checkForSegmentChanges will not invalidate segments that have changed + // syncPartitionSegmentsWithTableSegments will not invalidate segments that have changed // if this is a non-zero value... instead they will be invalidated at the // next opportunity int segmentUsageCount {0}; @@ -175,7 +174,7 @@ namespace openset openset::query::Interpreter* getInterpreter(const std::string& segmentName, int64_t maxLinearId); - void checkForSegmentChanges(); + void syncPartitionSegmentsWithTableSegments(); InterpreterList& getOnInsertSegments() { diff --git a/src/ver.h b/src/ver.h index 5838e9d..30c56d0 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5" +"0.4.5.test3" ; \ No newline at end of file From 368dafe793f434d74a2496401680d89ca9503f74 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 28 Nov 2019 23:43:47 -0500 Subject: [PATCH 18/31] fix for always_fresh segment flag --- src/queryparserosl.h | 4 ++++ src/rpc_query.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/queryparserosl.h b/src/queryparserosl.h index 138c909..f97cea3 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -4265,6 +4265,10 @@ namespace openset::query flags["use_cached"] = (keyVal[1].length() == 0 || keyVal[1][0] == 'T' || keyVal[1][0] == 't'); + else if (keyVal[0] == "always_fresh") + flags["always_fresh"] = (keyVal[1].length() == 0 || keyVal[1][0] == 'T' || keyVal[1][0] == + 't'); + else if (keyVal[0] == "on_insert") flags["on_insert"] = (keyVal[1].length() == 0 || keyVal[1][0] == 'T' || keyVal[1][0] == 't'); diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index c248b98..8345eee 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -1289,7 +1289,7 @@ void RpcQuery::segment(const openset::web::MessagePtr& message, const RpcMapping table->setSegmentTtl(r.sectionName, r.flags["ttl"]); } - const auto alwaysFresh = r.flags.contains("always_fresh") ? 
r.flags["use_cached"].getBool() : false; + const auto alwaysFresh = r.flags.contains("always_fresh") ? r.flags["always_fresh"].getBool() : false; // item is cached for subsequent queries, but generates a fresh copy when queried if (alwaysFresh) From dbb91ad1be222ca3457fb283af4a6912dfeb68e5 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 2 Dec 2019 12:47:31 -0500 Subject: [PATCH 19/31] fixed blhash.h iterator bug, double entries --- lib/mem/blhash.h | 13 ------------- src/ver.h | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index bc51b17..66c676c 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -465,21 +465,9 @@ class BinaryListHash // on a short list scanning sequentially is more efficient // because the data is fits in a cache line. - // iterating the first dozen or is most efficient - // and is quicker than list sub-division on my i7 type processor. - // Some of the newer server processors might benefit from a - // higher setting. - // - // bl_element_s = 10 bytes - // cache line = 64 bytes. - // 6 elements per cache line. - // - // testing showed a positive gain for on my processor - // at two cache lines worth of elements. 
if (node->used <= 8) { - ++first; // we just checked index 0 above, so skip it for (; first <= last; ++first) { // nesting these conditions netted 15% speed improvement @@ -490,7 +478,6 @@ class BinaryListHash return -(first + 1); } } - return -(last + 2); } diff --git a/src/ver.h b/src/ver.h index 30c56d0..819713d 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test3" +"0.4.5.test4" ; \ No newline at end of file From 86c0035c34be7edde7b500633ebc69b34b932139 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 2 Dec 2019 22:12:06 -0500 Subject: [PATCH 20/31] fixed cache eviction --- src/attributes.cpp | 9 ++++----- src/indexbits.cpp | 2 +- src/indexbits.h | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/attributes.cpp b/src/attributes.cpp index 4a952e4..5202ae5 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -11,7 +11,7 @@ Attributes::Attributes(const int partition, Table* table, AttributeBlob* attribu blob(attributeBlob), properties(properties), partition(partition), - indexCache(50) + indexCache(64) {} Attributes::~Attributes() @@ -44,8 +44,7 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) const auto& evictAttribute = attrPair->second; // compress the data, get it back in a pool ptr - evictAttribute->data = bits->store(); - + evictAttribute->data = evictBits->store(); delete evictBits; } @@ -77,7 +76,7 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) { auto key = attr_key_s( propIndex, value ); - if (auto res = propertyIndex.emplace(key, nullptr); res.second == true) + if (const auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); res.first->second = attr; @@ -93,7 +92,7 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const string& value) { auto key = attr_key_s( propIndex, MakeHash(value) ); - if 
(auto res = propertyIndex.emplace(key, nullptr); res.second == true) + if (const auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); attr->text = blob->storeValue(propIndex, value); diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 35bdeb2..65d3298 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -88,7 +88,7 @@ char* IndexMemory::compress() rawPages = std::move(newRawPages); if (rawPages.size()) - return reinterpret_cast(newRawPages.front()); + return reinterpret_cast(rawPages.front()); return nullptr; } diff --git a/src/indexbits.h b/src/indexbits.h index c976af7..49f031d 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -292,7 +292,7 @@ namespace openset if (keyValuesMap.size() > cacheSize) { const auto evictedKey = items.back(); - const auto evicted = keyValuesMap[items.back()].first; + const auto evicted = keyValuesMap[evictedKey].first; keyValuesMap.erase(items.back()); items.pop_back(); return { evictedKey.first, evictedKey.second, evicted }; From 82c55cdaa0aa13a8af0e5a107c9067b7fcde1755 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Mon, 2 Dec 2019 22:12:49 -0500 Subject: [PATCH 21/31] version bump --- src/ver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ver.h b/src/ver.h index 819713d..92db93b 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test4" +"0.4.5.test5" ; \ No newline at end of file From a5bd8347a7bf7171de4bf03702926588c99d86de Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 3 Dec 2019 12:38:07 -0500 Subject: [PATCH 22/31] fixed leak in index load/writeback --- lib/sba/sba.cpp | 5 +---- src/attributes.cpp | 2 +- src/indexbits.cpp | 16 +++++++++++----- src/indexbits.h | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/sba/sba.cpp b/lib/sba/sba.cpp index 24b637d..41d6f6f 100644 --- a/lib/sba/sba.cpp +++ 
b/lib/sba/sba.cpp @@ -10,12 +10,9 @@ PoolMem::PoolMem() for (auto &b : breakPoints) { b.index = idx; + bucketLookup.push_back(b.maxSize); ++idx; } - - // build the reverse lookup - once - for (auto &b : breakPoints) - bucketLookup.push_back(b.maxSize); } void* PoolMem::getPtr(const int64_t size) diff --git a/src/attributes.cpp b/src/attributes.cpp index 5202ae5..7f26d3e 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -11,7 +11,7 @@ Attributes::Attributes(const int partition, Table* table, AttributeBlob* attribu blob(attributeBlob), properties(properties), partition(partition), - indexCache(64) + indexCache(128) {} Attributes::~Attributes() diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 65d3298..13e1bf0 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -24,6 +24,8 @@ void IndexMemory::decompress(char* compressedData) reinterpret_cast(indexPage->bitArray), IndexPageDataSize); + rawPages.push_back(rawPage); + // next block rawPage = rawPage->next; } @@ -35,11 +37,12 @@ char* IndexMemory::compress() RawPageList newRawPages; + int64_t totalSize = 0; + auto pageIdx = -1; for (auto indexPage : indexPages) { ++pageIdx; - const auto rawPage = getRawPage(pageIdx); // we have no bits in this page (skip, and cleanup the old page) @@ -68,7 +71,7 @@ char* IndexMemory::compress() compBuffer, IndexPageDataSize, IndexPageDataSize + Overflow, - 5 + 1 ); const auto newRawPage = static_cast(PoolMem::getPool().getPtr(CompPageHeaderSize + compressedSize)); @@ -77,9 +80,7 @@ char* IndexMemory::compress() newRawPage->next = nullptr; memcpy(newRawPage->compressedData, compBuffer, compressedSize); - if (newRawPages.size()) - newRawPages.back()->next = newRawPage; - + totalSize += compressedSize + CompPageHeaderSize; newRawPages.push_back(newRawPage); } @@ -88,7 +89,12 @@ char* IndexMemory::compress() rawPages = std::move(newRawPages); if (rawPages.size()) + { + // relink raw pages + for (auto i = 0; i < rawPages.size() - 1; ++i) + rawPages[i]->next = 
rawPages[i+1]; return reinterpret_cast(rawPages.front()); + } return nullptr; } diff --git a/src/indexbits.h b/src/indexbits.h index 49f031d..7327aa2 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -10,7 +10,7 @@ namespace openset { namespace db { - const int64_t BitArraySize = 128; + const int64_t BitArraySize = 126; struct IndexPageMemory_s { From 0d9806c4901e557302367a077eb070a088fb8ec6 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 3 Dec 2019 12:38:29 -0500 Subject: [PATCH 23/31] fixed leak in index load/writeback --- src/ver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ver.h b/src/ver.h index 92db93b..789638c 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test5" +"0.4.5.test6" ; \ No newline at end of file From 03bb666c677c536f2d21a953601f45396fbbc71d Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 3 Dec 2019 12:40:04 -0500 Subject: [PATCH 24/31] remove unused allocation total --- src/indexbits.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 13e1bf0..9f1a3f6 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -37,8 +37,6 @@ char* IndexMemory::compress() RawPageList newRawPages; - int64_t totalSize = 0; - auto pageIdx = -1; for (auto indexPage : indexPages) { @@ -80,7 +78,6 @@ char* IndexMemory::compress() newRawPage->next = nullptr; memcpy(newRawPage->compressedData, compBuffer, compressedSize); - totalSize += compressedSize + CompPageHeaderSize; newRawPages.push_back(newRawPage); } From 106b72838cbd0ddacd5fa92852f78bf160a7557d Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Tue, 3 Dec 2019 21:54:39 -0500 Subject: [PATCH 25/31] potentially fixed index writeback bug --- lib/sba/sba.cpp | 33 ++--- lib/sba/sba.h | 1 + src/asyncpool.cpp | 9 +- src/asyncpool.h | 262 +++++++++++++++++++-------------------- src/attributes.cpp | 8 +- src/dbtypes.h | 9 +- src/http_serve.cpp | 2 +- 
src/indexbits.cpp | 60 +++------ src/indexbits.h | 24 +--- src/main.cpp | 2 +- src/queryinterpreter.cpp | 14 +-- src/rpc_cluster.cpp | 28 +++-- src/rpc_table.cpp | 20 ++- src/service.cpp | 3 +- src/table.cpp | 1 + src/ver.h | 2 +- 16 files changed, 235 insertions(+), 243 deletions(-) diff --git a/lib/sba/sba.cpp b/lib/sba/sba.cpp index 41d6f6f..fd09b71 100644 --- a/lib/sba/sba.cpp +++ b/lib/sba/sba.cpp @@ -17,8 +17,7 @@ PoolMem::PoolMem() void* PoolMem::getPtr(const int64_t size) { - // give us the starting bucket for iteration - int64_t bucket = 0; + int64_t bucket = 0; // will iterate through buckets of matching sqrt until one fits or we hit the end. // this will iterate once or twice @@ -34,7 +33,6 @@ void* PoolMem::getPtr(const int64_t size) return alloc->data; } - // figure out which bucket size (if any) this allocation will fit auto &mem = breakPoints[bucket]; csLock lock(mem.memLock); @@ -53,12 +51,28 @@ void* PoolMem::getPtr(const int64_t size) return alloc->data; } +int PoolMem::getSize(void* ptr) +{ + const auto alloc = reinterpret_cast(static_cast(ptr) - MemConstants::PoolMemHeaderSize); + + if (alloc->poolIndex == -2) // already freed + return -2; + + // -1 means this was non-pooled so just delete it + if (alloc->poolIndex == -1) + return -1; + + return breakPoints[alloc->poolIndex].maxSize; +} + void PoolMem::freePtr(void* ptr) { const auto alloc = reinterpret_cast(static_cast(ptr) - MemConstants::PoolMemHeaderSize); if (alloc->poolIndex == -2) // already freed + { return; // nice place for a breakpoint in debug + } // -1 means this was non-pooled so just delete it if (alloc->poolIndex == -1) @@ -73,18 +87,5 @@ void PoolMem::freePtr(void* ptr) alloc->poolIndex = -2; mem.freed.push_back(alloc); - - // if a pool gets to large, trim it back - /* - if (mem.freed.size() > MemConstants::CullSize) - { - const auto cullTo = MemConstants::CullSize / 5; - while (mem.freed.size() > cullTo) - { - delete [] reinterpret_cast(mem.freed.back()); - 
mem.freed.pop_back(); - } - } - */ } diff --git a/lib/sba/sba.h b/lib/sba/sba.h index 5fb53a1..49f1f41 100644 --- a/lib/sba/sba.h +++ b/lib/sba/sba.h @@ -90,6 +90,7 @@ class PoolMem } void* getPtr(const int64_t size); + int getSize(void* ptr); void freePtr(void* ptr); }; diff --git a/src/asyncpool.cpp b/src/asyncpool.cpp index 245fd01..7ea5961 100644 --- a/src/asyncpool.cpp +++ b/src/asyncpool.cpp @@ -441,10 +441,14 @@ void AsyncPool::startAsync() workerNumber)); } + // detach and return + for (auto &w : workers) + w.detach(); + Logger::get().info(to_string(workerMax) + " async workers created."); running = true; - ThreadSleep(1000); + ThreadSleep(500); auto maintThread = thread( &AsyncPool::maint, @@ -452,9 +456,6 @@ void AsyncPool::startAsync() maintThread.detach(); - // detach and return - for (auto &w : workers) - w.detach(); } diff --git a/src/asyncpool.h b/src/asyncpool.h index abadac9..aaa642d 100644 --- a/src/asyncpool.h +++ b/src/asyncpool.h @@ -13,165 +13,165 @@ namespace openset { - namespace async - { - class AsyncPool; - } - - namespace globals - { - extern async::AsyncPool* async; - } - - namespace async - { - class OpenLoop; - - const int32_t PARTITION_WORKERS = 256; // max number of workers - max cores + hyperthreads - - class AsyncPool - { - public: - - // we store data about a shard here - struct partitionInfo_s - { - AsyncPool* asyncPool; - AsyncLoop* ooLoop; // open-ended-AsyncLoop - int instance; - int worker; - atomic realtimeCells; - - explicit partitionInfo_s(AsyncPool* asyncPool, const int instance, const int worker) : - asyncPool(asyncPool), - ooLoop(nullptr), - instance(instance), - worker(worker), - realtimeCells(0) - {} - - ~partitionInfo_s() - { - if (ooLoop) - delete ooLoop; - } - - void init() - { - ooLoop = new AsyncLoop(asyncPool, instance, worker); - } - - bool isInitialized() const - { - return (ooLoop) ? 
true : false; - } - }; - - struct workerInfo_s - { - std::mutex lock; - atomic_bool triggered {false}; - std::condition_variable conditional; - vector jobs; - atomic queued; - }; - - - CriticalSection poolLock; - - int32_t partitionMax{ 0 }; - int32_t workerMax{ 0 }; - - CriticalSection globalAsyncLock; - atomic globalAsyncInitSuspend{ false }; // we want it to suspend - atomic globalAsyncLockDepth{ 0 }; // suspend depth - atomic globalAsyncSuspendedWorkerCount{ 0 }; - - bool running; - - //OpenSet::mapping::PartitionMap partitionMap; - - workerInfo_s workerInfo[PARTITION_WORKERS]; - partitionInfo_s* partitions[PARTITION_MAX]; + namespace async + { + class AsyncPool; + } + + namespace globals + { + extern async::AsyncPool* async; + } + + namespace async + { + class OpenLoop; + + const int32_t PARTITION_WORKERS = 256; // max number of workers - max cores + hyperthreads + + class AsyncPool + { + public: + + // we store data about a shard here + struct partitionInfo_s + { + AsyncPool* asyncPool; + AsyncLoop* ooLoop; // open-ended-AsyncLoop + int instance; + int worker; + atomic realtimeCells; + + explicit partitionInfo_s(AsyncPool* asyncPool, const int instance, const int worker) : + asyncPool(asyncPool), + ooLoop(nullptr), + instance(instance), + worker(worker), + realtimeCells(0) + {} + + ~partitionInfo_s() + { + if (ooLoop) + delete ooLoop; + } + + void init() + { + ooLoop = new AsyncLoop(asyncPool, instance, worker); + } + + bool isInitialized() const + { + return (ooLoop) ? 
true : false; + } + }; + + struct workerInfo_s + { + std::mutex lock; + atomic_bool triggered {false}; + std::condition_variable conditional; + vector jobs; + atomic queued; + }; + + + CriticalSection poolLock; + + int32_t partitionMax{ 0 }; + int32_t workerMax{ 0 }; + + CriticalSection globalAsyncLock; + atomic globalAsyncInitSuspend{ false }; // we want it to suspend + atomic globalAsyncLockDepth{ 0 }; // suspend depth + atomic globalAsyncSuspendedWorkerCount{ 0 }; + + bool running; + + //OpenSet::mapping::PartitionMap partitionMap; + + workerInfo_s workerInfo[PARTITION_WORKERS]; + partitionInfo_s* partitions[PARTITION_MAX]; atomic lastZombieStamp{0}; std::vector zombiePartitions; - AsyncPool(int32_t ShardMax, int32_t WorkerMax) : - partitionMax(ShardMax), - workerMax(WorkerMax), - running(false) - { - openset::globals::async = this; + AsyncPool(int32_t ShardMax, int32_t WorkerMax) : + partitionMax(ShardMax), + workerMax(WorkerMax), + running(false) + { + openset::globals::async = this; - // all nulls - memset(partitions, 0, sizeof(partitions)); + // all nulls + memset(partitions, 0, sizeof(partitions)); - for (auto &wInfo : workerInfo) - wInfo.queued = 0; - } + for (auto &wInfo : workerInfo) + wInfo.queued = 0; + } - ~AsyncPool() = default; + ~AsyncPool() = default; - int getLeastBusy() const; + int getLeastBusy() const; - void mapPartitionsToAsyncWorkers(); + void mapPartitionsToAsyncWorkers(); - void suspendAsync(); - void resumeAsync(); - void waitForResume(); - void assertAsyncLock() const; + void suspendAsync(); + void resumeAsync(); + void waitForResume(); + void assertAsyncLock() const; - AsyncLoop* initPartition(int32_t partition); + AsyncLoop* initPartition(int32_t partition); void balancePartitions(); - void freePartition(int32_t partition); + void freePartition(int32_t partition); - /* Add a cell to every the loop object in every partition - * calls back to a factory function that builds the cell - */ - void cellFactory(std::vector partitionList, 
const function& factory); - void cellFactory(const function& factory); + /* Add a cell to every the loop object in every partition + * calls back to a factory function that builds the cell + */ + void cellFactory(std::vector partitionList, const function& factory); + void cellFactory(const function& factory); void purgeByTable(const std::string& tableName); - int32_t count(); + int32_t count(); - AsyncLoop* isPartition(int32_t shardNumber); - AsyncLoop* getPartition(int32_t shardNumber); + AsyncLoop* isPartition(int32_t shardNumber); + AsyncLoop* getPartition(int32_t shardNumber); - void realtimeInc(int32_t shardNumber); - void realtimeDec(int32_t shardNumber); - int32_t getRealtimeRunning(int32_t shardNumber) const; + void realtimeInc(int32_t shardNumber); + void realtimeDec(int32_t shardNumber); + int32_t getRealtimeRunning(int32_t shardNumber) const; - bool isRunning() const - { - return running; - } + bool isRunning() const + { + return running; + } - int getPartitionMax() const - { - return partitionMax; - } + int getPartitionMax() const + { + return partitionMax; + } - int getWorkerCount() const - { - return workerMax; - } + int getWorkerCount() const + { + return workerMax; + } - void setPartitionMax(int maxPartitions) - { - partitionMax = maxPartitions; - } + void setPartitionMax(int maxPartitions) + { + partitionMax = maxPartitions; + } - void runner(int32_t workerId) noexcept; + void runner(int32_t workerId) noexcept; void maint() noexcept; - void startAsync(); - }; - }; + void startAsync(); + }; + }; }; diff --git a/src/attributes.cpp b/src/attributes.cpp index 7f26d3e..96e30c6 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -40,10 +40,7 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) // if anything got squeezed out compress it if (evictBits) { - const auto& attrPair = propertyIndex.find({ static_cast(evictPropIndex), evictValue }); - const auto& evictAttribute = attrPair->second; - - // compress the data, get 
it back in a pool ptr + const auto evictAttribute = Attributes::getMake(static_cast(evictPropIndex), evictValue); evictAttribute->data = evictBits->store(); delete evictBits; } @@ -79,6 +76,8 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) if (const auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); + attr->data = nullptr; + attr->text = nullptr; res.first->second = attr; return attr; } @@ -95,6 +94,7 @@ Attr_s* Attributes::getMake(const int32_t propIndex, const string& value) if (const auto& res = propertyIndex.emplace(key, nullptr); res.second == true) { const auto attr = new(PoolMem::getPool().getPtr(sizeof(Attr_s)))Attr_s(); + attr->data = nullptr; attr->text = blob->storeValue(propIndex, value); res.first->second = attr; return attr; diff --git a/src/dbtypes.h b/src/dbtypes.h index fd105b7..28f939a 100644 --- a/src/dbtypes.h +++ b/src/dbtypes.h @@ -126,7 +126,14 @@ namespace std { size_t operator()(const openset::db::attr_key_s& x) const { - return (uint64_t(x.index) << 32) + x.value; + return static_cast(XXH64( + reinterpret_cast(&x.index), + 4, + XXH64( + reinterpret_cast(&x.value), + 8, + HASH_SEED) + )); } }; }; diff --git a/src/http_serve.cpp b/src/http_serve.cpp index 899ec5e..57784e8 100644 --- a/src/http_serve.cpp +++ b/src/http_serve.cpp @@ -146,7 +146,7 @@ namespace openset::web void HttpServe::makeWorkers() { - const auto workerCount = std::thread::hardware_concurrency(); + const auto workerCount = 8; // TODO make a switch std::thread::hardware_concurrency(); workers.reserve(workerCount); threads.reserve(workerCount); diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 9f1a3f6..3510554 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -9,11 +9,11 @@ using namespace openset::db; void IndexMemory::decompress(char* compressedData) { + reset(); + if (!compressedData) return; - reset(); - auto rawPage = 
reinterpret_cast(compressedData); while (rawPage) @@ -33,63 +33,42 @@ void IndexMemory::decompress(char* compressedData) char* IndexMemory::compress() { - const auto compBuffer = static_cast(PoolMem::getPool().getPtr(IndexPageDataSize + Overflow)); + const auto bufferSize = LZ4_compressBound(IndexPageDataSize); + const auto compBuffer = static_cast(PoolMem::getPool().getPtr(bufferSize)); - RawPageList newRawPages; + for (auto rawPage : rawPages) + PoolMem::getPool().freePtr(rawPage); + rawPages.clear(); - auto pageIdx = -1; + auto pageNumber = -1; for (auto indexPage : indexPages) { - ++pageIdx; - const auto rawPage = getRawPage(pageIdx); + ++pageNumber; - // we have no bits in this page (skip, and cleanup the old page) - if (!pagePopulation(indexPage)) - { - if (rawPage) - PoolMem::getPool().freePtr(rawPage); - continue; - } - - // use existing if we already have compressed version of this and nothing changed - if (rawPage) - { - if (!indexPage->dirty) - { - newRawPages.push_back(rawPage); - continue; - } - PoolMem::getPool().freePtr(rawPage); - } - - indexPage->dirty = false; - - const auto compressedSize = LZ4_compress_fast( + const auto compressedSize = LZ4_compress_default( reinterpret_cast(indexPage->bitArray), compBuffer, IndexPageDataSize, - IndexPageDataSize + Overflow, - 1 + bufferSize ); const auto newRawPage = static_cast(PoolMem::getPool().getPtr(CompPageHeaderSize + compressedSize)); - newRawPage->index = pageIdx; + newRawPage->index = pageNumber; newRawPage->next = nullptr; memcpy(newRawPage->compressedData, compBuffer, compressedSize); - newRawPages.push_back(newRawPage); + rawPages.push_back(newRawPage); } PoolMem::getPool().freePtr(compBuffer); - rawPages = std::move(newRawPages); - if (rawPages.size()) { // relink raw pages - for (auto i = 0; i < rawPages.size() - 1; ++i) - rawPages[i]->next = rawPages[i+1]; + for (auto i = 0; i < rawPages.size(); ++i) + rawPages[i]->next = (i == rawPages.size() - 1) ? 
nullptr : rawPages[i+1]; + return reinterpret_cast(rawPages.front()); } @@ -190,7 +169,6 @@ void IndexBits::bitSet(const int64_t index) { const auto bits = data.getBitInt(index); *bits |= BITMASK[index & 63ULL]; // mod 64 - data.setDirty(); } void IndexBits::setSizeByBit(const int64_t index) @@ -202,7 +180,6 @@ void IndexBits::bitClear(const int64_t index) { const auto bits = data.getBitInt(index); *bits &= ~(BITMASK[index & 63ULL]); // mod 64 - data.setDirty(); } bool IndexBits::bitState(const int64_t index) @@ -255,7 +232,6 @@ void IndexBits::opCopy(const IndexBits& source) reset(); data = source.data; placeHolder = source.placeHolder; - data.setDirtyAllPages(); } void IndexBits::opCopyNot(IndexBits& source) @@ -276,7 +252,6 @@ void IndexBits::opAnd(IndexBits& source) { const auto dest = data.getInt(index); *dest &= *source.data.getInt(index); - data.setDirty(); ++index; } } @@ -293,7 +268,6 @@ void IndexBits::opOr(IndexBits& source) { const auto dest = data.getInt(index); *dest |= *source.data.getInt(index); - data.setDirty(); ++index; } } @@ -310,7 +284,6 @@ void IndexBits::opAndNot(IndexBits& source) { const auto dest = data.getInt(index); *dest = *dest & ~(*source.data.getInt(index)); - data.setDirty(); ++index; } } @@ -327,7 +300,6 @@ void IndexBits::opNot() { const auto dest = data.getInt(index); *dest = ~(*dest); - data.setDirty(); ++index; } } diff --git a/src/indexbits.h b/src/indexbits.h index 7327aa2..f7d0579 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -14,8 +14,6 @@ namespace openset struct IndexPageMemory_s { - bool dirty { false }; - // 4096 bytes int64_t bitArray[BitArraySize]; }; @@ -24,16 +22,14 @@ namespace openset const int64_t IndexBitsPerPage = BitArraySize * 64; const int64_t Overflow = 64; -#pragma pack(push,1) struct CompPageMemory_s { - int index { 0 }; + int64_t index { 0 }; CompPageMemory_s* next { nullptr }; char compressedData[IndexPageDataSize]; }; -#pragma pack(pop) - const int64_t CompPageHeaderSize = sizeof(int) + 
sizeof(CompPageMemory_s*); + const int64_t CompPageHeaderSize = 16; class IndexMemory { @@ -144,18 +140,6 @@ namespace openset return page->bitArray + indexInPage; } - void setDirty() const - { - if (lastIndex) - lastIndex->dirty = true; - } - - void setDirtyAllPages() - { - for (const auto page : indexPages) - page->dirty = true; - } - IndexPageMemory_s* getPage(const int64_t bitIndex) { const auto pageIndex = bitIndex / IndexBitsPerPage; // convert bit index into page in dex @@ -293,7 +277,7 @@ namespace openset { const auto evictedKey = items.back(); const auto evicted = keyValuesMap[evictedKey].first; - keyValuesMap.erase(items.back()); + keyValuesMap.erase(evictedKey); items.pop_back(); return { evictedKey.first, evictedKey.second, evicted }; } @@ -305,7 +289,7 @@ namespace openset { const Key key(propIndex, value); - if (auto iter = keyValuesMap.find(key); iter != keyValuesMap.end()) + if (const auto& iter = keyValuesMap.find(key); iter != keyValuesMap.end()) { items.erase(iter->second.second); items.push_front(key); diff --git a/src/main.cpp b/src/main.cpp index 80810bd..560b3b2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,7 +39,7 @@ void StartOpenSet(openset::config::CommandlineArgs args) Logger::get().info("OpenSet v" + __version__); Logger::get().info("OpenSet, Copyright(c) 2015 - 2019, Seth Hamilton."); - const auto workerCount = std::thread::hardware_concurrency(); + const auto workerCount = 8;// TODO make this a switch std::thread::hardware_concurrency(); Logger::get().info(to_string(workerCount) + " processor cores available."); args.fix(); // fix the default startup arguments after WSAStartup (on windows) diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 432e1f4..651ad2b 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -1863,14 +1863,14 @@ void openset::query::Interpreter::opRunner(Instruction_s* inst, int64_t currentR { auto colValue = NONE; // extract property value from grid->propRow - //if 
(macros.vars.tableVars[inst->index].isCustomerProperty) - //{ - // colValue = propRow->cols[macros.vars.tableVars[inst->index].property]; - // } - //else - //{ + if (readRow >= rowCount) + { + *stackPtr = None; + ++stackPtr; + break; + } + colValue = (*rows)[readRow]->cols[macros.vars.tableVars[inst->index].column]; - //} switch (macros.vars.tableVars[inst->index].schemaType) { diff --git a/src/rpc_cluster.cpp b/src/rpc_cluster.cpp index b937d67..fa48edb 100644 --- a/src/rpc_cluster.cpp +++ b/src/rpc_cluster.cpp @@ -62,20 +62,32 @@ void RpcCluster::init(const openset::web::MessagePtr message, const RpcMapping& // update config { csLock lock(globals::running->cs); + + if (partitions->isRunning()) + { + RpcError( + openset::errors::Error{ + openset::errors::errorClass_e::config, + openset::errors::errorCode_e::general_config_error, + "This instance is already part of a cluster (2)" }, + message); + return; + } + globals::running->setNodeName(openset::config::createName()); globals::running->state = openset::config::NodeState_e::active; globals::running->partitionMax = partitionMax; Logger::get().info("instance has been named '" + globals::running->nodeName + "'."); - } - openset::globals::mapper->partitionMap.clear(); - for (auto i = 0; i < partitionMax; ++i) - openset::globals::mapper->partitionMap.setOwner(i, globals::running->nodeId); + openset::globals::mapper->partitionMap.clear(); + for (auto i = 0; i < partitionMax; ++i) + openset::globals::mapper->partitionMap.setOwner(i, globals::running->nodeId); - // set number of partitions - partitions->setPartitionMax(partitionMax); - // set them running - this return right away - partitions->startAsync(); + // set number of partitions + partitions->setPartitionMax(partitionMax); + // set them running - this returns right away + partitions->startAsync(); + } partitions->mapPartitionsToAsyncWorkers(); diff --git a/src/rpc_table.cpp b/src/rpc_table.cpp index 47a77e8..8047aaf 100644 --- a/src/rpc_table.cpp +++ 
b/src/rpc_table.cpp @@ -7,6 +7,8 @@ #include "common.h" #include "cjson/cjson.h" +#include "threads/locks.h" + #include "oloop_insert.h" #include "oloop_property.h" #include "oloop_histogram.h" @@ -29,6 +31,8 @@ using namespace openset::comms; using namespace openset::db; using namespace openset::result; +CriticalSection RpcTableCs; + void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMapping& matches) { @@ -36,6 +40,8 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa if (ForwardRequest(message) != ForwardStatus_e::alreadyForwarded) return; + csLock rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); const auto tableName = matches.find("table"s)->second; @@ -298,6 +304,8 @@ void openset::comms::RpcTable::table_drop(const openset::web::MessagePtr& messag if (ForwardRequest(message) != ForwardStatus_e::alreadyForwarded) return; + csLock rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); const auto tableName = matches.find("table"s)->second; @@ -334,6 +342,8 @@ void openset::comms::RpcTable::table_drop(const openset::web::MessagePtr& messag void RpcTable::table_describe(const openset::web::MessagePtr& message, const RpcMapping& matches) { + csLock rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); @@ -421,17 +431,17 @@ void RpcTable::table_describe(const openset::web::MessagePtr& message, const Rpc const auto settings = response.setObject("settings"); table->serializeSettings(settings); - Logger::get().info("describe table '" + tableName + "'."); message->reply(http::StatusCode::success_ok, response); } void RpcTable::column_add(const openset::web::MessagePtr& message, const RpcMapping& matches) { - // this request must be forwarded to all the other nodes if (ForwardRequest(message) != ForwardStatus_e::alreadyForwarded) return; + csLock 
rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); @@ -538,6 +548,8 @@ void RpcTable::column_add(const openset::web::MessagePtr& message, const RpcMapp void RpcTable::column_drop(const openset::web::MessagePtr& message, const RpcMapping& matches) { + csLock rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); @@ -609,6 +621,8 @@ void RpcTable::column_drop(const openset::web::MessagePtr& message, const RpcMap void RpcTable::table_settings(const openset::web::MessagePtr& message, const RpcMapping& matches) { + csLock rpcLock(RpcTableCs); + auto database = openset::globals::database; const auto request = message->getJSON(); @@ -651,7 +665,7 @@ void RpcTable::table_settings(const openset::web::MessagePtr& message, const Rpc void openset::comms::RpcTable::table_list(const openset::web::MessagePtr & message, const RpcMapping & matches) { - // lock the table object + csLock rpcLock(RpcTableCs); auto database = openset::globals::database; const auto names = database->getTableNames(); diff --git a/src/service.cpp b/src/service.cpp index 80c696b..455c834 100644 --- a/src/service.cpp +++ b/src/service.cpp @@ -21,7 +21,6 @@ namespace openset { const auto ip = globals::running->host; const auto port = globals::running->port; - const auto pool = std::thread::hardware_concurrency(); // set to number of cores const auto partitionTotal = globals::running->partitionMax; @@ -30,7 +29,7 @@ namespace openset #endif // generate our async workers, we are going to use one worker per core - openset::async::AsyncPool async(partitionTotal, std::thread::hardware_concurrency()); + openset::async::AsyncPool async(partitionTotal, 8 ); // TODO make this a switch std::thread::hardware_concurrency()); // DEBUG OpenSet::async::AsyncPool async(partitionTotal, 1); openset::mapping::Mapper mapper; diff --git a/src/table.cpp b/src/table.cpp index 486a29f..6a39d56 100644 --- a/src/table.cpp +++ 
b/src/table.cpp @@ -49,6 +49,7 @@ void openset::db::Table::initialize() properties.setProperty(PROP_SESSION, "session", PropertyTypes_e::intProp, false); createMissingPartitionObjects(); + Logger::get().info("table created '" + name + "'."); } void Table::createMissingPartitionObjects() diff --git a/src/ver.h b/src/ver.h index 789638c..5bf5737 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test6" +"0.4.5.test7" ; \ No newline at end of file From 2e28622edc4b7c1efc217975de76d8eac0f24a90 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 5 Dec 2019 16:19:58 -0500 Subject: [PATCH 26/31] properties query fix, skip empty pages in index --- src/attributes.cpp | 7 ++- src/indexbits.cpp | 45 ++++++++++---------- src/indexbits.h | 36 ++++++++++++++-- src/oloop_property.cpp | 96 +++++++++++++++++++++--------------------- src/oloop_property.h | 2 + src/ver.h | 2 +- 6 files changed, 111 insertions(+), 77 deletions(-) diff --git a/src/attributes.cpp b/src/attributes.cpp index 96e30c6..1c49192 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -40,8 +40,11 @@ IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) // if anything got squeezed out compress it if (evictBits) { - const auto evictAttribute = Attributes::getMake(static_cast(evictPropIndex), evictValue); - evictAttribute->data = evictBits->store(); + if (evictBits->data.isDirty()) + { + const auto evictAttribute = Attributes::getMake(static_cast(evictPropIndex), evictValue); + evictAttribute->data = evictBits->store(); + } delete evictBits; } diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 3510554..82ead2b 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -33,6 +33,8 @@ void IndexMemory::decompress(char* compressedData) char* IndexMemory::compress() { + dirty = false; + const auto bufferSize = LZ4_compressBound(IndexPageDataSize); const auto compBuffer = 
static_cast(PoolMem::getPool().getPtr(bufferSize)); @@ -45,6 +47,9 @@ char* IndexMemory::compress() { ++pageNumber; + if (!pagePopulation(indexPage)) + continue; + const auto compressedSize = LZ4_compress_default( reinterpret_cast(indexPage->bitArray), compBuffer, @@ -76,8 +81,7 @@ char* IndexMemory::compress() } IndexBits::IndexBits() - : data(), - placeHolder(false) + : placeHolder(false) {} // move constructor @@ -152,6 +156,8 @@ void IndexBits::makeBits(const int64_t index, const int state) for (auto i = index; i < lastBit; i++) bitClear(i); } + + data.setDirty(); } void IndexBits::mount(char* compressedData) @@ -165,29 +171,11 @@ char* IndexBits::store() return data.compress(); } -void IndexBits::bitSet(const int64_t index) -{ - const auto bits = data.getBitInt(index); - *bits |= BITMASK[index & 63ULL]; // mod 64 -} - void IndexBits::setSizeByBit(const int64_t index) { data.getBitInt(index); } -void IndexBits::bitClear(const int64_t index) -{ - const auto bits = data.getBitInt(index); - *bits &= ~(BITMASK[index & 63ULL]); // mod 64 -} - -bool IndexBits::bitState(const int64_t index) -{ - const auto bits = data.getBitInt(index); - return ((*bits) & BITMASK[index & 63ULL]); -} - /* population(int stopBit); @@ -232,6 +220,7 @@ void IndexBits::opCopy(const IndexBits& source) reset(); data = source.data; placeHolder = source.placeHolder; + data.setDirty(); } void IndexBits::opCopyNot(IndexBits& source) @@ -246,7 +235,10 @@ void IndexBits::opAnd(IndexBits& source) return; auto index = 0; - const auto end = source.data.intCount(); + auto end = source.data.intCount(); + // whichever is bigger + if (data.intCount() > end) + end = data.intCount(); while (index < end) { @@ -254,6 +246,7 @@ void IndexBits::opAnd(IndexBits& source) *dest &= *source.data.getInt(index); ++index; } + data.setDirty(); } void IndexBits::opOr(IndexBits& source) @@ -262,7 +255,7 @@ void IndexBits::opOr(IndexBits& source) return; auto index = 0; - const auto end = source.data.intCount(); + auto 
end = source.data.intCount(); while (index < end) { @@ -270,6 +263,7 @@ void IndexBits::opOr(IndexBits& source) *dest |= *source.data.getInt(index); ++index; } + data.setDirty(); } void IndexBits::opAndNot(IndexBits& source) @@ -278,7 +272,10 @@ void IndexBits::opAndNot(IndexBits& source) return; auto index = 0; - const auto end = source.data.intCount(); + auto end = source.data.intCount(); + // whichever is bigger + if (data.intCount() > end) + end = data.intCount(); while (index < end) { @@ -286,6 +283,7 @@ void IndexBits::opAndNot(IndexBits& source) *dest = *dest & ~(*source.data.getInt(index)); ++index; } + data.setDirty(); } void IndexBits::opNot() @@ -302,6 +300,7 @@ void IndexBits::opNot() *dest = ~(*dest); ++index; } + data.setDirty(); } /* diff --git a/src/indexbits.h b/src/indexbits.h index f7d0579..a72fc56 100644 --- a/src/indexbits.h +++ b/src/indexbits.h @@ -5,6 +5,7 @@ #include "common.h" #include "sba/sba.h" #include +#include "dbtypes.h" namespace openset { @@ -41,6 +42,8 @@ namespace openset IndexPageMemory_s* lastIndex { nullptr }; + bool dirty { false }; + public: IndexMemory() = default; @@ -114,6 +117,7 @@ namespace openset PoolMem::getPool().freePtr(page); indexPages.clear(); rawPages.clear(); + dirty = false; lastIndex = nullptr; } @@ -122,6 +126,16 @@ namespace openset return BitArraySize * static_cast(indexPages.size()); } + void setDirty() + { + dirty = true; + } + + bool isDirty() const + { + return dirty; + } + int64_t* getBitInt(const int64_t bitIndex) { const auto page = getPage(bitIndex); @@ -234,9 +248,25 @@ namespace openset char* store(); void setSizeByBit(int64_t index); - void bitSet(int64_t index); - void bitClear(int64_t index); - bool bitState(int64_t index); + void bitSet(const int64_t index) + { + const auto bits = data.getBitInt(index); + *bits |= BITMASK[index & 63ULL]; // mod 64 + data.setDirty(); + } + + void bitClear(const int64_t index) + { + const auto bits = data.getBitInt(index); + *bits &= ~(BITMASK[index & 
63ULL]); // mod 64 + data.setDirty(); + } + + bool bitState(const int64_t index) + { + const auto bits = data.getBitInt(index); + return ((*bits) & BITMASK[index & 63ULL]); + } int64_t population(const int64_t stopBit); diff --git a/src/oloop_property.cpp b/src/oloop_property.cpp index 0914df6..a9e1383 100644 --- a/src/oloop_property.cpp +++ b/src/oloop_property.cpp @@ -95,43 +95,7 @@ void OpenLoopProperty::prepare() return; } - rowKey.clear(); - - const auto hash = MakeHash(config.propName); - result->addLocalText(MakeHash(config.propName), config.propName); - - rowKey.key[0] = hash; - rowKey.types[0] = ResultTypes_e::Text; - - // assign the type for the value to the key - switch (config.propType) - { - case db::PropertyTypes_e::intProp: - rowKey.types[1] = ResultTypes_e::Int; - break; - case db::PropertyTypes_e::doubleProp: - rowKey.types[1] = ResultTypes_e::Double; - break; - case db::PropertyTypes_e::boolProp: - rowKey.types[1] = ResultTypes_e::Bool; - break; - case db::PropertyTypes_e::textProp: - rowKey.types[1] = ResultTypes_e::Text; - break; - default: ; - } - - const auto aggs = result->getMakeAccumulator(rowKey); - - auto idx = 0; - for (auto s : segments) - { - auto bits = allBits; - bits->opAnd(*parts->getSegmentBits(s)); - aggs->columns[idx].value = bits->population(stopBit); - - ++idx; - } + addRootTotal(); // turn ints and doubles into their bucketed name const auto toBucket = [&](const int64_t value)->int64_t @@ -218,6 +182,45 @@ void OpenLoopProperty::prepare() groupsIter = groups.begin(); } +void OpenLoopProperty::addRootTotal() +{ + rowKey.clear(); + + rowKey.key[0] = result->addLocalTextAndHash(config.propName); + rowKey.types[0] = ResultTypes_e::Text; + + // assign the type for the value to the key + switch (config.propType) + { + case db::PropertyTypes_e::intProp: + rowKey.types[1] = ResultTypes_e::Int; + break; + case db::PropertyTypes_e::doubleProp: + rowKey.types[1] = ResultTypes_e::Double; + break; + case db::PropertyTypes_e::boolProp: + 
rowKey.types[1] = ResultTypes_e::Bool; + break; + case db::PropertyTypes_e::textProp: + rowKey.types[1] = ResultTypes_e::Text; + break; + default: ; + } + + const auto aggs = result->getMakeAccumulator(rowKey); + + auto idx = 0; + for (auto &segmentName : segments) + { + db::IndexBits* segmentBits = segmentName == "*" ? &all : parts->getSegmentBits(segmentName); + db::IndexBits bits; + bits.opCopy(rootCount); + bits.opAnd(*segmentBits); + aggs->columns[idx].value = bits.population(stopBit); + ++idx; + } +} + bool OpenLoopProperty::run() { @@ -241,21 +244,13 @@ bool OpenLoopProperty::run() } auto columnIndex = 0; - for (auto segmentName : segments) + for (const auto& segmentName : segments) { - // here we are setting the key for the bucket, // this is under our root which is the property name rowKey.key[1] = bucket; // value hash (or value) - db::IndexBits* sourceBits; - - if (segmentName == "*") - sourceBits = &all; - else - sourceBits = parts->getSegmentBits(segmentName); - - + const auto segmentBits = segmentName == "*" ? 
&all : parts->getSegmentBits(segmentName); const auto aggs = result->getMakeAccumulator(rowKey); auto sumBits = new db::IndexBits(); @@ -270,8 +265,10 @@ bool OpenLoopProperty::run() sumBits->opOr(*bits); } + rootCount.opOr(*sumBits); + // remove bits not in the segment - sumBits->opAnd(*sourceBits); + sumBits->opAnd(*segmentBits); aggs->columns[columnIndex].value = sumBits->population(stopBit); delete sumBits; @@ -294,6 +291,8 @@ bool OpenLoopProperty::run() if (groupsIter == groups.end()) { + addRootTotal(); + shuttle->reply( 0, result::CellQueryResult_s{ @@ -302,6 +301,7 @@ bool OpenLoopProperty::run() errors::Error{} } ); + suicide(); return false; } diff --git a/src/oloop_property.h b/src/oloop_property.h index 9983a36..6b1990f 100644 --- a/src/oloop_property.h +++ b/src/oloop_property.h @@ -74,6 +74,7 @@ namespace openset result::ResultSet* result; db::IndexBits all; + db::IndexBits rootCount; int64_t stopBit{ 0 }; int64_t instance{ 0 }; @@ -99,6 +100,7 @@ namespace openset void prepare() final; bool run() final; + void addRootTotal(); void partitionRemoved() final; }; diff --git a/src/ver.h b/src/ver.h index 5bf5737..7222120 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test7" +"0.4.5.test8" ; \ No newline at end of file From c89f714a0bda97b7548a4f38bb375e5aec45dcaf Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Fri, 6 Dec 2019 12:55:52 -0500 Subject: [PATCH 27/31] set up for 16 workers --- lib/mem/blhash.h | 2 +- src/customer_index.h | 4 ++++ src/http_serve.cpp | 2 +- src/main.cpp | 2 +- src/service.cpp | 2 +- src/ver.h | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index 66c676c..963b45d 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -367,7 +367,7 @@ class BinaryListHash serializeOver.set(&key); serializeList.clear(); - serializeList.reserve(distinct); + serializeList.reserve(limit); serializeLimit = limit; serializeCB 
= filterCallBack; diff --git a/src/customer_index.h b/src/customer_index.h index 3d3d61d..caa56d2 100644 --- a/src/customer_index.h +++ b/src/customer_index.h @@ -86,6 +86,10 @@ namespace openset int limit, const std::function& filterCallback) { + if (limit < 0) + limit = 10; + if (limit > 10000) + limit = 10000; if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) return iter->second->serialize(descending, limit, filterCallback); return {}; diff --git a/src/http_serve.cpp b/src/http_serve.cpp index 57784e8..fa31616 100644 --- a/src/http_serve.cpp +++ b/src/http_serve.cpp @@ -146,7 +146,7 @@ namespace openset::web void HttpServe::makeWorkers() { - const auto workerCount = 8; // TODO make a switch std::thread::hardware_concurrency(); + const auto workerCount = 16; // TODO make a switch std::thread::hardware_concurrency(); workers.reserve(workerCount); threads.reserve(workerCount); diff --git a/src/main.cpp b/src/main.cpp index 560b3b2..04990ad 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,7 +39,7 @@ void StartOpenSet(openset::config::CommandlineArgs args) Logger::get().info("OpenSet v" + __version__); Logger::get().info("OpenSet, Copyright(c) 2015 - 2019, Seth Hamilton."); - const auto workerCount = 8;// TODO make this a switch std::thread::hardware_concurrency(); + const auto workerCount = 16;// TODO make this a switch std::thread::hardware_concurrency(); Logger::get().info(to_string(workerCount) + " processor cores available."); args.fix(); // fix the default startup arguments after WSAStartup (on windows) diff --git a/src/service.cpp b/src/service.cpp index 455c834..98a62d8 100644 --- a/src/service.cpp +++ b/src/service.cpp @@ -29,7 +29,7 @@ namespace openset #endif // generate our async workers, we are going to use one worker per core - openset::async::AsyncPool async(partitionTotal, 8 ); // TODO make this a switch std::thread::hardware_concurrency()); + openset::async::AsyncPool async(partitionTotal, 16 ); // TODO make this a switch 
std::thread::hardware_concurrency()); // DEBUG OpenSet::async::AsyncPool async(partitionTotal, 1); openset::mapping::Mapper mapper; diff --git a/src/ver.h b/src/ver.h index 7222120..6eb5eec 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test8" +"0.4.5.test9" ; \ No newline at end of file From 3f35056defda1796e5dc3b175698c21746b7e332 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Wed, 11 Dec 2019 17:50:39 -0500 Subject: [PATCH 28/31] flow control, averaging fix, bucket indexes --- src/attributes.cpp | 10 ++- src/attributes.h | 4 +- src/common.cpp | 5 ++ src/common.h | 1 + src/http_serve.cpp | 133 ++++++++++++++++++++++++++++----------- src/http_serve.h | 28 ++++++--- src/indexbits.cpp | 2 +- src/oloop_property.cpp | 17 ++++- src/oloop_property.h | 1 + src/properties.h | 1 + src/querycommon.h | 5 +- src/queryindexing.cpp | 36 +++++++---- src/queryindexing.h | 6 +- src/queryinterpreter.cpp | 28 +++++++-- src/queryinterpreter.h | 8 ++- src/queryparserosl.h | 9 ++- src/result.h | 13 ---- src/rpc_insert.cpp | 2 +- src/rpc_query.cpp | 30 ++++++--- src/rpc_table.cpp | 12 ++++ 20 files changed, 250 insertions(+), 101 deletions(-) diff --git a/src/attributes.cpp b/src/attributes.cpp index 1c49192..25f2658 100644 --- a/src/attributes.cpp +++ b/src/attributes.cpp @@ -23,8 +23,11 @@ Attributes::~Attributes() } } -IndexBits* Attributes::getBits(const int32_t propIndex, const int64_t value) +IndexBits* Attributes::getBits(const int32_t propIndex, int64_t value) { + // apply bucketing to double values + if (const auto propInfo = properties->getProperty(propIndex); propInfo && propInfo->type == PropertyTypes_e::doubleProp) + value = static_cast(value / propInfo->bucket) * propInfo->bucket; if (const auto bits = indexCache.get(propIndex, value); bits) return bits; @@ -72,8 +75,11 @@ void Attributes::addChange(const int64_t customerId, const int32_t propIndex, co changeIndex.emplace(key, 
std::vector{Attr_changes_s{linearId, state}}); } -Attr_s* Attributes::getMake(const int32_t propIndex, const int64_t value) +Attr_s* Attributes::getMake(const int32_t propIndex, int64_t value) { + if (const auto propInfo = properties->getProperty(propIndex); propInfo && propInfo->type == PropertyTypes_e::doubleProp) + value = static_cast(value / propInfo->bucket) * propInfo->bucket; + auto key = attr_key_s( propIndex, value ); if (const auto& res = propertyIndex.emplace(key, nullptr); res.second == true) diff --git a/src/attributes.h b/src/attributes.h index dce35e9..bbf3bf9 100644 --- a/src/attributes.h +++ b/src/attributes.h @@ -140,11 +140,11 @@ namespace openset::db explicit Attributes(const int partition, Table* table, AttributeBlob* attributeBlob, Properties* properties); ~Attributes(); - IndexBits* getBits(const int32_t propIndex, const int64_t value); + IndexBits* getBits(const int32_t propIndex, int64_t value); void addChange(const int64_t customerId, const int32_t propIndex, const int64_t value, const int32_t linearId, const bool state); - Attr_s* getMake(const int32_t propIndex, const int64_t value); + Attr_s* getMake(const int32_t propIndex, int64_t value); Attr_s* getMake(const int32_t propIndex, const string& value); Attr_s* get(const int32_t propIndex, const int64_t value) const; diff --git a/src/common.cpp b/src/common.cpp index e87cac5..564eed6 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -22,6 +22,11 @@ int64_t MakeHash(const char* buffer) return XXH64(buffer, strlen(buffer), HASH_SEED); } +int64_t MakeHash(const int64_t value) +{ + return XXH64(static_cast(&value), sizeof(int64_t), HASH_SEED); +} + int64_t MakeHash(const std::string& buffer) { return XXH64(buffer.c_str(), buffer.length(), HASH_SEED); diff --git a/src/common.h b/src/common.h index c46117a..c447481 100644 --- a/src/common.h +++ b/src/common.h @@ -31,6 +31,7 @@ int64_t Now(); int64_t MakeHash(const char* buffer, int64_t len); int64_t MakeHash(const char* buffer); +int64_t 
MakeHash(const int64_t value); int64_t MakeHash(const std::string& buffer); int64_t HashPair(const int64_t a, const int64_t b); diff --git a/src/http_serve.cpp b/src/http_serve.cpp index fa31616..96f2665 100644 --- a/src/http_serve.cpp +++ b/src/http_serve.cpp @@ -68,47 +68,95 @@ namespace openset::web std::shared_ptr message; - { // scope for lock + if (queryWorker) + { // wait on a job to appear, verify it's there, and run it. - unique_lock waiter(server->readyLock); - if (server->messagesQueued == 0) - server->messageReady.wait(waiter, - [&]() - { // oh yeah a lambda! - return static_cast(server->messagesQueued) != 0; - }); - - message = server->getQueuedMessage(); - if (!message) - continue; + { + unique_lock waiter(server->queryReadyLock); + if (server->queryMessagesQueued == 0 || server->runningQueries >= 3) + server->queryMessageReady.wait(waiter, + [&]() + { + return static_cast(server->queryMessagesQueued) != 0 && server->runningQueries < 3; + }); + + message = server->getQueuedQueryMessage(); + if (!message) + continue; + } + + ++server->jobsRun; + ++server->runningQueries; + openset::comms::Dispatch(message); + --server->runningQueries; + + } + else + { + { + // wait on a job to appear, verify it's there, and run it. 
+ unique_lock waiter(server->otherReadyLock); + if (server->otherMessagesQueued == 0) + server->otherMessageReady.wait(waiter, + [&]() + { + return static_cast(server->otherMessagesQueued) != 0; + }); + + message = server->getQueuedOtherMessage(); + if (!message) + continue; + } + + ++server->jobsRun; + openset::comms::Dispatch(message); } // unlock out of scope - ++server->jobsRun; - - openset::comms::Dispatch(message); } } - void HttpServe::queueMessage(std::shared_ptr message) + void HttpServe::queueQueryMessage(std::shared_ptr message) + { + csLock lock(messagesLock); + ++queryMessagesQueued; + queryMessages.emplace(message); + queryMessageReady.notify_one(); + } + + void HttpServe::queueOtherMessage(std::shared_ptr message) + { + csLock lock(messagesLock); + ++otherMessagesQueued; + otherMessages.emplace(message); + otherMessageReady.notify_one(); + } + + std::shared_ptr HttpServe::getQueuedOtherMessage() { csLock lock(messagesLock); - ++messagesQueued; - messages.emplace(message); - messageReady.notify_one(); + + if (otherMessages.empty()) + return nullptr; + + --otherMessagesQueued; + + auto result = otherMessages.front(); + otherMessages.pop(); + return result; } - std::shared_ptr HttpServe::getQueuedMessage() + std::shared_ptr HttpServe::getQueuedQueryMessage() { csLock lock(messagesLock); - if (messages.empty()) + if (queryMessages.empty()) return nullptr; - --messagesQueued; + --queryMessagesQueued; - auto result = messages.front(); - messages.pop(); + auto result = queryMessages.front(); + queryMessages.pop(); return result; } @@ -120,19 +168,25 @@ namespace openset::web using SharedRequestT = std::shared_ptr; server.resource["^/v1/.*$"]["GET"] = [&](SharedResponseT response, SharedRequestT request) { - queueMessage(std::move(MakeMessage(response, request))); + if (request->path.find("/v1/query/") == 0 && request->query_string.find("fork=true") == -1) + queueQueryMessage(std::move(MakeMessage(response, request))); + else + 
queueQueryMessage(std::move(MakeMessage(response, request))); }; server.resource["^/v1/.*$"]["POST"] = [&](SharedResponseT response, SharedRequestT request) { - queueMessage(std::move(MakeMessage(response, request))); + if (request->path.find("/v1/query/") == 0 && request->query_string.find("fork=true") == -1) + queueQueryMessage(std::move(MakeMessage(response, request))); + else + queueOtherMessage(std::move(MakeMessage(response, request))); }; server.resource["^/v1/.*$"]["PUT"] = [&](SharedResponseT response, SharedRequestT request) { - queueMessage(std::move(MakeMessage(response, request))); + queueOtherMessage(std::move(MakeMessage(response, request))); }; server.resource["^/v1/.*$"]["DELETE"] = [&](SharedResponseT response, SharedRequestT request) { - queueMessage(std::move(MakeMessage(response, request))); + queueOtherMessage(std::move(MakeMessage(response, request))); }; server.resource["^/ping$"]["GET"] = [&](SharedResponseT response, SharedRequestT request) { @@ -146,22 +200,27 @@ namespace openset::web void HttpServe::makeWorkers() { - const auto workerCount = 16; // TODO make a switch std::thread::hardware_concurrency(); + otherWorkers.reserve(32); + queryWorkers.reserve(8); + threads.reserve(40); - workers.reserve(workerCount); - threads.reserve(workerCount); + for (auto i = 0; i < 32; i++) + { + otherWorkers.emplace_back(std::make_shared(this, i, false)); + threads.emplace_back(thread(&webWorker::runner, otherWorkers[i])); + } - for (auto i = 0; i < static_cast(workerCount); i++) + for (auto i = 0; i < 8; i++) { - workers.emplace_back(std::make_shared(this, i)); - threads.emplace_back(thread(&webWorker::runner, workers[i])); + queryWorkers.emplace_back(std::make_shared(this, i, true)); + threads.emplace_back(thread(&webWorker::runner, queryWorkers[i])); } - Logger::get().info(to_string(workerCount) + " HTTP REST workers created."); + Logger::get().info(" HTTP REST server created."); // detach these threads, let them do their thing in the background - 
for (auto i = 0; i < workerCount; i++) - threads[i].detach(); + for (auto& thread : threads) + thread.detach(); } void HttpServe::serve(const std::string& ip, const int port) diff --git a/src/http_serve.h b/src/http_serve.h index cd38eb5..effa6ca 100644 --- a/src/http_serve.h +++ b/src/http_serve.h @@ -174,10 +174,12 @@ namespace openset::web { HttpServe* server; int instance; + bool queryWorker; public: - webWorker(HttpServe* server, const int instance) : + webWorker(HttpServe* server, const int instance, bool queryWorker) : server(server), - instance(instance) + instance(instance), + queryWorker(queryWorker) {}; void runner(); }; @@ -185,22 +187,30 @@ namespace openset::web class HttpServe { public: - atomic messagesQueued{ 0 }; + atomic queryMessagesQueued{ 0 }; + atomic otherMessagesQueued{ 0 }; atomic jobsRun{ 0 }; + atomic runningQueries{ 0 }; CriticalSection messagesLock; - queue> messages; + queue> queryMessages; + queue> otherMessages; - mutex readyLock; - condition_variable messageReady; + mutex otherReadyLock; + mutex queryReadyLock; + condition_variable queryMessageReady; + condition_variable otherMessageReady; // worker pools - vector> workers; + vector> otherWorkers; + vector> queryWorkers; vector threads; HttpServe() = default; - void queueMessage(std::shared_ptr message); - std::shared_ptr getQueuedMessage(); + void queueQueryMessage(std::shared_ptr message); + void queueOtherMessage(std::shared_ptr message); + std::shared_ptr getQueuedOtherMessage(); + std::shared_ptr getQueuedQueryMessage(); template void mapEndpoints(T& server); diff --git a/src/indexbits.cpp b/src/indexbits.cpp index 82ead2b..c1b1fa3 100644 --- a/src/indexbits.cpp +++ b/src/indexbits.cpp @@ -255,7 +255,7 @@ void IndexBits::opOr(IndexBits& source) return; auto index = 0; - auto end = source.data.intCount(); + const auto end = source.data.intCount(); while (index < end) { diff --git a/src/oloop_property.cpp b/src/oloop_property.cpp index a9e1383..9ca09af 100644 --- 
a/src/oloop_property.cpp +++ b/src/oloop_property.cpp @@ -23,7 +23,8 @@ OpenLoopProperty::OpenLoopProperty( table(table), result(result), instance(instance) -{} +{ +} OpenLoopProperty::~OpenLoopProperty() { @@ -95,7 +96,7 @@ void OpenLoopProperty::prepare() return; } - addRootTotal(); + createRootNode(); // turn ints and doubles into their bucketed name const auto toBucket = [&](const int64_t value)->int64_t @@ -182,7 +183,7 @@ void OpenLoopProperty::prepare() groupsIter = groups.begin(); } -void OpenLoopProperty::addRootTotal() +void OpenLoopProperty::createRootNode() { rowKey.clear(); @@ -207,6 +208,16 @@ void OpenLoopProperty::addRootTotal() default: ; } + result->getMakeAccumulator(rowKey); +} + +void OpenLoopProperty::addRootTotal() +{ + rowKey.clear(); + + rowKey.key[0] = result->addLocalTextAndHash(config.propName); + rowKey.types[0] = ResultTypes_e::Text; + const auto aggs = result->getMakeAccumulator(rowKey); auto idx = 0; diff --git a/src/oloop_property.h b/src/oloop_property.h index 6b1990f..2db7687 100644 --- a/src/oloop_property.h +++ b/src/oloop_property.h @@ -99,6 +99,7 @@ namespace openset ~OpenLoopProperty() final; void prepare() final; + void createRootNode(); bool run() final; void addRootTotal(); void partitionRemoved() final; diff --git a/src/properties.h b/src/properties.h index 1fdc9d7..10b0d30 100644 --- a/src/properties.h +++ b/src/properties.h @@ -34,6 +34,7 @@ namespace openset bool isSet{ false }; bool isCustomerProperty{ false }; bool deleted{ false }; + int64_t bucket {1}; }; using PropsMap = robin_hood::unordered_map>; diff --git a/src/querycommon.h b/src/querycommon.h index b958080..75dcebc 100644 --- a/src/querycommon.h +++ b/src/querycommon.h @@ -647,7 +647,7 @@ namespace openset HintOp_s(const HintOp_e op, const double value) : op(op), value(value), - hash(static_cast(value * 1'000'000LL)) + hash(static_cast(value * 10'000LL)) {} HintOp_s(const HintOp_e op, const string& text) @@ -912,7 +912,6 @@ namespace openset std::string 
capturedIndex; std::string rawIndex; HintOpList index; - bool indexIsCountable { false }; string segmentName; SegmentList segments; MarshalSet marshalsReferenced; @@ -923,6 +922,7 @@ namespace openset int64_t sessionTime { 60'000LL * 30LL }; // 30 minutes std::string rawScript; + bool fastTally { false }; bool isSegment { false }; bool useProps { false }; // uses customer props bool writesProps { true }; // script can change props @@ -933,6 +933,7 @@ namespace openset bool useSessions { false }; // uses session functions, we can cache these bool useStampedRowIds { false }; // count using row stamp rather than row uniqueness bool onInsert { false }; + bool indexIsCountable { false }; int zIndex { 100 }; }; diff --git a/src/queryindexing.cpp b/src/queryindexing.cpp index 1dc845e..c910b66 100644 --- a/src/queryindexing.cpp +++ b/src/queryindexing.cpp @@ -33,7 +33,7 @@ void Indexing::mount(Table* tablePtr, Macro_s& queryMacros, int partitionNumber, } // returns an index by name -openset::db::IndexBits* Indexing::getIndex(std::string name, bool &countable) +openset::db::IndexBits* Indexing::getIndex(const std::string& name, bool &countable) { for (auto &idx:indexes) { @@ -54,7 +54,7 @@ and returns values that match the condition. In getBits we take the last item on the stack and apply all matching indexes to the bits in the stack entry. 
*/ -openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) +openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode, bool& countable) { auto& entry = stack.back(); @@ -62,7 +62,6 @@ openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) const auto propInfo = table->getProperties()->getProperty(entry.columnName); // if the value side is NONE we go check for presence - auto negate = false; if (mode == Attributes::listMode_e::EQ && entry.hash == NONE) @@ -78,13 +77,26 @@ openset::db::IndexBits Indexing::compositeBits(Attributes::listMode_e mode) negate = true; // != VAL -- anything other than VAL } + if (propInfo->type == PropertyTypes_e::doubleProp) + { + // double types are not automatically countable due to bucketing + countable = false; + + entry.hash = static_cast(entry.hash / propInfo->bucket) * propInfo->bucket; + + if (mode == Attributes::listMode_e::GT) + mode = Attributes::listMode_e::GTE; + else if (mode == Attributes::listMode_e::LT) + mode = Attributes::listMode_e::LTE; + } + auto attrList = parts->attributes.getPropertyValues(propInfo->idx, mode, entry.hash); auto& resultBits = entry.bits; // where our bits will all accumulate resultBits.reset(); auto initialized = false; - for (auto attr: attrList) + for (const auto attr: attrList) { // get the bits const auto workBits = parts->attributes.getBits(attr.index, attr.value); @@ -141,9 +153,11 @@ OR | OR | OR | */ -IndexBits Indexing::buildIndex(HintOpList &index, bool countable) +IndexBits Indexing::buildIndex(HintOpList &index, bool& countable) { + countable = true; + struct IndexStack_s { IndexBits bits; @@ -171,27 +185,27 @@ IndexBits Indexing::buildIndex(HintOpList &index, bool countable) { case HintOp_e::UNSUPPORTED: break; case HintOp_e::EQ: - compositeBits(Attributes::listMode_e::EQ); + compositeBits(Attributes::listMode_e::EQ, countable); ++count; break; case HintOp_e::NEQ: - compositeBits(Attributes::listMode_e::NEQ); + 
compositeBits(Attributes::listMode_e::NEQ, countable); ++count; break; case HintOp_e::GT: - compositeBits(Attributes::listMode_e::GT); + compositeBits(Attributes::listMode_e::GT, countable); ++count; break; case HintOp_e::GTE: - compositeBits(Attributes::listMode_e::GTE); + compositeBits(Attributes::listMode_e::GTE, countable); ++count; break; case HintOp_e::LT: - compositeBits(Attributes::listMode_e::LT); + compositeBits(Attributes::listMode_e::LT, countable); ++count; break; case HintOp_e::LTE: - compositeBits(Attributes::listMode_e::LTE); + compositeBits(Attributes::listMode_e::LTE, countable); ++count; break; case HintOp_e::PUSH_VAL: diff --git a/src/queryindexing.h b/src/queryindexing.h index aa311b0..66cb27c 100644 --- a/src/queryindexing.h +++ b/src/queryindexing.h @@ -54,12 +54,12 @@ namespace openset int partitionNumber, int stopAtBit); - openset::db::IndexBits compositeBits(const db::Attributes::listMode_e mode); + openset::db::IndexBits compositeBits(const db::Attributes::listMode_e mode, bool& countable); - openset::db::IndexBits* getIndex(std::string name, bool &countable); + openset::db::IndexBits* getIndex(const std::string& name, bool &countable); private: - openset::db::IndexBits buildIndex(HintOpList &index, bool countable); + openset::db::IndexBits buildIndex(HintOpList &index, bool& countable); }; }; }; diff --git a/src/queryinterpreter.cpp b/src/queryinterpreter.cpp index 651ad2b..a4722fb 100644 --- a/src/queryinterpreter.cpp +++ b/src/queryinterpreter.cpp @@ -9,6 +9,8 @@ const int MAX_RECURSE_COUNT = 10; const int STACK_DEPTH = 64; +const int64_t StarHash = MakeHash("*"); + openset::query::Interpreter::Interpreter(Macro_s& macros, const InterpretMode_e interpretMode) : macros(macros), rowKey(), @@ -29,6 +31,15 @@ void openset::query::Interpreter::setResultObject(result::ResultSet* resultSet) { result = resultSet; result->addLocalText(NONE, "n/a"); + result->addLocalText(StarHash, "*"); + + if (macros.fastTally) + { + rowKey.clear(); + 
rowKey.key[0] = StarHash; + rowKey.types[0] = result::ResultTypes_e::Text; + fastTallyAccumulator = result->getMakeAccumulator(rowKey); + } } void openset::query::Interpreter::configure() @@ -170,7 +181,7 @@ void openset::query::Interpreter::extractMarshalParams(const int paramCount) void openset::query::Interpreter::tally(const int paramCount, const Col_s* columns, const int currentRow) { - if (paramCount <= 0) + if (paramCount <= 0 && !macros.fastTally) return; // this will ensure non-int types are represented as ints @@ -315,8 +326,6 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum } }; - rowKey.clear(); - // run lambdas result columns if (macros.vars.columnLambdas.size()) { @@ -356,6 +365,15 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum } } + if (macros.fastTally) + { + aggColumns(fastTallyAccumulator); + return; + } + + rowKey.clear(); + + if (macros.scriptMode == ScriptMode_e::customers) { auto depth = 0; @@ -375,7 +393,7 @@ void openset::query::Interpreter::tally(const int paramCount, const Col_s* colum { if (depth == paramCount || (item.typeOf() != cvar::valueType::STR && item == NONE)) break; - rowKey.key[depth] = fixToInt(item, rowKey.types[depth]); + rowKey.key[depth] = fixToInt(item, rowKey.types[depth]); aggColumns(result->getMakeAccumulator(rowKey)); ++depth; } @@ -2064,7 +2082,7 @@ void openset::query::Interpreter::opRunner(Instruction_s* inst, int64_t currentR ++stackPtr; break; case OpCode_e::PSHLITFLT: // push a floating point value - *stackPtr = cast(inst->value) / cast(1'000'000); + *stackPtr = cast(inst->value) / cast(10'000); ++stackPtr; break; case OpCode_e::PSHLITNUL: // push a null/none diff --git a/src/queryinterpreter.h b/src/queryinterpreter.h index 313d014..7e20794 100644 --- a/src/queryinterpreter.h +++ b/src/queryinterpreter.h @@ -23,6 +23,11 @@ namespace openset class AttributeBlob; class IndexBits; } + + namespace result + { + class Accumulator; + } } namespace 
openset @@ -153,6 +158,8 @@ namespace openset bool propsChanged{ false }; + result::Accumulator* fastTallyAccumulator { nullptr }; + // counters int loopCount{ 0 }; int recursion{ 0 }; @@ -185,7 +192,6 @@ namespace openset // regular function local vectors were impacting performance > 6% MarshalParams marshalParams = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - // distinct counting (with property as key) ValuesSeen eventDistinct; // distinct to group id ValuesSeenKey distinctKey; diff --git a/src/queryparserosl.h b/src/queryparserosl.h index f97cea3..2caffaf 100644 --- a/src/queryparserosl.h +++ b/src/queryparserosl.h @@ -293,6 +293,7 @@ namespace openset::query db::Properties* tableColumns { nullptr }; bool usesSessions { false }; + bool fastTally { false }; std::string rawScript; Blocks blocks; @@ -2974,12 +2975,13 @@ namespace openset::query { if (words.size() == 1) - throw QueryParse2Error_s { + fastTally = true; + /*throw QueryParse2Error_s { errors::errorClass_e::parse, errors::errorCode_e::syntax_error, "expecting at least one group after `<<`", lastDebug - }; + };*/ // the `<<` statement doesn't take brackets, so we are adding them before // we call parseParams @@ -3188,7 +3190,7 @@ namespace openset::query finCode.emplace_back( OpCode_e::PSHLITFLT, 0, - static_cast(midOp.value1 * 1'000'000.0), // float value + static_cast(midOp.value1 * 10'000.0), // float value 0, debug); break; @@ -3518,6 +3520,7 @@ namespace openset::query // lets us know if we are read-only inMacros.writesProps = writesProps; + inMacros.fastTally = fastTally; index = 0; for (auto& v : stringLiterals) diff --git a/src/result.h b/src/result.h index 2527c5b..c838c80 100644 --- a/src/result.h +++ b/src/result.h @@ -55,18 +55,10 @@ namespace openset key[1] = NONE; key[2] = NONE; key[3] = NONE; - //key[4] = NONE; - //key[5] = NONE; - //key[6] = NONE; - //key[7] = NONE; types[0] = ResultTypes_e::Int; types[1] = ResultTypes_e::Int; types[2] = ResultTypes_e::Int; types[3] = 
ResultTypes_e::Int; - //types[4] = ResultTypes_e::Int; - //types[5] = ResultTypes_e::Int; - //types[6] = ResultTypes_e::Int; - //types[7] = ResultTypes_e::Int; } void clearFrom(const int index) @@ -75,11 +67,6 @@ namespace openset *iter = NONE; } - void makeReady() - { - //hash = MakeHash(reinterpret_cast(key), keyDepth * sizeof(int64_t)); - } - size_t makeHash() const { return MakeHash(reinterpret_cast(key), keyDepth * sizeof(int64_t)); diff --git a/src/rpc_insert.cpp b/src/rpc_insert.cpp index 38620a0..f4fd01d 100644 --- a/src/rpc_insert.cpp +++ b/src/rpc_insert.cpp @@ -151,7 +151,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa else uuid = personNode->getInt(); - const auto destination = cast((std::abs(uuid) % 13337) % partitions->getPartitionMax()); + const auto destination = cast(MakeHash(uuid) % partitions->getPartitionMax()); int64_t len; auto logSize = SideLog::getSideLog().add(table.get(), destination, cjson::stringifyCstr(row, len)); diff --git a/src/rpc_query.cpp b/src/rpc_query.cpp index 8345eee..1e71ea0 100644 --- a/src/rpc_query.cpp +++ b/src/rpc_query.cpp @@ -6,6 +6,8 @@ #include "common.h" #include "cjson/cjson.h" #include "str/strtools.h" +#include "threads/spinlock.h" +#include "threads/locks.h" #include "sba/sba.h" #include "oloop_insert.h" #include "oloop_query.h" @@ -43,7 +45,9 @@ enum class queryFunction_e : int32_t status, query, count, -}; /* +}; + +/* * The magic FORK function. * * This will add a `is_fork: true` member to the request @@ -56,7 +60,7 @@ enum class queryFunction_e : int32_t * Note: a single node could have any number of partitions, these partitions * are merged into a single result by `is_fork` nodes before return the * result set. This greatly reduces the number of data sets that need to be held -* in memory and marged by the originator. +* in memory and merged by the originator. 
*/ shared_ptr forkQuery( const Database::TablePtr& table, @@ -120,6 +124,7 @@ shared_ptr forkQuery( { const auto backOff = (retryCount * retryCount) * 20; ThreadSleep( backOff < 10000 ? backOff : 10000); + return forkQuery( table, message, @@ -166,6 +171,7 @@ shared_ptr forkQuery( // clean up all those resultSet* for (auto res : resultSets) delete res; + return nullptr; } result.routeError = true; @@ -190,15 +196,17 @@ shared_ptr forkQuery( // clean up all those resultSet* for (auto res : resultSets) delete res; + return nullptr; } } const auto gatherEndTime = Now(); + auto resultJson = make_shared(); if (scriptMode == openset::query::ScriptMode_e::customers) { - auto resultJson = make_shared(); + const auto toJsonStartTime = Now(); ResultMuxDemux::resultFlatColumnsToJson(resultColumnCount, setCount, resultSets, resultJson.get()); const auto toJsonEndTime = Now(); @@ -247,7 +255,6 @@ shared_ptr forkQuery( return resultJson; } - auto resultJson = make_shared(); ResultMuxDemux::resultSetToJson(resultColumnCount, setCount, resultSets, resultJson.get()); // free up the responses @@ -270,10 +277,12 @@ shared_ptr forkQuery( default: ; } - ResultMuxDemux::jsonResultTrim(resultJson.get(), trim); // local function to fill Meta data in result JSON + ResultMuxDemux::jsonResultTrim(resultJson.get(), trim); + + // local function to fill Meta data in result JSON const auto fillMeta = [](const openset::query::VarList& mapping, cjson* jsonArray) { - for (auto c : mapping) + for (auto& c : mapping) { auto tNode = jsonArray->pushObject(); if (c.modifier == openset::query::Modifiers_e::var) @@ -335,7 +344,9 @@ shared_ptr forkQuery( } } } - }; // add status nodes to JSON document + }; + + // add status nodes to JSON document //auto metaJson = resultJson->setObject("info"); //auto dataJson = metaJson->setObject("data"); @@ -348,6 +359,7 @@ shared_ptr forkQuery( //metaJson->set("serialize_time", serialTime); //metaJson->set("total_time", elapsed); Logger::get().info("RpcQuery on " + 
table->getName()); + return resultJson; } @@ -598,6 +610,7 @@ void RpcQuery::report(const openset::web::MessagePtr& message, const RpcMapping& // std::vector resultSets; resultSets.reserve(partitions->getWorkerCount()); + for (auto i = 0; i < partitions->getWorkerCount(); ++i) resultSets.push_back( new ResultSet( @@ -1853,6 +1866,7 @@ void RpcQuery::property(openset::web::MessagePtr& message, const RpcMapping& mat bufferLength); message->reply(http::StatusCode::success_ok, buffer, bufferLength); + PoolMem::getPool().freePtr(buffer); Logger::get().info("Fork query on " + table->getName()); @@ -2282,13 +2296,13 @@ void RpcQuery::histogram(openset::web::MessagePtr& message, const RpcMapping& ma bufferLength); message->reply(http::StatusCode::success_ok, buffer, bufferLength); + PoolMem::getPool().freePtr(buffer); Logger::get().info("Fork query on " + table->getName()); // clean up stray resultSets for (auto resultSet : resultSets) delete resultSet; - PoolMem::getPool().freePtr(buffer); // this will delete the shuttle, and clear up the CellQueryResult_s vector release_cb(); diff --git a/src/rpc_table.cpp b/src/rpc_table.cpp index 8047aaf..53060c1 100644 --- a/src/rpc_table.cpp +++ b/src/rpc_table.cpp @@ -193,6 +193,14 @@ void RpcTable::table_create(const openset::web::MessagePtr& message, const RpcMa } columns->setProperty(columnEnum, name, colType, isSet, isProp); + + const auto bucket = n->xPathInt("/bucket", 1); + if (colType == PropertyTypes_e::doubleProp) + { + const auto prop = columns->getProperty(name); + prop->bucket = bucket * 10000; + } + ++columnEnum; } @@ -416,6 +424,10 @@ void RpcTable::table_describe(const openset::web::MessagePtr& message, const Rpc columnRecord->set("is_set", true); if (c.isCustomerProperty) columnRecord->set("is_customer", true); + + if (c.type == PropertyTypes_e::doubleProp) + columnRecord->set("bucket", static_cast(c.bucket / 10000)); + } auto eventOrder = response.setArray("event_order"); From 
543660c4ebc685bf80406445411734674cd2a272 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 12 Dec 2019 08:47:39 -0500 Subject: [PATCH 29/31] version bump --- src/ver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ver.h b/src/ver.h index 6eb5eec..8af2c06 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test9" +"0.4.5.test10" ; \ No newline at end of file From 1d593783cc227e86721c81926e303f34f9f07967 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 12 Dec 2019 19:42:29 -0500 Subject: [PATCH 30/31] fixed partition hashing bug, faster inserts, list limits --- lib/mem/blhash.h | 4 ++-- src/customer_index.h | 8 ++++---- src/http_serve.cpp | 5 +---- src/oloop_insert.cpp | 2 +- src/rpc_insert.cpp | 6 +++--- src/ver.h | 2 +- 6 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/mem/blhash.h b/lib/mem/blhash.h index 963b45d..f5700b7 100644 --- a/lib/mem/blhash.h +++ b/lib/mem/blhash.h @@ -397,7 +397,7 @@ class BinaryListHash serializeCB(serializeOver.getKeyPtr(), reinterpret_cast(&node->nodes[idx].next))) { serializeList.emplace_back(*serializeOver.getKeyPtr(), *reinterpret_cast(&node->nodes[idx].next)); - if (serializeList.size() == serializeLimit) + if (serializeList.size() >= serializeLimit) { serializeLimit = -1; return; @@ -426,7 +426,7 @@ class BinaryListHash serializeCB(serializeOver.getKeyPtr(), reinterpret_cast(&node->nodes[idx].next))) { serializeList.emplace_back(*serializeOver.getKeyPtr(), *reinterpret_cast(&node->nodes[idx].next)); - if (serializeList.size() == serializeLimit) + if (serializeList.size() >= serializeLimit) { serializeLimit = -1; return; diff --git a/src/customer_index.h b/src/customer_index.h index caa56d2..5bd934e 100644 --- a/src/customer_index.h +++ b/src/customer_index.h @@ -86,10 +86,10 @@ namespace openset int limit, const std::function& filterCallback) { - if (limit < 0) - limit = 10; - if (limit > 10000) - limit = 10000; + 
if (limit <= 0) + limit = 1; + if (limit > 1000) + limit = 1000; if (const auto& iter = indexes.find(propIndex); iter != indexes.end()) return iter->second->serialize(descending, limit, filterCallback); return {}; diff --git a/src/http_serve.cpp b/src/http_serve.cpp index 96f2665..f792273 100644 --- a/src/http_serve.cpp +++ b/src/http_serve.cpp @@ -200,11 +200,8 @@ namespace openset::web void HttpServe::makeWorkers() { - otherWorkers.reserve(32); - queryWorkers.reserve(8); - threads.reserve(40); - for (auto i = 0; i < 32; i++) + for (auto i = 0; i < 64; i++) { otherWorkers.emplace_back(std::make_shared(this, i, false)); threads.emplace_back(thread(&webWorker::runner, otherWorkers[i])); diff --git a/src/oloop_insert.cpp b/src/oloop_insert.cpp index 4dbdd2b..3693c7a 100644 --- a/src/oloop_insert.cpp +++ b/src/oloop_insert.cpp @@ -97,7 +97,7 @@ bool OpenLoopInsert::run() } int64_t readHandle = 0; - auto inserts = SideLog::getSideLog().read(table.get(), loop->partition, inBypass() ? 25 : 250, readHandle); + auto inserts = SideLog::getSideLog().read(table.get(), loop->partition, inBypass() ? 
25 : 50, readHandle); if (inserts.empty()) { diff --git a/src/rpc_insert.cpp b/src/rpc_insert.cpp index f4fd01d..ab2e60e 100644 --- a/src/rpc_insert.cpp +++ b/src/rpc_insert.cpp @@ -151,7 +151,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa else uuid = personNode->getInt(); - const auto destination = cast(MakeHash(uuid) % partitions->getPartitionMax()); + const auto destination = cast(cast(MakeHash(uuid)) % partitions->getPartitionMax()); int64_t len; auto logSize = SideLog::getSideLog().add(table.get(), destination, cjson::stringifyCstr(row, len)); @@ -216,7 +216,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa } } - if (SideLog::getSideLog().getLogSize() < 25000) + if (SideLog::getSideLog().getLogSize() < 50000) { message->reply(http::StatusCode::success_ok, response); } @@ -224,7 +224,7 @@ void RpcInsert::insertRetry(const openset::web::MessagePtr& message, const RpcMa { thread work([=]() { - while (SideLog::getSideLog().getLogSize() > 25000) + while (SideLog::getSideLog().getLogSize() > 50000) ThreadSleep(5); message->reply(http::StatusCode::success_ok, response); diff --git a/src/ver.h b/src/ver.h index 8af2c06..10e8c96 100644 --- a/src/ver.h +++ b/src/ver.h @@ -3,5 +3,5 @@ // line 6 is version const std::string __version__ = -"0.4.5.test10" +"0.4.5.test11" ; \ No newline at end of file From 3c4921a56505c109ce68056f65ef10876b4d3835 Mon Sep 17 00:00:00 2001 From: SethHamilton Date: Thu, 12 Dec 2019 19:46:14 -0500 Subject: [PATCH 31/31] increase thread limits for 32 core setup --- src/main.cpp | 4 ++-- src/service.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 04990ad..4a794c3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,8 +39,8 @@ void StartOpenSet(openset::config::CommandlineArgs args) Logger::get().info("OpenSet v" + __version__); Logger::get().info("OpenSet, Copyright(c) 2015 - 2019, Seth Hamilton."); - const auto 
workerCount = 16;// TODO make this a switch std::thread::hardware_concurrency(); - Logger::get().info(to_string(workerCount) + " processor cores available."); + //const auto workerCount = 16;// TODO make this a switch std::thread::hardware_concurrency(); + //Logger::get().info(to_string(workerCount) + " processor cores available."); args.fix(); // fix the default startup arguments after WSAStartup (on windows) diff --git a/src/service.cpp b/src/service.cpp index 98a62d8..8079b8b 100644 --- a/src/service.cpp +++ b/src/service.cpp @@ -29,7 +29,7 @@ namespace openset #endif // generate our async workers, we are going to use one worker per core - openset::async::AsyncPool async(partitionTotal, 16 ); // TODO make this a switch std::thread::hardware_concurrency()); + openset::async::AsyncPool async(partitionTotal, 32 ); // TODO make this a switch std::thread::hardware_concurrency()); // DEBUG OpenSet::async::AsyncPool async(partitionTotal, 1); openset::mapping::Mapper mapper;