MongoDB MapReduce causes "Error: error doing query: failed"
I've got a collection of documents using the following schema (some members redacted):
class="lang-js prettyprint-override">{ "_id" : objectid("539f41a95d1887b57ab78bea"), "answers" : { "ratings" : { "positivity" : [ 2, 3, 5 ], "activity" : [ 4, 4, 3 ], }, "media" : [ objectid("537ea185df872bb71e4df270"), objectid("537ea185df872bb71e4df275"), objectid("537ea185df872bb71e4df272") ] } in schema, first, second, , 3rd positivity ratings correspond first, second, , 3rd entries in media array, respectively. same true activity ratings. need calculate statistics positivity , activity ratings respect associated media objects across documents in collection. right now, i'm doing first entries next mapreduce setup:
var mapFunction = function() {
    var activity = {
        sum: this.answers.ratings.activity[0],
        min: this.answers.ratings.activity[0],
        max: this.answers.ratings.activity[0],
        count: 1,
        diff: 0
    };
    var positivity = {
        sum: this.answers.ratings.positivity[0],
        min: this.answers.ratings.positivity[0],
        max: this.answers.ratings.positivity[0],
        count: 1,
        diff: 0
    };
    emit(this.media[0].str, {'activity': activity, 'positivity': positivity});
}

var reduceFunction = function(key, values) {
    var activityA = values[0].activity; // reduce into this element
    for (var i = 1; i < values.length; i++) {
        var activityB = values[i].activity; // merge 'b' into 'a'
        // temp helpers
        var delta = activityA.sum/activityA.count - activityB.sum/activityB.count; // a.mean - b.mean
        var weight = (activityA.count * activityB.count)/(activityA.count + activityB.count);
        // reducing
        activityA.diff += activityB.diff + delta*delta*weight;
        activityA.sum += activityB.sum;
        activityA.count += activityB.count;
        activityA.min = Math.min(activityA.min, activityB.min);
        activityA.max = Math.max(activityA.max, activityB.max);
    }

    var positivityA = values[0].positivity; // reduce into this element
    for (var i = 1; i < values.length; i++) {
        var positivityB = values[i].positivity; // merge 'b' into 'a'
        // temp helpers
        var delta = positivityA.sum/positivityA.count - positivityB.sum/positivityB.count; // a.mean - b.mean
        var weight = (positivityA.count * positivityB.count)/(positivityA.count + positivityB.count);
        // reducing
        positivityA.diff += positivityB.diff + delta*delta*weight;
        positivityA.sum += positivityB.sum;
        positivityA.count += positivityB.count;
        positivityA.min = Math.min(positivityA.min, positivityB.min);
        positivityA.max = Math.max(positivityA.max, positivityB.max);
    }

    return {'activity': activityA, 'positivity': positivityA};
}

var finalizeFunction = function(key, value) {
    value.activity.mean = value.activity.sum / value.activity.count;
    value.activity.population_variance = value.activity.diff / value.activity.count;
    value.activity.population_std = Math.sqrt(value.activity.population_variance);
    value.activity.sample_variance = value.activity.diff / (value.activity.count - 1);
    value.activity.sample_std = Math.sqrt(value.activity.sample_variance);

    value.positivity.mean = value.positivity.sum / value.positivity.count;
    value.positivity.population_variance = value.positivity.diff / value.positivity.count;
    value.positivity.population_std = Math.sqrt(value.positivity.population_variance);
    value.positivity.sample_variance = value.positivity.diff / (value.positivity.count - 1);
    value.positivity.sample_std = Math.sqrt(value.positivity.sample_variance);

    return value;
}

var limitingQuery = {
    'answers.ratings.activity': {$exists: true},
    'answers.ratings.positivity': {$exists: true}
};

db.trials.mapReduce(mapFunction, reduceFunction, {
    query: limitingQuery,
    finalize: finalizeFunction,
    out: {replace: 'base_ratings', db: 'tmp'}
});
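One property worth noting about the reduce step: MongoDB may call the reduce function repeatedly on partial results, so re-reducing its own output must be a no-op. Below is a minimal sketch of that sanity check in the shell, reusing reduceFunction from above; the key and the two sample values are made up for illustration:

// Two made-up values shaped like the map function's output for one key.
var v1 = {
    activity:   {sum: 4, min: 4, max: 4, count: 1, diff: 0},
    positivity: {sum: 2, min: 2, max: 2, count: 1, diff: 0}
};
var v2 = {
    activity:   {sum: 3, min: 3, max: 3, count: 1, diff: 0},
    positivity: {sum: 5, min: 5, max: 5, count: 1, diff: 0}
};
var key = '537ea185df872bb71e4df270'; // hypothetical media id string

var once = reduceFunction(key, [v1, v2]);
var twice = reduceFunction(key, [once]); // re-reducing the reduced value
printjson(once);
printjson(twice); // should print the same document as 'once'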
{ "inprog" : [ { "opid" : 2337, "active" : true, "secs_running" : 2787, "microsecs_running" : numberlong("2787597940"), "op" : "query", "ns" : "eim.trials", "query" : { "$msg" : "query not recording (too large)" }, "plansummary" : "collscan", "client" : "109.201.154.152:59939", "desc" : "conn17", "threadid" : "0x7ef89b022700", "connectionid" : 17, "locks" : { "^" : "r", "^eim" : "r" }, "waitingforlock" : false, "msg" : "m/r: (1/3) emit phase m/r: (1/3) emit progress: 8300/1 830000%", "progress" : { "done" : 8300, "total" : 1 }, "numyields" : 1133, "lockstats" : { "timelockedmicros" : { "r" : numberlong("5075753298"), "w" : numberlong(2274) }, "timeacquiringmicros" : { "r" : numberlong(243155328), "w" : numberlong(131) } } }, { "opid" : 2480, "active" : true, "secs_running" : 2111, "microsecs_running" : numberlong(2111502538), "op" : "query", "ns" : "eim.trials", "query" : { "$msg" : "query not recording (too large)" }, "plansummary" : "collscan", "client" : "109.201.154.192:61609", "desc" : "conn23", "threadid" : "0x7ef89ac1e700", "connectionid" : 23, "locks" : { "^" : "r", "^eim" : "r" }, "waitingforlock" : false, "msg" : "m/r: (1/3) emit phase m/r: (1/3) emit progress: 7952/1 795200%", "progress" : { "done" : 7952, "total" : 1 }, "numyields" : 819, "lockstats" : { "timelockedmicros" : { "r" : numberlong("3399905661"), "w" : numberlong(73184) }, "timeacquiringmicros" : { "r" : numberlong(406543723), "w" : numberlong(145) } } } ] } what's emit progress beingness greater 100%? know additional documents aren't beingness added collection while running. and, inprog.progress.doneis 8300 while inprog.progress.total 1. huh?
What's worse, the operation bails with this error:
class="lang-js prettyprint-override">error: error doing query: failed src/mongo/shell/query.js:78 db.currentop() continues homecoming results similar above, after error has been reported. log shows no errors @ (only lines indicating command still running):
2014-06-19T13:24:15.378-0400 [conn23]  M/R: (1/3) Emit Progress: 8400

Any ideas what's happening here? I'm running MongoDB 2.6.2 on Ubuntu 13.10.
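In case it helps with diagnosis, here is the same job issued through the command form with timing information enabled (verbose is a documented mapReduce option); a sketch reusing the definitions above:

// Same map/reduce/finalize/query as above; 'verbose: true' asks the
// server to include timing details in the result document.
var res = db.runCommand({
    mapReduce: 'trials',
    map: mapFunction,
    reduce: reduceFunction,
    finalize: finalizeFunction,
    query: limitingQuery,
    out: {replace: 'base_ratings', db: 'tmp'},
    verbose: true
});
printjson(res);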
Not an answer (apparently I don't have enough reputation to comment), but I had a similar error and raised an issue with MongoDB: https://jira.mongodb.org/browse/server-15334. If you found this web page while googling, as I did, follow that link and hopefully more details will appear there (MongoDB are quite good at coming back with thoughts on this sort of thing pretty quickly).
Tags: mongodb, mapreduce