2 failures
This commit is contained in:
parent
0a94b65038
commit
f45198723a
@ -90,7 +90,13 @@ class StackOverflow extends Serializable {
|
||||
|
||||
/** Group the questions and answers together */
|
||||
def groupedPostings(postings: RDD[Posting]): RDD[(QID, Iterable[(Question, Answer)])] = {
|
||||
???
|
||||
val questions : RDD[Question] = postings.filter(_.postingType == 1)
|
||||
val answers : RDD[Answer] = postings.filter(_.postingType == 2)
|
||||
val qidQuestionPair : RDD[(QID, Question)] = questions.map(q => (q.id, q))
|
||||
val qidAnswerPair : RDD[(QID, Answer)] = answers.map(a => (a.parentId.get, a))
|
||||
|
||||
val qidQA : RDD[(QID, (Question, Answer))] = qidQuestionPair.join(qidAnswerPair)
|
||||
qidQA.groupByKey
|
||||
}
|
||||
|
||||
|
||||
@ -109,7 +115,11 @@ class StackOverflow extends Serializable {
|
||||
highScore
|
||||
}
|
||||
|
||||
???
|
||||
grouped.map{case (_, iter) =>
|
||||
val question = iter.head._1;
|
||||
val answers = iter map { case (q, a) => a }
|
||||
(question, answerHighScore(answers.toArray))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -129,7 +139,7 @@ class StackOverflow extends Serializable {
|
||||
}
|
||||
}
|
||||
|
||||
???
|
||||
scored.map{ case (q, hs) => (firstLangInTag(q.tags, langs).get * langSpread, hs)}.persist()
|
||||
}
|
||||
|
||||
|
||||
@ -287,10 +297,21 @@ class StackOverflow extends Serializable {
|
||||
val closestGrouped = closest.groupByKey()
|
||||
|
||||
val median = closestGrouped.mapValues { vs =>
|
||||
val langLabel: String = ??? // most common language in the cluster
|
||||
val langPercent: Double = ??? // percent of the questions in the most common language
|
||||
val clusterSize: Int = ???
|
||||
val medianScore: Int = ???
|
||||
val dominantLangIndex = vs.groupBy(_._1).maxBy(_._2.size)._1 / langSpread
|
||||
val langLabel: String = langs(dominantLangIndex)// most common language in the cluster
|
||||
|
||||
val nbOfDominant = vs.count(_._1 / langSpread == dominantLangIndex)
|
||||
val langPercent: Double = 100.0 * (nbOfDominant.toDouble / vs.size) // percent of the questions in the most common language
|
||||
val clusterSize: Int = vs.size
|
||||
val medianScore: Int = {
|
||||
val highscores = vs.map{ case (li, hs) => hs }
|
||||
val sortedHighscores = highscores.toArray.sorted;
|
||||
val mid = highscores.size / 2;
|
||||
highscores.size % 2 match{
|
||||
case 1 => sortedHighscores(mid)
|
||||
case 0 => (sortedHighscores(mid-1) + sortedHighscores(mid)) / 2
|
||||
}
|
||||
}
|
||||
|
||||
(langLabel, langPercent, clusterSize, medianScore)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user