2 failures
This commit is contained in:
parent
0a94b65038
commit
f45198723a
@ -90,7 +90,13 @@ class StackOverflow extends Serializable {
|
|||||||
|
|
||||||
/** Group the questions and answers together */
|
/** Group the questions and answers together */
|
||||||
def groupedPostings(postings: RDD[Posting]): RDD[(QID, Iterable[(Question, Answer)])] = {
|
def groupedPostings(postings: RDD[Posting]): RDD[(QID, Iterable[(Question, Answer)])] = {
|
||||||
???
|
val questions : RDD[Question] = postings.filter(_.postingType == 1)
|
||||||
|
val answers : RDD[Answer] = postings.filter(_.postingType == 2)
|
||||||
|
val qidQuestionPair : RDD[(QID, Question)] = questions.map(q => (q.id, q))
|
||||||
|
val qidAnswerPair : RDD[(QID, Answer)] = answers.map(a => (a.parentId.get, a))
|
||||||
|
|
||||||
|
val qidQA : RDD[(QID, (Question, Answer))] = qidQuestionPair.join(qidAnswerPair)
|
||||||
|
qidQA.groupByKey
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -109,7 +115,11 @@ class StackOverflow extends Serializable {
|
|||||||
highScore
|
highScore
|
||||||
}
|
}
|
||||||
|
|
||||||
???
|
grouped.map{case (_, iter) =>
|
||||||
|
val question = iter.head._1;
|
||||||
|
val answers = iter map { case (q, a) => a }
|
||||||
|
(question, answerHighScore(answers.toArray))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -129,7 +139,7 @@ class StackOverflow extends Serializable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
???
|
scored.map{ case (q, hs) => (firstLangInTag(q.tags, langs).get * langSpread, hs)}.persist()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -287,10 +297,21 @@ class StackOverflow extends Serializable {
|
|||||||
val closestGrouped = closest.groupByKey()
|
val closestGrouped = closest.groupByKey()
|
||||||
|
|
||||||
val median = closestGrouped.mapValues { vs =>
|
val median = closestGrouped.mapValues { vs =>
|
||||||
val langLabel: String = ??? // most common language in the cluster
|
val dominantLangIndex = vs.groupBy(_._1).maxBy(_._2.size)._1 / langSpread
|
||||||
val langPercent: Double = ??? // percent of the questions in the most common language
|
val langLabel: String = langs(dominantLangIndex)// most common language in the cluster
|
||||||
val clusterSize: Int = ???
|
|
||||||
val medianScore: Int = ???
|
val nbOfDominant = vs.count(_._1 / langSpread == dominantLangIndex)
|
||||||
|
val langPercent: Double = 100.0 * (nbOfDominant.toDouble / vs.size) // percent of the questions in the most common language
|
||||||
|
val clusterSize: Int = vs.size
|
||||||
|
val medianScore: Int = {
|
||||||
|
val highscores = vs.map{ case (li, hs) => hs }
|
||||||
|
val sortedHighscores = highscores.toArray.sorted;
|
||||||
|
val mid = highscores.size / 2;
|
||||||
|
highscores.size % 2 match{
|
||||||
|
case 1 => sortedHighscores(mid)
|
||||||
|
case 0 => (sortedHighscores(mid-1) + sortedHighscores(mid)) / 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
(langLabel, langPercent, clusterSize, medianScore)
|
(langLabel, langPercent, clusterSize, medianScore)
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user