2 failures

This commit is contained in:
Charles Beauvile 2020-05-21 12:23:44 +02:00
parent 0a94b65038
commit f45198723a

View File

@ -90,7 +90,13 @@ class StackOverflow extends Serializable {
/** Group the questions and answers together */ /** Group the questions and answers together */
def groupedPostings(postings: RDD[Posting]): RDD[(QID, Iterable[(Question, Answer)])] = { def groupedPostings(postings: RDD[Posting]): RDD[(QID, Iterable[(Question, Answer)])] = {
??? val questions : RDD[Question] = postings.filter(_.postingType == 1)
val answers : RDD[Answer] = postings.filter(_.postingType == 2)
val qidQuestionPair : RDD[(QID, Question)] = questions.map(q => (q.id, q))
val qidAnswerPair : RDD[(QID, Answer)] = answers.map(a => (a.parentId.get, a))
val qidQA : RDD[(QID, (Question, Answer))] = qidQuestionPair.join(qidAnswerPair)
qidQA.groupByKey
} }
@ -109,7 +115,11 @@ class StackOverflow extends Serializable {
highScore highScore
} }
??? grouped.map{case (_, iter) =>
val question = iter.head._1;
val answers = iter map { case (q, a) => a }
(question, answerHighScore(answers.toArray))
}
} }
@ -129,7 +139,7 @@ class StackOverflow extends Serializable {
} }
} }
??? scored.map{ case (q, hs) => (firstLangInTag(q.tags, langs).get * langSpread, hs)}.persist()
} }
@ -287,10 +297,21 @@ class StackOverflow extends Serializable {
val closestGrouped = closest.groupByKey() val closestGrouped = closest.groupByKey()
val median = closestGrouped.mapValues { vs => val median = closestGrouped.mapValues { vs =>
val langLabel: String = ??? // most common language in the cluster val dominantLangIndex = vs.groupBy(_._1).maxBy(_._2.size)._1 / langSpread
val langPercent: Double = ??? // percent of the questions in the most common language val langLabel: String = langs(dominantLangIndex)// most common language in the cluster
val clusterSize: Int = ???
val medianScore: Int = ??? val nbOfDominant = vs.count(_._1 / langSpread == dominantLangIndex)
val langPercent: Double = 100.0 * (nbOfDominant.toDouble / vs.size) // percent of the questions in the most common language
val clusterSize: Int = vs.size
val medianScore: Int = {
val highscores = vs.map{ case (li, hs) => hs }
val sortedHighscores = highscores.toArray.sorted;
val mid = highscores.size / 2;
highscores.size % 2 match{
case 1 => sortedHighscores(mid)
case 0 => (sortedHighscores(mid-1) + sortedHighscores(mid)) / 2
}
}
(langLabel, langPercent, clusterSize, medianScore) (langLabel, langPercent, clusterSize, medianScore)
} }