Сделал Plus-версию инструмента статистического анализа данных для Juejin (掘金).

Бэкенд
Сделал Plus-версию инструмента статистического анализа данных для Juejin (掘金).

Эта статья участвует в конкурсе «Призыв Haowen» (сбор хороших статей); нажмите, чтобы узнать подробности: приём заявок по двум направлениям — бэкенд и фронтенд, призовой фонд в 20 000 юаней ждёт вас!

В этой статье говорится только об анализе; об источнике данных см. статью «Сделал инструмент статистического анализа для Juejin».

Предисловие

К исходной версии добавлены следующие функции:

  1. Список авторов, повысивших уровень за сегодня
  2. Список авторов, повысивших уровень за последние 3, 7 и 30 дней
  3. Top10 по просмотрам за последние 3, 7 и 30 дней
  4. Top10 по лайкам за последние 3, 7 и 30 дней
  5. Линейный график ежедневных данных лучших авторов

Результат

  1. Список авторов, повысивших уровень за сегодня

image.png

  2. Список авторов, повысивших уровень за последние 3, 7 и 30 дней

image.png

  3. Top10 по просмотрам за последние 3, 7 и 30 дней

image.png

  4. Top10 по лайкам за последние 3, 7 и 30 дней

image.png

  5. Линейный график ежедневных данных лучших авторов

  Вставить HTML сюда не получается; чтобы увидеть реальный результат -> нажмите здесь: image.png

Анализ данных

В примерах кода по-прежнему используется Scala.

Основной код прокомментирован; если есть вопросы, задавайте их в комментариях.

Если нужны данные, можете попросить их у меня в комментариях или собрать сами (см. «Сделал инструмент статистического анализа для Juejin»).


import cn.hutool.core.io.IoUtil
import cn.hutool.core.lang.TypeReference
import cn.hutool.json.JSONUtil
import com.yeting.juejin.JueLI.Author

import java.io.{FileInputStream, FileOutputStream}
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.time.{LocalDate, LocalDateTime}
import java.time.format.DateTimeFormatter
import java.util
import scala.collection.JavaConverters._
import scala.collection.{immutable, mutable}
import scala.math.Ordering

object J {

    /** Minute-precision timestamp pattern used for the snapshot keys of `map` and for `Author.getTime`. */
    val dateFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMddHHmm")

    /** Day-precision pattern used to bucket snapshots by calendar day. */
    val yyyyMMdd: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")

    /** Human-readable pattern used when printing time ranges in the console tables. */
    val dateFormatOut: DateTimeFormatter = DateTimeFormatter.ofPattern("MM-dd HH:mm")

    /**
     * All loaded snapshots: snapshot timestamp (in `dateFormat`) -> authors captured at that time.
     *
     * A hash-based map is used because `load()` performs one `put` per snapshot;
     * `mutable.ListMap` makes every `put` linear in the number of entries already stored
     * (and is deprecated in Scala 2.13). Insertion order is still preserved, although
     * the consumers in this object sort by time themselves.
     */
    val map: mutable.Map[String, List[Author]] = mutable.LinkedHashMap()

    /**
     * Entry point: loads the snapshot files, prints the Top-10 tables to the
     * console and then generates the per-author chart page.
     *
     * @param args command-line arguments (not used)
     */
    def main(args: Array[String]): Unit = {
        // Load the data
        load()
        // Top rankings
        top
        // Charts
        userReport
    }

    /**
     * Builds an ECharts line chart of the likes each author gained per day and
     * writes it to `./111.html`.
     *
     * Only authors whose summed daily gain exceeds 50 are charted. Every series
     * contains exactly one value per x-axis day (0 for a day on which the author
     * has no snapshot), so points always line up with their dates.
     */
    private def userReport: Unit = {
        // Renders text as a single-quoted JavaScript string literal that is safe inside an
        // inline <script>: author names are scraped data and may contain quotes, backslashes
        // or "</script>".
        def jsString(raw: String): String = {
            val escaped = Option(raw).getOrElse("").map {
                case '\\' => "\\\\"
                case '\'' => "\\'"
                case '\n' => "\\n"
                case '\r' => "\\r"
                case '<' => "\\x3C"
                case other => other.toString
            }.mkString
            s"'$escaped'"
        }

        // Group the snapshots by calendar day (yyyyMMdd).
        val dayGroup = map.toList
            .map { case (snapshotTime, authors) =>
                (LocalDateTime.parse(snapshotTime, dateFormat).format(yyyyMMdd), authors)
            }
            .groupBy(_._1)

        // Days shown along the x axis, in chronological order.
        val days: List[String] = dayGroup.keys.toList.sortBy(_.toInt)
        val xAxisdata = days.map(day => s"'${day}'").mkString(",")

        // Pivot to the author's point of view: (user id, day, user name, likes gained that day),
        // where the gain is the last snapshot of the day minus the first one.
        val dailyGains: List[(String, String, String, Int)] = dayGroup.toList.flatMap { case (day, snapshots) =>
            snapshots
                .flatMap(_._2)
                .groupBy(_.getUser_id)
                .map { case (userId, samples) =>
                    val ordered = samples.sortBy(_.getTime)
                    (userId, day, ordered.head.getUser_name,
                        ordered.last.getGot_digg_count.toInt - ordered.head.getGot_digg_count.toInt)
                }
        }

        val userGroup = dailyGains
            .groupBy(_._1)
            .values
            // Some filtering is required here, otherwise there are far too many authors to draw.
            .filter(_.map(_._4).sum > 50)
            .toList
            // Largest total gain first, so authors are easy to find in the legend.
            .sortBy(_.map(_._4).sum)(Ordering.Int.reverse)

        // All authors shown in the chart legend.
        val legendData = userGroup.map(rows => jsString(rows.head._3)).mkString(",")

        // One series per author, aligned with the x axis.
        val series = userGroup
            .map { rows =>
                val gainByDay = rows.map(row => row._2 -> row._4).toMap
                val points = days.map(day => gainByDay.getOrElse(day, 0)).mkString(",")
                s"""
                   |{
                   |    name: ${jsString(rows.head._3)},
                   |    type: 'line',
                   |    data: [${points}]
                   |}
                   |""".stripMargin
            }.mkString(",")

        // Assemble the HTML page.
        val html =
            s"""
               |<!DOCTYPE html>
               |<html style="height: 100%">
               |	<head>
               |		<meta charset="utf-8">
               |	</head>
               |	<body style="height: 100%; margin: 0">
               |		<div id="container" style="height: 100%"></div>
               |		<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
               |		<script type="text/javascript">
               |			var dom = document.getElementById("container");
               |			var myChart = echarts.init(dom);
               |			var app = {};
               |			var option;
               |			option = {
               |				title: {
               |					text: ''
               |				},
               |				tooltip: {
               |					trigger: 'axis'
               |				},
               |				legend: {
               |					data: [${legendData}]
               |				},
               |				grid: {
               |					left: '3%',
               |					right: '4%',
               |					bottom: '3%',
               |					containLabel: true
               |				},
               |				toolbox: {
               |					feature: {
               |						saveAsImage: {}
               |					}
               |				},
               |				xAxis: {
               |					type: 'category',
               |					boundaryGap: false,
               |					data: [${xAxisdata}]
               |				},
               |				yAxis: {
               |					type: 'value'
               |				},
               |				series: [
               |                 ${series}
               |				]
               |			};
               |			if (option && typeof option === 'object') {
               |				myChart.setOption(option);
               |			}
               |		</script>
               |	</body>
               |</html>
               |
               |""".stripMargin
        // Save it; the `true` flag asks Hutool to close the output stream after writing.
        IoUtil.writeUtf8(new FileOutputStream("./111.html"), true, html)
    }

    /**
     * Prints the console rankings: Top-10 likes (today / 3 / 7 / 30 days), Top-10 views
     * (today / 3 days), today's largest single-interval gains in likes and in views,
     * and the authors whose level changed today / in the last 3 days.
     *
     * Windows include the current day: "last N days" means the N calendar days ending
     * today. A window total is the last sample of the window minus the first one; a
     * "single interval" is the span between two consecutive samples.
     */
    private def top: Unit = {
        // Statistics of one author over one time window.
        final case class WindowStats(
            diggTotal: Int,
            viewTotal: Int,
            diggPeak: Int,
            diggPeakTime: String,
            viewPeak: Int,
            viewPeakTime: String,
            leveledUp: Boolean,
            levelDesc: String
        )
        // One author with the statistics of every reported window.
        final case class AuthorRow(userName: String, day1: WindowStats, day3: WindowStats, day7: WindowStats, day30: WindowStats)

        // Result for a window in which the author has no samples at all.
        val emptyStats = WindowStats(0, 0, 0, "", 0, "", leveledUp = false, "")

        // Evaluated once so every author is measured against the same "today".
        val today = LocalDate.now()

        // Largest positive increase of `counter` between two consecutive samples, with the
        // printable time range in which it happened; (0, "") when nothing increased.
        def largestStep(samples: List[Author], counter: Author => Int): (Int, String) =
            samples.zip(samples.drop(1)).foldLeft((0, "")) { case (best, (from, to)) =>
                val gain = counter(to) - counter(from)
                if (gain > best._1) (gain, s"${getOutTime(from.getTime)} - ${getOutTime(to.getTime)}")
                else best
            }

        // Summarises the chronologically ordered samples of one window.
        def report(samples: List[Author]): WindowStats =
            if (samples.isEmpty) emptyStats
            else {
                val first = samples.head
                val latest = samples.last

                // Highest gain within a single interval.
                val (diggPeak, diggPeakTime) = largestStep(samples, _.getGot_digg_count.toInt)
                val (viewPeak, viewPeakTime) = largestStep(samples, _.getGot_view_count.toInt)

                // Level change: compare the lowest and the highest level seen in the window.
                val levels = samples.map(_.getLevel.toInt)
                val lowest = levels.min
                val highest = levels.max
                val leveledUp = lowest != highest

                WindowStats(
                    diggTotal = latest.getGot_digg_count.toInt - first.getGot_digg_count.toInt,
                    viewTotal = latest.getGot_view_count.toInt - first.getGot_view_count.toInt,
                    diggPeak = diggPeak,
                    diggPeakTime = diggPeakTime,
                    viewPeak = viewPeak,
                    viewPeakTime = viewPeakTime,
                    leveledUp = leveledUp,
                    levelDesc = if (leveledUp) s"${lowest} 升到 ${highest}" else "无升级"
                )
            }

        val rows: List[AuthorRow] = map
            .values
            .flatten
            .groupBy(_.getUser_id)
            .values
            .toList
            .map { snapshots =>
                val samples = snapshots.toList.sortBy(_.getTime)
                val dated = samples.map(sample => LocalDateTime.parse(sample.getTime, dateFormat).toLocalDate -> sample)

                // Samples of the `n` calendar days ending today (today included), oldest first.
                def lastDays(n: Int): List[Author] = dated.collect {
                    case (day, sample) if day.isAfter(today.minusDays(n)) && !day.isAfter(today) => sample
                }

                AuthorRow(
                    userName = samples.head.getUser_name,
                    day1 = report(lastDays(1)),
                    day3 = report(lastDays(3)),
                    day7 = report(lastDays(7)),
                    day30 = report(lastDays(30))
                )
            }

        def banner(title: String): Unit =
            println(s"\n-----------------${title}------------------")

        // Two-column Top-10 table ordered by `value`, largest first.
        def printTotals(title: String, valueHeader: String, value: AuthorRow => Int): Unit = {
            banner(title)
            printf("|%-12s\t|%-5s|\n", "用户", valueHeader)
            printf("|%-12s\t|%-5s|\n", "-" * 12, "-" * 5)
            rows.sortBy(value)(Ordering.Int.reverse).take(10).foreach { row =>
                printf("|%-12s\t|%-5s|\n", row.userName, value(row))
            }
        }

        // Three-column Top-10 table of single-interval peaks: (amount, time range).
        def printPeaks(title: String, valueHeader: String, peak: AuthorRow => (Int, String)): Unit = {
            banner(title)
            printf("|%-12s\t|%-25s\t|%-5s|\n", "用户", "时间段", valueHeader)
            printf("|%-12s\t|%-25s\t|%-5s|\n", "-" * 12, "-" * 25, "-" * 5)
            rows.sortBy(row => peak(row)._1)(Ordering.Int.reverse).take(10).foreach { row =>
                val (amount, period) = peak(row)
                printf("|%-12s\t|%-25s\t|%-5s|\n", row.userName, period, amount)
            }
        }

        // Lists every author whose level changed inside the selected window.
        def printLevelUps(title: String, window: AuthorRow => WindowStats): Unit = {
            banner(title)
            printf("|%-12s\t|%-5s|\n", "用户", "等级")
            printf("|%-12s\t|%-10s|\n", "-" * 12, "-" * 10)
            rows.filter(row => window(row).leveledUp).foreach { row =>
                printf("|%-12s\t|%-5s|\n", row.userName, window(row).levelDesc)
            }
        }

        printTotals("今日获赞Top10", "总获赞", _.day1.diggTotal)
        printTotals("近3日获赞Top10", "总获赞", _.day3.diggTotal)
        printTotals("近7日获赞Top10", "总获赞", _.day7.diggTotal)
        printTotals("近30日获赞Top10", "总获赞", _.day30.diggTotal)

        printTotals("今日浏览Top10", "总浏览", _.day1.viewTotal)
        printTotals("近3日浏览Top10", "总浏览", _.day3.viewTotal)

        printPeaks("今日单时间段获赞Top10", "获赞", row => (row.day1.diggPeak, row.day1.diggPeakTime))
        // Today's view peak: this table must rank by views of the 1-day window.
        printPeaks("今日单时间段浏览Top10", "获浏览", row => (row.day1.viewPeak, row.day1.viewPeakTime))

        printLevelUps("今日升级名单", _.day1)
        printLevelUps("近3日升级名单", _.day3)
    }

    /**
     * Reads the snapshot files and stores their content in `map`.
     *
     * Every non-blank line of a file is parsed as one JSON object that maps a
     * snapshot timestamp to the list of authors captured at that time. A timestamp
     * that occurs more than once keeps the value read last.
     */
    def load(): Unit = {
        // The target type is the same for every line, so it is built only once.
        val snapshotType: Type = new TypeReference[util.Map[String, util.List[Author]]] {}.getType

        List(
            "./j-20210701.json",
            "./j-20210702.json",
            "./j-20210703.json",
        ).foreach { path =>
            val lineList = new util.ArrayList[String]()
            val in = new FileInputStream(path)
            try {
                IoUtil.readLines(in, StandardCharsets.UTF_8, lineList)
            } finally {
                // Close explicitly: this code does not rely on readLines closing the stream.
                in.close()
            }

            lineList.asScala
                // A blank (for example trailing) line carries no snapshot.
                .filter(line => line != null && line.trim.nonEmpty)
                .foreach { line =>
                    val bean: util.Map[String, util.List[Author]] = JSONUtil.toBean(line, snapshotType, true)
                    // Guard against a null result for a line that could not be mapped.
                    Option(bean).foreach { snapshots =>
                        snapshots.asScala.foreach(entry => map.put(entry._1, entry._2.asScala.toList))
                    }
                }
        }
    }

    /**
     * Re-formats a timestamp for display.
     *
     * @param time timestamp text in the `dateFormat` pattern
     * @return the same date-time rendered with the `dateFormatOut` pattern
     */
    def getOutTime(time: String): String = {
        val parsed = LocalDateTime.parse(time, dateFormat)
        dateFormatOut.format(parsed)
    }

}