这篇文章主要介绍“如何生成Java数据脚本”。在日常操作中,相信很多人在如何生成Java数据脚本的问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答“如何生成Java数据脚本”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
/**
 * Generates simulated website-visit records and writes them to a local file.
 *
 * Every record is one line of seven tab-separated fields, in this order:
 * ip, region, date, timestamp, userid, website, action.
 */
object ProducePvAndUvData {
  // Exclusive upper bound for each octet of a simulated IP address.
  val IP = 223
  // Regions a simulated visitor can come from (34 distinct entries).
  val ADDRESS = Array("北京", "天津", "上海", "重庆", "河北", "辽宁", "山西", "吉林", "江苏", "浙江",
    "黑龙江", "安徽", "福建", "江西", "山东", "河南", "湖北", "湖南", "广东", "海南", "四川", "贵州",
    "云南", "陕西", "甘肃", "青海", "台湾", "内蒙", "广西", "西藏", "宁夏", "新疆", "香港", "澳门")
  // Date stamped on every record; evaluated once, when the object is initialised.
  val DATE = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
  // Placeholder timestamp; not read by the generator below.
  val TIMESTAMP = 0L
  // Placeholder user id; not read by the generator below.
  val USERID = 0L
  // Websites a simulated visitor can hit.
  val WEBSITE = Array("www.baidu.com", "www.taobao.com", "www.dangdang.com", "www.jd.com",
    "www.suning.com", "www.mi.com", "www.gome.com.cn")
  // Actions a simulated visitor can perform.
  val ACTION = Array("Regist", "Comment", "View", "Login", "Buy", "Click", "Logout")

  /**
   * Recreates the output file and fills it with records for 50000 simulated users.
   * Nothing is written when the file cannot be created.
   *
   * @param args ignored
   */
  def main(args: Array[String]): Unit = {
    val pathFileName = "G://idea//scala//spark02/data"
    // Open the stream only after the file was created, so nothing leaks on failure.
    if (CreateFile(pathFileName)) {
      val pw = new PrintWriter(
        new OutputStreamWriter(new FileOutputStream(new File(pathFileName), true), "UTF-8"))
      try {
        // One generator for the whole run instead of one per user.
        val random = new Random()
        // Each user yields 1, 3 or 5 records, hence "50000+" lines overall.
        (0 until 50000).foreach(_ => writeUserRecords(pw, random))
      } finally {
        // Closing the outermost writer flushes and closes the wrapped streams too.
        pw.close()
      }
    }
  }

  /**
   * Writes the records of one simulated user: the same ip, region and userid
   * visiting randomly chosen websites with randomly chosen actions.
   */
  private def writeUserRecords(pw: PrintWriter, random: Random): Unit = {
    // Four octets joined in one expression, so no octet can be lost to a line break.
    val ip = Seq.fill(4)(random.nextInt(IP)).mkString(".")
    val address = ADDRESS(random.nextInt(ADDRESS.length))
    // Clearing the sign bit is always non-negative (Math.abs(Long.MinValue) is not).
    val userid = random.nextLong() & Long.MaxValue
    // OR-ing with 1 forces an odd count: 1, 3 or 5 visits.
    val visits = random.nextInt(5) | 1
    (0 until visits).foreach { _ =>
      val timestamp = System.currentTimeMillis()
      // Bounds come from the arrays themselves so that every entry is reachable.
      val webSite = WEBSITE(random.nextInt(WEBSITE.length))
      val action = ACTION(random.nextInt(ACTION.length))
      val content = s"$ip\t$address\t$DATE\t$timestamp\t$userid\t$webSite\t$action"
      System.out.println(content)
      pw.write(content + "\n")
    }
  }

  /**
   * Creates an empty file at the given path, replacing any existing file.
   * Missing parent directories are created first.
   *
   * @param pathFileName path of the file to create
   * @return true when the new file was created, false otherwise
   */
  def CreateFile(pathFileName: String): Boolean = {
    val file = new File(pathFileName)
    // getParentFile is null for a bare file name, hence the Option wrapper.
    Option(file.getParentFile).foreach(_.mkdirs())
    if (file.exists()) file.delete()
    val created = file.createNewFile()
    // Report what actually happened instead of always claiming success.
    if (created) System.out.println("create file " + pathFileName + " success!")
    else System.err.println("create file " + pathFileName + " failed!")
    created
  }
}
统计每个网站的PV、UV,以及每个网站的每个地区访问量,由大到小排序
/**
 * Reads the generated visit log and prints three statistics per website:
 * PV (number of records), UV (number of distinct visitor IPs) and the three
 * regions with the most visits.
 *
 * Input lines are tab-separated: ip, region, date, timestamp, userid, website, action.
 *
 * @param args ignored
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.setMaster("local")
  conf.setAppName("SparkPvAndUv")
  val sc = new SparkContext(conf)
  try {
    // Split every line once, drop lines too short to hold a website field,
    // and cache the result because three jobs below reuse it.
    val fields: RDD[Array[String]] = sc.textFile("G:/idea/scala/spark02/data")
      .map(_.split("\t"))
      .filter(_.length >= 6)
      .cache()

    println("*************PV******************")
    fields.map(f => (f(5), 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      // Collect before printing: foreach on an RDD does not promise the sorted order.
      .collect()
      .foreach(println)

    println("*************UV******************")
    // A visitor is identified by IP (field 0); field 1 is the region.
    fields.map(f => (f(5), f(0)))
      .distinct()
      .map { case (site, _) => (site, 1) }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .collect()
      .foreach(println)

    // Visits per (website, region), then the three busiest regions of each website.
    val result: RDD[(String, List[(String, Int)])] = fields
      .map(f => ((f(5), f(1)), 1))
      .reduceByKey(_ + _)
      .map { case ((site, region), count) => (site, (region, count)) }
      .groupByKey()
      .mapValues(_.toList.sortBy { case (_, count) => -count }.take(3))
    result.collect().foreach(println)
  } finally {
    // Release the local Spark resources even when a job fails.
    sc.stop()
  }
}
到此,关于“如何生成Java数据脚本”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好地帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。