这篇文章主要介绍“如何生成Java数据脚本”,在日常操作中,相信很多人在如何生成Java数据脚本问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答“如何生成Java数据脚本”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
/**
 * Generates mock website-visit records and writes them to a local data file.
 *
 * Every record is a single tab-separated line with the fields, in order:
 * ip, address, date, timestamp, userid, website, action.
 */
object ProducePvAndUvData {
  // Exclusive upper bound for each generated IP octet (octets fall in 0..222).
  val IP = 223
  // Regions a simulated visitor can come from. The list previously contained
  // "山西" twice and no "陕西"; the second occurrence is now "陕西".
  val ADDRESS = Array("北京", "天津", "上海", "重庆", "河北", "辽宁", "山西",
    "吉林", "江苏", "浙江", "黑龙江", "安徽", "福建", "江西",
    "山东", "河南", "湖北", "湖南", "广东", "海南", "四川",
    "贵州", "云南", "陕西", "甘肃", "青海", "台湾", "内蒙",
    "广西", "西藏", "宁夏", "新疆", "香港", "澳门")
  // Date written into every record: the day the generator runs (yyyy-MM-dd).
  val DATE = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
  // Placeholder timestamp; not read by the generator below.
  val TIMESTAMP = 0L
  // Placeholder user id; not read by the generator below.
  val USERID = 0L
  // Websites a simulated visit can target.
  val WEBSITE = Array("www.baidu.com", "www.taobao.com", "www.dangdang.com", "www.jd.com", "www.suning.com", "www.mi.com", "www.gome.com.cn")
  // Actions a simulated visit can perform.
  val ACTION = Array("Regist", "Comment", "View", "Login", "Buy", "Click", "Logout")

  /**
   * Recreates the data file and fills it with records for 50000 simulated
   * visitors. Each record is also echoed to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val pathFileName = "G://idea//scala//spark02/data"
    // Open the output only after the file was (re)created, so nothing is left
    // open when creation fails.
    if (CreateFile(pathFileName)) {
      val file = new File(pathFileName)
      val fos = new FileOutputStream(file, true)
      val osw = new OutputStreamWriter(fos, "UTF-8")
      val pw = new PrintWriter(osw)
      try {
        // One generator for the whole run instead of one per visitor.
        val random = new Random()
        for (_ <- 0 until 50000) {
          // Simulated visitor identity: IP, region and user id.
          val ip = Seq.fill(4)(random.nextInt(IP)).mkString(".")
          val address = ADDRESS(random.nextInt(ADDRESS.length))
          // Clearing the sign bit keeps the id non-negative; Math.abs would
          // return a negative number for Long.MinValue.
          val userid = random.nextLong() & Long.MaxValue
          // The same visitor performs several visits; `| 1` forces the count
          // to be odd, so it is always 1, 3 or 5.
          val visits = random.nextInt(5) | 1
          for (_ <- 0 until visits) {
            val timestamp = new Date().getTime()
            // Bounds follow the array sizes so that every entry is reachable
            // (a fixed bound of 6 used to exclude the last action, "Logout").
            val webSite = WEBSITE(random.nextInt(WEBSITE.length))
            val action = ACTION(random.nextInt(ACTION.length))
            val content = s"$ip\t$address\t$DATE\t$timestamp\t$userid\t$webSite\t$action"
            println(content)
            pw.write(content + "\n")
          }
        }
      } finally {
        // Closing the PrintWriter also closes the wrapped writer and stream.
        pw.close()
      }
    }
  }

  /**
   * Creates an empty file at the given path, deleting any existing file first.
   * Missing parent directories are created as well.
   *
   * @param pathFileName path of the file to create
   * @return true if a new file was created, false otherwise (for example when
   *         an existing file could not be deleted)
   */
  def CreateFile(pathFileName: String): Boolean = {
    val file = new File(pathFileName)
    if (file.exists) file.delete
    // createNewFile throws when the parent directory does not exist.
    Option(file.getParentFile).foreach(_.mkdirs())
    val createNewFile = file.createNewFile()
    // Report the real outcome instead of always claiming success.
    val outcome = if (createNewFile) " success!" else " failed!"
    println("create file " + pathFileName + outcome)
    createNewFile
  }
}
统计每个网站的PV、UV,以及每个网站的每个地区访问量,由大到小排序
/**
 * Spark job that reports, per website:
 *  - PV: the total number of visit records;
 *  - UV: the number of distinct visitor IPs;
 *  - the three regions with the most visits, in descending order.
 *
 * The input is read as tab-separated lines; field 0 is used as the visitor
 * IP, field 1 as the region and field 5 as the website, which is the layout
 * written by ProducePvAndUvData. Lines with fewer than six fields are skipped.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.setMaster("local")
  conf.setAppName("SparkPvAndUv")
  val sc = new SparkContext(conf)
  try {
    val rdd: RDD[String] = sc.textFile("G:/idea/scala/spark02/data")
    // Split each line once and reuse the result in all three reports.
    val fields: RDD[Array[String]] = rdd.map(_.split("\t")).filter(_.length > 5).cache()

    println("*************PV******************")
    // Results are collected to the driver before printing so that the sorted
    // order is preserved regardless of partitioning.
    fields.map(f => (f(5), 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .collect()
      .foreach(println)

    println("*************UV******************")
    // A unique visitor is a distinct (website, ip) pair; the ip is field 0
    // (field 1 is the region and would cap the count at the number of regions).
    fields.map(f => (f(5), f(0)))
      .distinct()
      .map { case (site, _) => (site, 1) }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .collect()
      .foreach(println)

    // Visits per (website, region), counted with reduceByKey so only the
    // aggregated counts are grouped per website.
    val regionCounts: RDD[((String, String), Int)] =
      fields.map(f => ((f(5), f(1)), 1)).reduceByKey(_ + _)
    val result: RDD[(String, List[(String, Int)])] = regionCounts
      .map { case ((site, region), count) => (site, (region, count)) }
      .groupByKey()
      .map { case (site, counts) =>
        // Negating the count sorts descending; keep at most the top three.
        (site, counts.toList.sortBy(-_._2).take(3))
      }
    result.collect().foreach(println)
  } finally {
    // Release the Spark resources even when a stage fails.
    sc.stop()
  }
}
到此,关于“如何生成Java数据脚本”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好地帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
原文链接:https://my.oschina.net/u/3962987/blog/3080805