
Task:
Set up the cluster, open another port (still inside spark-master), and run the job so that it keeps a continuously updated count of the incoming data. Two programs are needed: the Spark Streaming job (job.py) and a socket server that feeds it data (my_socket_server.py).
job.py code:
# import findspark
# findspark.init()
from pyspark import SparkContext
from pyspark.streaming import StreamingContext


def updateFunc(values, state):
    # Sum the new values for this key in the current batch and add the
    # previously accumulated count, if there is one.
    cnt = 0
    for v in values:
        cnt += v
    if state is None:
        return cnt
    return cnt + state


if __name__ == '__main__':
    sc = SparkContext()
    sc.setLogLevel("ERROR")
    ssc = StreamingContext(sc, 5)      # 5-second micro-batches
    ssc.checkpoint("cp_for_job")       # checkpointing is required by updateStateByKey
    stream = ssc.socketTextStream("localhost", 9001)
    # Each line is comma-separated; the second field is the key being counted.
    stream.map(lambda x: (x.split(",")[1], 1)) \
          .updateStateByKey(updateFunc) \
          .pprint()
    ssc.start()
    ssc.awaitTermination()
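To see what updateStateByKey does with updateFunc, here is a minimal pure-Python sketch of the per-key accumulation Spark performs across micro-batches. The input lines are assumed to be comma-separated with the key in the second field, and the sample keys below ("apple", "pear") are hypothetical, not taken from the original part-00000 file:

# A sketch of what updateStateByKey does: for every key, Spark hands
# updateFunc the list of new values from the current batch plus the
# previously saved state for that key.
def updateFunc(values, state):
    cnt = sum(values)
    return cnt if state is None else cnt + state

state = {}                                   # key -> running count (Spark keeps this in checkpointed state)
batches = [                                  # hypothetical micro-batches of (key, 1) pairs
    [("apple", 1), ("pear", 1), ("apple", 1)],
    [("apple", 1)],
]
for batch in batches:
    grouped = {}
    for key, value in batch:
        grouped.setdefault(key, []).append(value)
    for key, values in grouped.items():
        state[key] = updateFunc(values, state.get(key))
    print(state)
# Prints {'apple': 2, 'pear': 1} after the first batch and
# {'apple': 3, 'pear': 1} after the second.

On the cluster, job.py would normally be started with spark-submit (for example, spark-submit job.py from inside spark-master) after the socket server is already listening on port 9001; otherwise socketTextStream has nothing to connect to.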
my_socket_server.py code:
import socket
import time
from threading import Thread

ADDRESS = ('localhost', 9001)

g_socket_server = None   # listening socket
g_conn_pool = {}         # connected clients, keyed by client type


def init():
    global g_socket_server
    g_socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    g_socket_server.bind(ADDRESS)
    g_socket_server.listen(5)
    print("server start, wait for client connecting... ")


def accept_client():
    while True:
        client, info = g_socket_server.accept()
        # Handle each client in its own daemon thread.
        thread = Thread(target=message_handle, args=(client, info))
        thread.daemon = True
        thread.start()


def message_handle(client, info):
    client.sendall("success".encode(encoding='utf8'))
    while True:
        try:
            # Replay the data file to the client line by line, over and over.
            with open("/root/spark/part-00000", 'r', encoding="utf-8") as f:
                for line in f:
                    if len(line.strip()) > 0:
                        client.sendall(line.encode(encoding='utf8'))
                        # time.sleep(0.2)
        except Exception as e:
            print(e)
            break


def remove_client(client_type):
    client = g_conn_pool[client_type]
    if client is not None:
        client.close()
        g_conn_pool.pop(client_type)
        print("client offline: " + client_type)


if __name__ == '__main__':
    init()
    # Start a thread that accepts new connections.
    thread = Thread(target=accept_client)
    thread.daemon = True
    thread.start()
    while True:
        time.sleep(0.1)
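Before wiring it to Spark, the server can be smoke-tested with a small client. The sketch below (a hypothetical test_client.py, not part of the original post) connects to port 9001 and prints the first chunk it receives, which should start with "success" followed by lines from part-00000:

import socket

# Connect to the server started by my_socket_server.py and read one chunk.
with socket.create_connection(('localhost', 9001)) as s:
    data = s.recv(4096)
    print(data.decode('utf-8', errors='replace')[:200])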