spark-统计各岗位招聘信息行数

spark-统计各岗位招聘信息行数,第1张

任务:

 

搭建集群

 

重新开一个端口

还是在spark-master里面

运行

 运行后,程序会一直不停地统计数据

 

 

 job.py代码:

# import findspark
# findspark.init()
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

def updateFunc(values, state):
    """Fold one batch of counts for a key into that key's running total.

    This is the update function handed to ``updateStateByKey`` in the
    ``__main__`` section of this script.

    Args:
        values: Iterable of the numeric counts collected for the key in the
            current batch; may be empty.
        state: Running total carried over from earlier batches, or ``None``
            when the key has no previous state.

    Returns:
        The sum of ``values`` when ``state`` is ``None``; otherwise that sum
        added to ``state``.
    """
    batch_total = sum(values)
    # Identity check: ``None`` is a singleton, so ``is`` is the correct test.
    if state is None:
        return batch_total
    return batch_total + state

if __name__ == '__main__':
    # Entry point of the streaming job: read comma-separated text lines from
    # a local socket and keep a running count per value of the second field.
    sc = SparkContext()
    sc.setLogLevel("ERROR")
    # Second argument is the batch interval in seconds.
    ssc = StreamingContext(sc, 5)
    # updateStateByKey keeps state across batches, which needs a checkpoint
    # directory to be configured.
    ssc.checkpoint("cp_for_job")
    stream = ssc.socketTextStream("localhost", 9001)
    (
        stream
        .map(lambda line: line.split(","))
        # Skip records without a second field; indexing them would raise
        # IndexError inside the worker and abort the whole job.
        .filter(lambda fields: len(fields) > 1)
        .map(lambda fields: (fields[1], 1))
        .updateStateByKey(updateFunc)
        .pprint()
    )

    ssc.start()
    ssc.awaitTermination()

 my_socket_server.py代码:

import socket
import time
from threading import Thread

# (host, port) pair that init() binds the listening socket to.
ADDRESS = ('localhost', 9001)
# Listening server socket; stays None until init() creates it.
g_socket_server =None
# Registry that remove_client() looks clients up in.
# NOTE(review): nothing in the visible code ever adds entries here - confirm
# whether registration was meant to happen on accept.
g_conn_pool={}

def init():
    """Create the listening TCP socket and publish it as ``g_socket_server``.

    The socket is bound to ``ADDRESS`` and put into listening mode with a
    backlog of 5. The global is only assigned once the socket is fully set
    up, so a failed start never leaves a half-initialised socket behind.

    Raises:
        OSError: If configuring, binding, or listening on the socket fails;
            the socket is closed before the error propagates.
    """
    global g_socket_server

    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Allow an immediate restart: without SO_REUSEADDR a recently closed
        # listener can make bind() fail with "Address already in use".
        server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server.bind(ADDRESS)
        server.listen(5)
    except OSError:
        server.close()
        raise

    g_socket_server = server
    print("server start, wait for client connecting... ")

def accept_client():
    """Accept connections forever, serving each one on its own thread.

    Blocks on ``g_socket_server.accept()`` and starts ``message_handle`` in a
    new thread for every client that connects. Never returns.
    """
    while True:
        client, info = g_socket_server.accept()
        # Daemon thread, so it does not keep the process alive on exit.
        # ``daemon=True`` replaces the deprecated ``Thread.setDaemon()``.
        handler = Thread(
            target=message_handle,
            args=(client, info),
            daemon=True,
        )
        handler.start()
def message_handle(client, info):
    """Stream the data file to one connected client, over and over.

    Sends a ``success`` greeting, then repeatedly reads
    ``/root/spark/part-00000`` and sends every non-blank line to the client.
    The loop ends when any step raises (for example the peer disconnects or
    the file cannot be read); the error is printed and the client socket is
    closed.

    Args:
        client: Connected socket-like object providing ``sendall`` and
            ``close``.
        info: Peer address as returned by ``accept()``; currently unused.
    """
    try:
        # NOTE(review): the greeting carries no trailing newline, so a
        # line-oriented reader sees it glued to the first record - confirm
        # whether that is intended.
        client.sendall("success".encode(encoding='utf8'))
        while True:
            with open("/root/spark/part-00000", 'r', encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    # A final line without a newline would otherwise merge
                    # with the first line of the next pass over the file.
                    if not line.endswith("\n"):
                        line += "\n"
                    client.sendall(line.encode(encoding='utf8'))
                    # time.sleep(0.2)
    except Exception as e:
        # Thread boundary: report the failure and stop serving this client.
        print(e)
    finally:
        # Always release the connection, including after a failed greeting.
        client.close()

def remove_client(client_type):
    """Close and unregister the client stored under ``client_type``.

    Does nothing when ``g_conn_pool`` has no entry for ``client_type`` or the
    stored value is ``None``.

    Args:
        client_type: Key of the client in ``g_conn_pool``; it is concatenated
            into the log message, so it must be a ``str``.
    """
    # ``get`` avoids a KeyError for unknown keys, so the None check below is
    # actually reachable.
    client = g_conn_pool.get(client_type)
    if client is None:
        return
    client.close()
    g_conn_pool.pop(client_type)
    print(" client offin:" + client_type)
if __name__ =='__main__':
    init()
    # Accept new connections on a background daemon thread.
    # ``daemon=True`` replaces the deprecated ``Thread.setDaemon()``.
    acceptor = Thread(target=accept_client, daemon=True)
    acceptor.start()
    try:
        # Keep the main thread alive; the daemon threads end when it exits.
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("server stopped")
    finally:
        # Release the listening port on the way out.
        g_socket_server.close()

欢迎分享,转载请注明来源:内存溢出

原文地址:https://www.54852.com/langs/904777.html

(0)
打赏 微信扫一扫 支付宝扫一扫
上一篇 2022-05-15
下一篇 2022-05-15

发表评论

登录后才能评论

评论列表(0条)

    保存