
- 前端:html、css、js、jQuery、bootstrap;后端:flask;搜索引擎:elasticsearch;数据源:某某之家
获取数据源并写入es
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import requests
# HTTP headers sent with every page request made by this script.
headers = {
    'user-agent': 'ua'
}

# Name the node explicitly: the 7.x client falls back to localhost:9200 when
# no host is given, but the 8.x client refuses to be constructed without one.
# Spelling out the same address keeps the target unchanged and works on both.
es = Elasticsearch('http://localhost:9200')

# Create the `car` index on first run. Every scraped field is mapped as
# analysed full-text (`text`).
if not es.indices.exists(index='car'):
    es.indices.create(index='car', mappings={
        'properties': {
            'url': {'type': 'text'},
            'img': {'type': 'text'},
            'title': {'type': 'text'},
            'desc': {'type': 'text'},
        }
    })
def task(url, page):
    """Download one listing page and bulk-index its articles into `car`.

    Args:
        url: Address of the listing page to fetch.
        page: Page number; used only in the progress message.

    List items without a title are skipped. Any other missing field is
    stored as an empty string instead of aborting the whole page.
    """
    # `headers` has to be passed by keyword: the second positional parameter
    # of requests.get() is `params`, which would send the user-agent as a
    # query-string argument rather than as a request header.
    res = requests.get(url, headers=headers)
    tree = etree.HTML(res.text)

    # NOTE(review): the predicate arrived truncated as `[@]`, which is not
    # valid XPath and makes lxml raise. `class="article"` is presumed to be
    # the intended selector -- confirm against the live page markup.
    ul_list = tree.xpath('//ul[@class="article"]')

    actions = []
    for ul in ul_list:
        for li in ul.xpath('./li'):
            title = li.xpath('./a/h3/text()')
            if not title:
                continue
            href = li.xpath('./a/@href')
            img = li.xpath('./a/div/img/@src')
            desc = li.xpath('./a/p/text()')
            actions.append({
                '_index': 'car',
                # The href is prefixed with the scheme, as in the original.
                'url': f'https:{href[0]}' if href else '',
                'img': img[0] if img else '',
                'desc': desc[0] if desc else '',
                'title': title[0],
            })

    helpers.bulk(es, actions=actions)
    print(f'第{page}页完成!')
def main():
    """Scrape listing pages 1-10 concurrently and report pages that failed."""
    with ThreadPoolExecutor() as pool:
        # Keep each future with its page number: a discarded future swallows
        # any exception raised inside `task`, so failures would go unnoticed.
        futures = {}
        for i in range(1, 11):
            url = f'https://www.autohome.com.cn/all/{i}/'
            futures[pool.submit(task, url=url, page=i)] = i

        for future, page in futures.items():
            # exception() blocks until the task finishes and returns None on
            # success, so one failing page does not stop the others.
            error = future.exception()
            if error is not None:
                print(f'第{page}页失败: {error!r}')


if __name__ == '__main__':
    main()
视图函数
from flask import Blueprint
from flask import request
from flask import render_template
from flask import jsonify
from web.ext import es
from pprint import pprint
search_bp = Blueprint('search', __name__, url_prefix='/search')


@search_bp.route('/', methods=['get', 'post'])
def search():
    """Serve the search page, or one page of results as JSON on POST.

    Request values (POST):
        content: Text matched against `title`; when empty, every document
            is matched.
        current: Zero-based page index. Missing, malformed or negative
            values are treated as 0.

    Returns:
        The rendered `search.html` for non-POST requests; otherwise a JSON
        object with `res` (the hits of the requested page), `need_page`
        (number of pages, at least 1) and `total` (reported hit count).
    """
    # Anything that is not a POST (GET, and the HEAD that Flask routes along
    # with GET) gets the page itself, so the view never returns None.
    if request.method != 'POST':
        return render_template('search.html')

    content = request.values.get('content')
    size = 10
    # `type=int` falls back to the default instead of raising on bad input;
    # max() keeps a negative index from selecting an invalid offset.
    current = max(request.values.get('current', 0, type=int), 0)

    if content:
        # NOTE(review): both highlight tags are empty strings, so matched
        # terms come back without any visible marker -- confirm the intended
        # markup was not lost.
        search_kwargs = {
            'query': {
                'match': {
                    "title": content
                }
            },
            'highlight': {
                "pre_tags": "",
                "post_tags": "",
                "fields": {
                    "title": {}
                }
            },
        }
    else:
        search_kwargs = {
            'query': {
                'match_all': {}
            }
        }

    # Let Elasticsearch do the paging instead of fetching 1000 hits and
    # slicing them here. Elasticsearch rejects from + size beyond
    # index.max_result_window (10000 by default); for such a request only
    # the total is fetched and the page is returned empty, matching the
    # empty slice the in-memory version produced.
    max_window = 10000
    offset = current * size
    fetch = size
    if offset + size > max_window:
        offset, fetch = 0, 0

    res = es.search(index='car', from_=offset, size=fetch, **search_kwargs)

    total = int(res['hits']['total']['value'])
    # Ceiling division: `total // size + 1` reported one page too many when
    # total was an exact multiple of size. At least one page is still
    # reported for an empty result, as before.
    need_page = max(1, (total + size - 1) // size)

    data = {
        'res': res['hits']['hits'],
        'need_page': need_page,
        'total': total
    }
    return jsonify(data)
General Search
微信扫一扫
支付宝扫一扫
评论列表(0条)