对 Python 操作 Solr 索引数据的实例详解

时间:2021-05-22

测试代码1:

def test(self):
    """Index one hand-written document through Solr's JSON update handler.

    Posts a single ``add`` command to the ``mycore`` core of the Solr
    instance at 127.0.0.1:8983 and prints the raw response body.
    """
    # "*字段名*" literally reads "*field name*"; it looks like the article's
    # placeholder for a real schema field -- replace it before running.
    data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"Content-Type": "application/json"}
    r = requests.post(url, json=data, params=params, headers=headers)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(r.text)


def Index_data(self):
    """Index two sample documents with the pysolr client and print the result."""
    solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
    # How you'd index data.
    result = solr.add([
        {
            "id": "doc_1",
            "title": "A test document",
        },
        {
            "id": "doc_2",
            "title": "The Banana: Tasty or Dangerous?",
        },
    ])
    print(result)

测试代码2:

实际数据:

def Index_Data_FromCSV(self, csvfile):
    """Read rows from a CSV file and index them into Solr, one request per row.

    :param csvfile: path of the CSV file, including the full directory
    :return: None; each Solr response body (or the error for a failed row)
        is printed
    """
    def to_text(value):
        # Only byte strings need decoding; text that is already decoded is
        # passed through instead of failing on a second decode.
        return value.decode('GB2312') if isinstance(value, bytes) else value

    rows = CSVOP.ReadCSV(csvfile)
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"Content-Type": "application/json"}
    for index, item in enumerate(rows):
        if index == 0:
            # The first row is the header line.
            continue
        try:
            # A fresh dict per row, so nothing carries over between rows.
            doc = {
                'title': to_text(item[0]),
                'link': item[1],
                # The date column (item[2]) is not indexed.
                'source': to_text(item[3]),
                'keyword': to_text(item[4]),
            }
            data = {"add": {"doc": doc}}
            r = requests.post(url, json=data, params=params, headers=headers)
            print(r.text)
        except Exception as e:
            # Best effort: report the failing row number and carry on.
            print(e)
            print(index)


# pysolr client version
def pysolr_Index_Data_FromCSV(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
    """Read rows from a CSV file and index them into Solr in one pysolr call.

    :param csvfile: path of the CSV file, including the full directory
    :param url: base URL of the Solr core
    :return: None; the result of ``solr.add`` is printed
    """
    def to_text(value):
        # Only byte strings need decoding; text that is already decoded is
        # passed through instead of failing on a second decode.
        return value.decode('GB2312') if isinstance(value, bytes) else value

    rows = CSVOP.ReadCSV(csvfile)
    listdocs = []
    for index, item in enumerate(rows):
        if index == 0:
            # The first row is the header line.
            continue
        try:
            listdocs.append({
                'title': to_text(item[0]),
                'link': item[1],
                # The date column (item[2]) is not indexed.
                'source': to_text(item[3]),
                'keyword': to_text(item[4]),
            })
        except Exception as e:
            # A malformed row is reported and left out of the batch.
            print(e)
    solr = pysolr.Solr(url, timeout=10)
    result = solr.add(listdocs)
    print(result)

查询代码:

def search_data(self, message='视频'):
    """Query Solr over plain HTTP for titles matching *message*.

    Prints the raw JSON response, then ``<message>:<numFound>``.

    :param message: text to look for in the ``title`` field
    """
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    # Let requests build the query string, so characters such as "&" or "#"
    # in message cannot break out of the q parameter. The backslash in front
    # of the term is kept from the original query; it is spelled "\\" because
    # "\%" is not a valid escape sequence.
    params = {
        'q': 'title:"\\%s"' % message,
        'wt': 'json',
        'indent': 'true',
    }
    r = requests.get(url, params=params, verify=False)
    print(r.text)
    found = r.json()['response']['numFound']
    print(message + ":" + str(found))


# pysolr client version
# NOTE(review): this shares its name with the function above; if both end up
# in the same class or module, this later definition is the one in effect.
def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
    """Search the ``title`` field with pysolr and print every returned document.

    :param where: term searched for in the ``title`` field
    :param url: base URL of the Solr core
    """
    solr = pysolr.Solr(url, timeout=10)
    # start/rows page the result set (offset 10, at most 30 documents);
    # fl limits the returned fields to the four printed below.
    options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
    # Use the caller's term instead of a hard-coded one.
    result = solr.search('title:%s' % where, **options)
    # To show the total hit count:
    # print(result.raw_response['response']['numFound'])
    for item in result:
        print('keyword: %s' % item['keyword'])
        print('title: %s' % item['title'])
        print('source: %s' % item['source'])
        print('link: %s' % item['link'])
        # Blank separator between documents.
        print('\n')

输出结果:

# Sample response printed by the requests-based search. The page is cut off
# inside the first document, so the output below is incomplete:
#
# {
#   "responseHeader":{
#     "status":0,
#     "QTime":0,
#     "params":{
#       "q":"title:\"\\视频\"",
#       "indent":"true",
#       "wt":"json"}},
#   "response":{"numFound":123,"start":0,"docs":[
#       {
#         "source":"中彩网",
#         "link":"http://
#
# NOTE(review): at this point the page jumps into the middle of a
# CSV-indexing method whose beginning is missing. The orphaned tail is kept
# here verbatim (re-wrapped only) rather than reconstructed:
#
#   mitWithin": 1000}
#   url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
#   headers = {"Content-Type": "application/json"}
#   for item in list:
#       if index > 0:  # the first row is the header line
#           try:
#               doc['title'] = item[0].decode('GB2312')
#               doc['link'] = item[1]
#               # doc['date'] = item[2]
#               doc['source'] = item[3].decode('GB2312')
#               doc['keyword'] = item[4].decode('GB2312')
#               data = {"add": {"doc": doc}}
#               r = requests.post(url, json=data, params=params, headers=headers)
#               print r.text
#           except Exception,e:
#               print e.message
#               print index
#       index += 1
#
# NOTE(review): the functions below take ``self`` and are called on a
# SolrClientObj instance further down, so they presumably belong to that
# class; its header is not part of this page.


def Index_data(self):
    """Index two sample documents with the pysolr client and print the result."""
    solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
    # How you'd index data.
    result = solr.add([
        {
            "id": "doc_1",
            "title": "A test document",
        },
        {
            "id": "doc_2",
            "title": "The Banana: Tasty or Dangerous?",
        },
    ])
    print(result)


def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
    """Search the ``title`` field with pysolr and print every returned document.

    :param where: term searched for in the ``title`` field
    :param url: base URL of the Solr core
    """
    solr = pysolr.Solr(url, timeout=10)
    # start/rows page the result set (offset 10, at most 30 documents);
    # fl limits the returned fields to the four printed below.
    options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
    # Use the caller's term instead of a hard-coded one.
    result = solr.search('title:%s' % where, **options)
    # To show the total hit count:
    # print(result.raw_response['response']['numFound'])
    for item in result:
        print('keyword: %s' % item['keyword'])
        print('title: %s' % item['title'])
        print('source: %s' % item['source'])
        print('link: %s' % item['link'])
        # Separator between documents.
        print(' ')


def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
    """Delete indexed documents that match a query.

    :param where: delete condition, passed to Solr as the delete query
    :param url: base URL of the Solr core
    :return: None; the result of ``solr.delete`` is printed
    """
    solr = pysolr.Solr(url, timeout=10)
    # To remove a single document by id instead (e.g. id='id1'):
    # solr.delete(id=where)
    result = solr.delete(q=where)  # q='*:*' deletes every indexed document
    print(result)


obj = SolrClientObj()
# obj.delete_index_data('*:*')  # delete every indexed document
# obj.Index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
# obj.pysolr_Index_Data_FromCSV(csvfile)

以上这篇《对 Python 操作 Solr 索引数据的实例详解》就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章