时间:2021-05-22
支付宝十年账单上的数字有点吓人,但它统计的项目太多。我只是想看看单纯在淘宝上到底支出了多少,于是写了段脚本,用来统计任意时间段内淘宝订单的消费情况。从结果来看,其实我在淘宝上还是相当节约的。
脚本的主要工作是模拟浏览器登录,并解析“已买到的宝贝”页面,以获取指定的订单及宝贝信息。
使用方法见代码或执行命令加参数-h,另外需要BeautifulSoup4支持,BeautifulSoup的官方项目列表页:https:///trade/itemlist/list_bought_items.htm' # 运费险 增值服务 分段支付(定金,尾款) extra_service = ['freight-info', 'service-info', 'stage-item'] stdout_cr('working... {:.0%}'.format(0)) # 1. 解析第一页 res = urllib2.urlopen(url) soup = BeautifulSoup(res.read().decode('gbk')) # 2. 获取页数相关 page_jump = soup.find('span', id='J_JumpTo') jump_url = page_jump.attrs['data-url'] url_parts = urlparse.urlparse(jump_url) query_data = dict(urlparse.parse_qsl(url_parts.query)) total_pages = int(query_data['tPage']) # 解析 orders = [] cur_page = 1 out_date = False errors = [] while True: bought_items = soup.find_all('tbody', attrs={'data-orderid' : True}) # pprint(len(bought_items)) count = 0 for item in bought_items: count += 1 # pprint('{}.{}'.format(cur_page, count)) try: info = {} # 订单在页面上的位置 页数.排序号 info['pos'] = '{}.{}'.format(cur_page, count) info['orderid'] = item.attrs['data-orderid'] info['status'] = item.attrs['data-status'] # 店铺 node = item.select('tr.order-hd a.shopname') if not node: # 店铺不存在,可能是赠送彩票订单,忽略 # print('ignore') continue info['shop_name'] = node[0].attrs['title'].strip() info['shop_url'] = node[0].attrs['href'] # 日期 node = item.select('tr.order-hd span.dealtime')[0] info['date'] = datetime.strptime(node.attrs['title'], '%Y-%m-%d %H:%M') if end_date and info['date'].toordinal() > end_date.toordinal(): continue if start_date and info['date'].toordinal() < start_date.toordinal(): out_date = True break # 宝贝 baobei = [] node = item.find_all('tr', class_='order-bd') # pprint(len(node)) for n in node: try: bb = {} if [True for ex in extra_service if ex in n.attrs['class']]: # 额外服务处理 # print('额外服务处理') name_node = n.find('td', class_='baobei') # 宝贝地址 bb['name'] = name_node.text.strip() bb['url'] = '' bb['spec'] = '' # 宝贝快照 bb['snapshot'] = '' # 宝贝价格 bb['price'] = 0.0 # 宝贝数量 bb['quantity'] = 1 bb['is_goods'] = False try: bb['url'] = name_node.find('a').attrs['href'] bb['price'] = float(n.find('td', class_='price').text) 
except: pass else: name_node = n.select('p.baobei-name a') # 宝贝地址 bb['name'] = name_node[0].text.strip() bb['url'] = name_node[0].attrs['href'] # 宝贝快照 bb['snapshot'] = '' if len(name_node) > 1: bb['snapshot'] = name_node[1].attrs['href'] # 宝贝规格 bb['spec'] = n.select('.spec')[0].text.strip() # 宝贝价格 bb['price'] = float(n.find('td', class_='price').attrs['title']) # 宝贝数量 bb['quantity'] = int(n.find('td', class_='quantity').attrs['title']) bb['is_goods'] = True baobei.append(bb) # 尝试获取实付款 # 实付款所在的节点可能跨越多个tr的td amount_node = n.select('td.amount em.real-price') if amount_node: info['amount'] = float(amount_node[0].text) except Exception as e: errors.append({ 'type' : 'baobei', 'id' : '{}.{}'.format(cur_page, count), 'node' : '{}'.format(n), 'error' : '{}'.format(e) }) except Exception as e: errors.append({ 'type' : 'order', 'id' : '{}.{}'.format(cur_page, count), 'node' : '{}'.format(item), 'error' : '{}'.format(e) }) info['baobei'] = baobei orders.append(info) stdout_cr('working... {:.0%}'.format(cur_page / total_pages)) # 下一页 cur_page += 1 if cur_page > total_pages or out_date: break query_data.update({'pageNum' : cur_page}) page_url = '{}?{}'.format(url, urllib.urlencode(query_data)) res = urllib2.urlopen(page_url) soup = BeautifulSoup(res.read().decode('gbk')) stdout_cr() if errors: print('INFO. 
有错误发生,统计结果可能不准确。') # pprint(errors) return ordersdef output(orders, start_date, end_date): amount = 0.0 org_amount = 0 baobei_count = 0 order_count = 0 invaild_order_count = 0 for order in orders: if order['status'] in INVALID_ORDER_STATES: invaild_order_count += 1 continue amount += order['amount'] order_count += 1 for baobei in order.get('baobei', []): if not baobei['is_goods']: continue org_amount += baobei['price'] * baobei['quantity'] baobei_count += baobei['quantity'] print('{:<9} {}'.format('累计消费:', amount)) print('{:<9} {}/{}'.format('订单/宝贝:', order_count, baobei_count)) if invaild_order_count: print('{:<9} {} (退货或取消等, 不在上述订单之内)'.format('无效订单:', invaild_order_count)) print('{:<7} {}'.format('宝贝原始总价:', org_amount)) print('{:<7} {:.2f}'.format('宝贝平均单价:', 0 if baobei_count == 0 else org_amount / baobei_count)) print('{:<9} {} ({:.2%})'.format('节约了(?):', org_amount - amount, 0 if org_amount == 0 else (org_amount - amount) / org_amount)) from_date = start_date if start_date else orders[-1]['date'] to_date = end_date if end_date else datetime.now() print('{:<9} {:%Y-%m-%d} - {:%Y-%m-%d}'.format('统计区间:', from_date, to_date)) if not start_date: print('{:<9} {:%Y-%m-%d %H:%M}'.format('败家始于:', orders[-1]['date']))def ouput_orders(orders): print('所有订单:') if not orders: print(' --') return for order in orders: print(' {:-^20}'.format('-')) print(' * 订单号: {orderid} 实付款: {amount} 店铺: {shop_name} 时间: {date:%Y-%m-%d %H:%M}'.format(**order)) for bb in order['baobei']: if not bb['is_goods']: continue print(' - {name}'.format(**bb)) if bb['spec']: print(' {spec}'.format(**bb)) print(' {price} X {quantity}'.format(**bb))def main(): parser = argparse.ArgumentParser( prog='python {}'.format(__file__) ) parser.add_argument('-u', '--username', help='淘宝用户名') parser.add_argument('-p', '--password', help='淘宝密码') parser.add_argument('-s', '--start', help='起始时间,可选, 格式如: 2014-11-11') parser.add_argument('-e', '--end', help='结束时间,可选, 格式如: 2014-11-11') parser.add_argument('--verbose', 
action='store_true', default=False, help='订单详细输出') parser.add_argument('-v', '--version', action='version', version='v{}'.format(__version__), help='版本号') args = parser.parse_args() usr = args.username if not usr: usr = raw_input('输入淘宝用户名: '.encode(RAW_IMPUT_ENCODING)) usr = usr.decode('utf-8') # 中文输入问题 pwd = args.password if not pwd: if platform.system() == 'Windows': # Windows下中文输出有问题 pwd = getpass() else: pwd = getpass('输入淘宝密码: '.encode('utf-8')) pwd = pwd.decode('utf-8') verbose = args.verbose start_date = None if args.start: try: start_date = datetime.strptime(args.start, '%Y-%m-%d') except Exception as e: sys.exit('ERROR. {}'.format(e)) end_date = None if args.end: try: end_date = datetime.strptime(args.end, '%Y-%m-%d') except Exception as e: sys.exit('ERROR. {}'.format(e)) if start_date and end_date and start_date > end_date: sys.exit('ERROR, 结束日期必须晚于或等于开始日期') cj_file = './{}.tmp'.format(usr) cj = cookielib.LWPCookieJar() try: cj.load(cj_file) except: pass opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPHandler) urllib2.install_opener(opener) login(usr, pwd) try: cj.save(cj_file) except: pass orders = parse_bought_list(start_date, end_date) output(orders, start_date, end_date) # 输出订单明细 if verbose: ouput_orders(orders)if __name__ == '__main__': main()
声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。
本文实例讲述了Python使用装饰器模拟用户登录验证功能。分享给大家供大家参考,具体如下: # -*- coding:utf-8 -*- #!python3 user_li
python3.0模拟用户登录,三次错误锁定的实例。实例如下所示: # -*- coding:utf-8 -*- # 需求:模拟用户登录,超过三次错误则锁定,不允许登录 count=
本文实例为大家分享了java实现抽奖系统的具体代码,供大家参考,具体内容如下:模拟一个在终端登录注册的页面 public class Cjtest{ public stat
本文实例为大家分享了利用CSS3实现登录面板3D旋转起来的具体代码,供大家参考,具体内容如下。效果图:点击登录,登录面板会发生360度旋转,并显示信息。旋转结束:
Python模拟登录的两种实现方法:有时候我们在做抓取项目时,需要登录到某个网站上才能看见某些内容,所以模拟登录功能就必不可少了。散仙这次写的文章,主要有2个例子