{ "cells": [ { "cell_type": "markdown", "id": "f5b62519", "metadata": {}, "source": [ "# 采用异步Async\n", "\n", "## 问题分析\n", "我们上文中获取天气信息的性能不佳,主要在给网站发送请求的时候,需要等很久才能获得回复。\n", "\n", "这节我们需要想办法提高性能。\n", "\n", "我们先把关键代码复制到本节。" ] }, { "cell_type": "code", "execution_count": 1, "id": "1bb8abdd", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "ca459d20", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
citycity_asciilatlngcountryiso2iso3admin_namecapitalpopulationid
0TokyoTokyo35.6897139.6922JapanJPJPNTōkyōprimary37977000.01392685764
1JakartaJakarta-6.2146106.8451IndonesiaIDIDNJakartaprimary34540000.01360771077
2DelhiDelhi28.660077.2300IndiaININDDelhiadmin29617000.01356872604
3MumbaiMumbai18.966772.8333IndiaININDMahārāshtraadmin23355000.01356226629
4ManilaManila14.6000120.9833PhilippinesPHPHLManilaprimary23088000.01608618140
....................................
40996TukchiTukchi57.3670139.5000RussiaRURUSKhabarovskiy KrayNaN10.01643472801
40997NumtoNumto63.666771.3333RussiaRURUSKhanty-Mansiyskiy Avtonomnyy Okrug-YugraNaN10.01643985006
40998NordNord81.7166-17.8000GreenlandGLGRLSermersooqNaN10.01304217709
40999TimmiarmiutTimmiarmiut62.5333-42.2167GreenlandGLGRLKujalleqNaN10.01304206491
41000NordvikNordvik74.0165111.5100RussiaRURUSKrasnoyarskiy KrayNaN0.01643587468
\n", "

41001 rows × 11 columns

\n", "
" ], "text/plain": [ " city city_ascii lat lng country iso2 iso3 \\\n", "0 Tokyo Tokyo 35.6897 139.6922 Japan JP JPN \n", "1 Jakarta Jakarta -6.2146 106.8451 Indonesia ID IDN \n", "2 Delhi Delhi 28.6600 77.2300 India IN IND \n", "3 Mumbai Mumbai 18.9667 72.8333 India IN IND \n", "4 Manila Manila 14.6000 120.9833 Philippines PH PHL \n", "... ... ... ... ... ... ... ... \n", "40996 Tukchi Tukchi 57.3670 139.5000 Russia RU RUS \n", "40997 Numto Numto 63.6667 71.3333 Russia RU RUS \n", "40998 Nord Nord 81.7166 -17.8000 Greenland GL GRL \n", "40999 Timmiarmiut Timmiarmiut 62.5333 -42.2167 Greenland GL GRL \n", "41000 Nordvik Nordvik 74.0165 111.5100 Russia RU RUS \n", "\n", " admin_name capital population \\\n", "0 Tōkyō primary 37977000.0 \n", "1 Jakarta primary 34540000.0 \n", "2 Delhi admin 29617000.0 \n", "3 Mahārāshtra admin 23355000.0 \n", "4 Manila primary 23088000.0 \n", "... ... ... ... \n", "40996 Khabarovskiy Kray NaN 10.0 \n", "40997 Khanty-Mansiyskiy Avtonomnyy Okrug-Yugra NaN 10.0 \n", "40998 Sermersooq NaN 10.0 \n", "40999 Kujalleq NaN 10.0 \n", "41000 Krasnoyarskiy Kray NaN 0.0 \n", "\n", " id \n", "0 1392685764 \n", "1 1360771077 \n", "2 1356872604 \n", "3 1356226629 \n", "4 1608618140 \n", "... ... \n", "40996 1643472801 \n", "40997 1643985006 \n", "40998 1304217709 \n", "40999 1304206491 \n", "41000 1643587468 \n", "\n", "[41001 rows x 11 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "city_file = '../data/worldcities.csv'\n", "city_df = pd.read_csv(city_file,encoding='utf-8')\n", "city_df" ] }, { "cell_type": "code", "execution_count": 3, "id": "359284ac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
citycity_asciilatlngcountryiso2iso3admin_namecapitalpopulationid
5ShanghaiShanghai31.1667121.4667ChinaCNCHNShanghaiadmin22120000.01156073548
9GuangzhouGuangzhou23.1288113.2590ChinaCNCHNGuangdongadmin20902000.01156237133
10BeijingBeijing39.9050116.3914ChinaCNCHNBeijingprimary19433000.01156228865
17ShenzhenShenzhen22.5350114.0540ChinaCNCHNGuangdongminor15929000.01156158707
29NanyangNanyang32.9987112.5292ChinaCNCHNHenanNaN12010000.01156192287
....................................
40725TaoyanTaoyan34.7706103.7903ChinaCNCHNGansuNaN5329.01156019900
40744JingpingJingping33.7844104.3652ChinaCNCHNGansuNaN5149.01156005145
40776DayiDayi33.8312104.0362ChinaCNCHNGansuNaN5114.01156108713
40782BiancangBiancang33.9007104.0321ChinaCNCHNGansuNaN5040.01156724811
40938NichicunNichicun29.533394.4167ChinaCNCHNTibetNaN100.01156860651
\n", "

1498 rows × 11 columns

\n", "
" ], "text/plain": [ " city city_ascii lat lng country iso2 iso3 admin_name \\\n", "5 Shanghai Shanghai 31.1667 121.4667 China CN CHN Shanghai \n", "9 Guangzhou Guangzhou 23.1288 113.2590 China CN CHN Guangdong \n", "10 Beijing Beijing 39.9050 116.3914 China CN CHN Beijing \n", "17 Shenzhen Shenzhen 22.5350 114.0540 China CN CHN Guangdong \n", "29 Nanyang Nanyang 32.9987 112.5292 China CN CHN Henan \n", "... ... ... ... ... ... ... ... ... \n", "40725 Taoyan Taoyan 34.7706 103.7903 China CN CHN Gansu \n", "40744 Jingping Jingping 33.7844 104.3652 China CN CHN Gansu \n", "40776 Dayi Dayi 33.8312 104.0362 China CN CHN Gansu \n", "40782 Biancang Biancang 33.9007 104.0321 China CN CHN Gansu \n", "40938 Nichicun Nichicun 29.5333 94.4167 China CN CHN Tibet \n", "\n", " capital population id \n", "5 admin 22120000.0 1156073548 \n", "9 admin 20902000.0 1156237133 \n", "10 primary 19433000.0 1156228865 \n", "17 minor 15929000.0 1156158707 \n", "29 NaN 12010000.0 1156192287 \n", "... ... ... ... \n", "40725 NaN 5329.0 1156019900 \n", "40744 NaN 5149.0 1156005145 \n", "40776 NaN 5114.0 1156108713 \n", "40782 NaN 5040.0 1156724811 \n", "40938 NaN 100.0 1156860651 \n", "\n", "[1498 rows x 11 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep the Chinese cities that have a capital status recorded.\n", "# Note: `city_df['capital'] is not None` tests the Series object itself and is\n", "# always True, so it filters nothing; notna() tests each row's value instead.\n", "capital_china = city_df[(city_df['country']=='China') & city_df['capital'].notna()]\n", "capital_china" ] }, { "cell_type": "code", "execution_count": 4, "id": "97e4b1df", "metadata": {}, "outputs": [], "source": [ "def generate_url(longitude,latitude):\n", " url = f'https://www.7timer.info/bin/api.pl?lon={longitude}&lat={latitude}&product=civil&output=json'\n", " return url\n", "\n", "def transform_weather_raw(text_j):\n", " weather_info = pd.DataFrame(text_j['dataseries'])\n", " start_time = pd.to_datetime(text_j['init'],format='%Y%m%d%H')\n", " weather_info['timepoint'] = pd.to_timedelta(weather_info['timepoint'],unit='h')\n", " weather_info['timestamp'] = start_time+ 
weather_info['timepoint']\n", " weather_info.drop('timepoint',axis=1,inplace=True)\n", " # more clean data steps\n", " wind_df = pd.json_normalize(weather_info['wind10m'])\n", " wind_df.columns = ['wind_'+col for col in wind_df.columns]\n", " weather_info = pd.concat([weather_info,wind_df],axis=1)\n", " weather_info.drop('wind10m',axis=1,inplace=True)\n", " weather_info['rh2m'] = weather_info['rh2m'].str.rstrip('%')\n", " #['']\n", " return weather_info\n", "\n", "def add_city_info(weather_info,longitude,latitude,city):\n", "    \"\"\"Return a copy of `weather_info` labelled with the city it belongs to.\n", "\n", "    Adds (or overwrites) the `longitude`, `latitude` and `city` columns, each\n", "    filled with the given scalar value. The frame passed in is left unmodified,\n", "    so re-running a cell that calls this function cannot alter earlier results.\n", "    \"\"\"\n", "    return weather_info.assign(longitude=longitude,latitude=latitude,city=city)" ] }, { "cell_type": "markdown", "id": "db11830b", "metadata": {}, "source": [ "## 采用异步请求\n", "\n", "我们已经简单分析过,发送一个请求,我们需要等网站很久(2秒)才能得到回复。对于上千个城市,我们等不起啊。其实很多网站API都是接受多个客户端请求的,也就是同时可以接收多个get request。 \n", "\n", "所以我们不用等第一个request返回结果,就把第二个,第三个,第N个request 发送出去,这样等第一个request 返回结果时,我们再处理就可以了。\n", "这样就不用傻等了,这就是异步的思想。\n", "\n", "这里我们就采用requests的异步升级版本grequests来进行异步操作。\n", "\n", "安装依然很容易:```pip install grequests```\n", "\n", "入门稍微有点难,主要涉及到一些异步的思想和map的思想。不过对于我们入门来说,只需要调用一个函数即可:\n", "```grequests.imap(rs, size=50)```\n", "\n", "这里的size就是每次发送请求的数量。" ] }, { "cell_type": "code", "execution_count": 6, "id": "96341827", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9b76f160599f4a2caef040e15e55646d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\renb\\pycharmprojects\\weather_dashapp\\dash\\lib\\site-packages\\gevent\\hub.py:161: UserWarning: libuv only supports millisecond timer resolution; all times less will be set to 1 ms\n", " with loop.timer(seconds, ref=ref) as t:\n" ] } ], "source": [ "import grequests\n", "import json\n", "from tqdm.notebook import tqdm\n", "city_list = []\n", "lon_list = []\n", "lat_list = 
[]\n", "url_list = []\n", "for index,city_info in capital_china.iterrows():\n", "    city = city_info['city']\n", "    city_list.append(city)\n", "    lon = city_info['lng']\n", "    lon_list.append(lon)\n", "    lat = city_info['lat']\n", "    lat_list.append(lat)\n", "    url = generate_url(longitude=lon,latitude=lat)\n", "    url_list.append(url)\n", "\n", "# grequests.imap yields responses in completion order and silently drops failed\n", "# requests, so the position of a response in its stream does not identify the city.\n", "# grequests.map returns exactly one entry per request, in request order, with None\n", "# for a request that failed, so index i always refers to city_list[i].\n", "rs = (grequests.get(u) for u in url_list)\n", "responses = grequests.map(rs, size=50)\n", "\n", "weather_frames = []\n", "failed_cities = []\n", "for i,r in enumerate(tqdm(responses)):\n", "    if r is None or not r.ok:\n", "        # no usable reply for this city: remember it and carry on with the rest\n", "        failed_cities.append(city_list[i])\n", "        continue\n", "    text_j= json.loads(r.text)\n", "    weather_info_df = transform_weather_raw(text_j)\n", "    weather_info_df = add_city_info(weather_info_df,lon_list[i],lat_list[i],city_list[i])\n", "    weather_frames.append(weather_info_df)\n", "\n", "# concatenate once; growing a DataFrame inside the loop copies all rows every time\n", "all_cities_df = pd.concat(weather_frames,axis=0) if weather_frames else pd.DataFrame()\n", "if failed_cities:\n", "    print(f'{len(failed_cities)} of {len(url_list)} cities could not be fetched')" ] }, { "cell_type": "markdown", "id": "c6894979", "metadata": {}, "source": [ "获取的数据,我们需要保存到数据库里。" ] }, { "cell_type": "code", "execution_count": 8, "id": "b57d9b05", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "C:\\Users\\renb\\PycharmProjects\\weather_dashapp\n" ] }, { "data": { "text/plain": [ "83392" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\renb\\pycharmprojects\\weather_dashapp\\dash\\lib\\site-packages\\gevent\\hub.py:161: UserWarning: libuv only supports millisecond timer resolution; all times less will be set to 1 ms\n", " with loop.timer(seconds, ref=ref) as t:\n" ] } ], "source": [ "import os\n", "import sys\n", "\n", "module_path = os.path.abspath(os.path.join('../..'))\n", "print(module_path)\n", "if module_path not in sys.path:\n", " sys.path.append(module_path)\n", "from weather_book.weather_app.models.db_models import engine,WeatherInfo\n", "all_cities_df['id'] = [i for i in range(all_cities_df.shape[0])]\n", "all_cities_df.to_sql('weather',engine,if_exists='append',index=False) # without index" ] }, { "cell_type": "code", "execution_count": null, "id": "4677712d", "metadata": {}, "outputs": 
[], "source": [] } ], "metadata": { "kernelspec": { "display_name": "dash", "language": "python", "name": "dash" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }