{"id":193,"date":"2023-10-27T11:04:17","date_gmt":"2023-10-27T03:04:17","guid":{"rendered":"https:\/\/www.liaoxinghui.com\/?p=193"},"modified":"2023-10-27T11:04:19","modified_gmt":"2023-10-27T03:04:19","slug":"python%e8%87%aa%e5%8a%a8%e5%bd%92%e9%9b%86%e5%bd%95%e9%9f%b3","status":"publish","type":"post","link":"https:\/\/www.liaoxinghui.com\/?p=193","title":{"rendered":"\u4f7f\u7528Python\u5f52\u96c6\u5f55\u97f3"},"content":{"rendered":"<p class=\"wp-block-paragraph\">\u9879\u76ee\u7a0d\u5fae\u6709\u70b9\u590d\u6742\uff0c\u4f7f\u7528Python\u5c06\u5f55\u97f3\u6587\u4ef6\u5f52\u96c6\u8d77\u6765\uff0c\u9700\u8981\u7684\u4fe1\u606f\u6709\u5206\u522b\u662f\uff0cexecl\u6587\u4ef6\uff0cmysql\u6587\u4ef6\uff0c\u5f55\u97f3\u6587\u4ef6\uff0c\u5c06\u5176\u7ec4\u88c5\u8d77\u6765\uff0c\u589e\u52a0\u4e86\u8fdb\u5ea6\u6761\uff0c\u65b9\u4fbf\u4f7f\u7528\u7684\u540c\u4e8b\u80fd\u770b\u5230\u8fdb\u5ea6\uff0c\u4ee3\u7801\u4e2d\u7684\u5185\u5bb9\u5df2\u7ecf\u8131\u654f\uff0c\u719f\u6089Python\u7684\u5c0f\u4f19\u4f34\u4e00\u770b\u5c31\u77e5\u9053\u662f\u600e\u4e48\u4e2a\u4e8b?<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># coding=utf-8\n\"\"\"\n@Time \uff1a 2023\/10\/26 14:21\n@Author \uff1a Taering\n@File \uff1amain.py\n@IDE \uff1aPyCharm\n@DESC : \u4eceexecl\u6587\u4ef6\u53ca\u6570\u636e\u5e93\u6587\u4ef6\u5339\u914d\u5f55\u97f3\u751f\u6210\u76f8\u5e94\u7684\u7d22\u5f15\n\"\"\"\n\nimport logging\nimport os\nimport pandas as pd\nfrom tqdm import tqdm\nfrom datetime import datetime, timedelta\nimport time\n\nbase_path = os.path.abspath(os.path.dirname(__file__))\nexecl_path = os.path.join(base_path, 'execl_files')\ndone_path = os.path.join(base_path, 'done')\nrecord_path = os.path.join(base_path, 'records')\ncsv_path = os.path.join(base_path, 'sql_file', 
'data.csv')\nprint(f'execl\u6587\u4ef6\u7684\u76ee\u5f55\u4e3a\uff1a{execl_path}')\nprint(f'done\u6587\u4ef6\u7684\u76ee\u5f55\u4e3a\uff1a{done_path}')\nprint(f'record\u6587\u4ef6\u7684\u76ee\u5f55\u4e3a\uff1a{record_path}')\nlogging.basicConfig(filename='logs\/run.log', level=logging.INFO, format='%(asctime)s - %(message)s')\n\nprint(f'\u5f00\u59cb\u5c06\u6570\u636e\u5e93\u6587\u4ef6\u5bfc\u5165\u5185\u5b58')\nstart_sqlfile_time = time.time()\ncdr_data = pd.read_csv(csv_path)\nprint(f'sql\u6587\u4ef6\u5bfc\u5165\u5185\u5b58\u5b8c\u6210,\u8017\u65f6{time.time() - start_sqlfile_time}\u79d2')\n\ndef main():\n    # \u8bfb\u53d6Excel\u6587\u4ef6\n    print('\u5f00\u59cb\u8bfb\u53d6Execl\u6587\u4ef6')\n    start_execl_time = time.time()\n    logging.info('\u5f00\u59cb\u8bfb\u53d6Execl\u6587\u4ef6')\n    # \u521d\u59cb\u5316\u4e24\u4e2a\u7a7a\u7684DataFrames\n    df_a_list = []\n    df_b_list = []\n    file_list = os.listdir(execl_path)\n    for file in file_list:\n        if file.endswith('.xlsx'):\n            df_temp = pd.read_excel(os.path.join(execl_path, file))\n            if 'A\u7684\u7279\u5f81' in df_temp.columns:\n                df_a_list.append(df_temp)\n            elif 'B\u7684\u7279\u5f81' in df_temp.columns:\n                df_b_list.append(df_temp)\n    # \u4f7f\u7528pd.concat\u6765\u5408\u5e76\u6570\u636e\n    df_a = pd.concat(df_a_list, ignore_index=True)\n    df_b = pd.concat(df_b_list, ignore_index=True)\n    num_records_a = df_a.shape[0]\n    num_records_b = df_b.shape[0]\n    print(\n        f\"\u8bfb\u53d6execl\u5b8c\u6210\uff0cdf_a\u4e2d\u6709{num_records_a}\u6761\u8bb0\u5f55\uff0cdf_b\u4e2d\u6709{num_records_b}\u6761\u8bb0\u5f55,\u5171\u8017\u65f6\uff1a{time.time() - start_execl_time}\u79d2\")\n    count = 0\n    # \u521b\u5efa\u5b57\u5178\u6765\u5b58\u50a8\u6587\u4ef6\u8def\u5f84\u548cUUID\u7684\u6620\u5c04\n    print(f'\u5f00\u59cb\u6620\u5c04\u6587\u4ef6')\n    start_file_time = time.time()\n    file_dict = {}\n    for root, dirs, files in 
os.walk(record_path):\n        for file in files:\n            uuid = file.split('_')[0]\n            file_dict[uuid] = os.path.join(root, file)\n    print(f'\u5df2\u6620\u5c04 {len(file_dict)} \u6761\u5f55\u97f3\uff0c\u6d88\u8017\u65f6\u95f4\u4e3a\uff1a{time.time() - start_file_time}\u79d2')\n    for _, row in tqdm(df_a.iterrows(), total=df_a.shape[0], desc=\"\u6b63\u5728\u5339\u914d\u5f55\u97f3,\u8bf7\u7a0d\u7b49\",\n                       bar_format=\"{l_bar}{bar}| {percentage:3.2f}%\"):\n        phone_num = str(row[\"\u9700\u8981\u5339\u914d\u7684\u5185\u5bb9\"]).strip()\n        if phone_num.isnumeric():\n            # \u5c06\u5b57\u7b26\u4e32\u8f6c\u6362\u4e3adatetime\u5bf9\u8c61\n            collection_time = datetime.strptime(row['\u65f6\u95f4'], '%Y-%m-%d %H:%M:%S')\n            # \u8ba1\u7b97\u5f00\u59cb\u548c\u7ed3\u675f\u7684\u65f6\u95f4\u8303\u56f4\n            start_time = collection_time - timedelta(minutes=30)\n            end_time = collection_time\n            mask = ((cdr_data['caller_id_number'] == phone_num) | (cdr_data['destination_number'] == phone_num)) &amp; \\\n                   (cdr_data['start_stamp'] &gt;= start_time.strftime('%Y-%m-%d %H:%M:%S')) &amp; \\\n                   (cdr_data['start_stamp'] &lt;= end_time.strftime(&#039;%Y-%m-%d %H:%M:%S&#039;))\n            results = cdr_data[mask].values\n            for result in results:\n                uuid = result[1]\n                if uuid in file_dict:\n                    file_path = file_dict[uuid]\n                    logging.info(&#039;\u5df2\u5339\u914d\u5230\u5f55\u97f3\u6587\u4ef6\uff0c\u6b63\u5728\u5904\u7406...&#039;)\n                    print(&#039;\u5df2\u5339\u914d\u5230\u5f55\u97f3\u6587\u4ef6\uff0c\u6b63\u5728\u5904\u7406...&#039;)\n                    file_name = os.path.basename(file_path)\n                    year = file_name.split(&#039;_&#039;)[-1][:4]\n                    day_month_year = file_name.split(&#039;_&#039;)[-1][:8]\n                    dest_folder = 
os.path.join(done_path, year, day_month_year)\n                    if not os.path.exists(dest_folder):\n                        os.makedirs(dest_folder)\n                    os.makedirs(dest_folder, exist_ok=True)\n                    new_file_name = f&quot;{file_name.split(&#039;_&#039;)[-2]}_{file_name.split(&#039;_&#039;)[-1]}.mp3&quot;\n                    os.rename(file_path, os.path.join(dest_folder, new_file_name))\n                    print(f&#039;\u590d\u5236\u6587\u4ef6\u5b8c\u6210\uff1a{new_file_name}&#039;)\n                    logging.info(f&#039;\u590d\u5236\u6587\u4ef6\u5b8c\u6210\uff1a{new_file_name}&#039;)\n                    # \u521b\u5efa\u6216\u66f4\u65b0TXT\u6587\u4ef6\n                    txt_file = os.path.join(dest_folder, f&quot;DATA_{day_month_year}.txt&quot;)\n                    if not os.path.exists(txt_file):\n                        with open(txt_file, &#039;w&#039;) as txt:\n                            txt.write(\n                                &quot;\u793a\u4f8b&quot;)\n                    with open(txt_file, &#039;a&#039;) as txt:\n                        txt.write(\n                            f&quot;{\u793a\u4f8b&quot;)\n                        print(&#039;\u5199\u5165\u7d22\u5f15\u6587\u4ef6\u5b8c\u6210&#039;)\n                        logging.info(&#039;\u5199\u5165\u7d22\u5f15\u6587\u4ef6\u5b8c\u6210&#039;)\n                        count += 1\n                        logging.info(f&#039;\u5df2\u5b8c\u6210{count}\u6761\u5f55\u97f3\u5339\u914d&#039;)\n                        print(f&#039;\u5df2\u5b8c\u6210{count}\u6761\u5f55\u97f3\u5339\u914d&#039;)\n                        break\n    logging.info(f&quot;\u5b8c\u6210\u5339\u914d\u5f55\u97f3\uff0c\u5171\u5339\u914d\u5230 {count} \u6761\u5f55\u97f3\u3002\\n&quot;)\n    print(f&quot;\u5b8c\u6210\u5339\u914d\u5f55\u97f3\uff0c\u5171\u5339\u914d\u5230 {count} \u6761\u5f55\u97f3\u3002\\n&quot;)\n\nif __name__ == &quot;__main__&quot;:\n    main()\n<\/pre>\n<p 
class=\"wp-block-paragraph\">\u9879\u76ee\u7684execl\u6587\u4ef6\u6709\u4e24\u79cd\uff0c\u5176\u4e2dA\u662f\u67e5\u8be2\u6761\u4ef6\u7684\u4e3b\u8981execl\u6587\u4ef6\uff0cB\u662fA\u7684\u8865\u5145\uff0c\u53ef\u4ee5\u4eceB\u4e2d\u62ff\u5230A\u6ca1\u6709\u7684\u4fe1\u606f\uff0c\u7136\u540e\u5339\u914d\u6570\u636e\u5e93\u6587\u4ef6\uff0c\u5982\u679c\u80fd\u5339\u914d\u4e0a\u518d\u53bb\u627e\u76f8\u5e94\u7684\u6587\u4ef6\uff0c\u627e\u5230\u540e\u6309\u7167\u6307\u5b9a\u7684\u683c\u5f0f\u653e\u5230\u9879\u76ee\u7684done\u76ee\u5f55\u91cc\u3002<\/p>\n<p class=\"wp-block-paragraph\">\u9879\u76ee\u6d89\u53ca5\u4e2a\u76ee\u5f55\uff0c\u5206\u522b\u662f\u5b58\u653e\u6210\u679c\u7684done,\u5b58\u653e\u5f55\u97f3\u6587\u4ef6\u7684records,\u5b58\u653eexecl\u6587\u4ef6\u7684execl_files,\u5b58\u653esql\u6587\u4ef6\u7684sql_file,\u5b58\u653e\u65e5\u5fd7\u7684logs<\/p>\n<p class=\"wp-block-paragraph\">\u5f55\u97f3\u548cexecl\u76f4\u63a5\u5b58\u653e\u5373\u53ef\uff0c\u7a0b\u5e8f\u4f1a\u53bb\u5224\u5b9a\u54ea\u4e9b\u662fA\uff0c\u54ea\u4e9b\u662fB\uff0c\u5f55\u97f3\u7684\u8bdd\u7a0b\u5e8f\u4f1a\u5c06\u5176\u6587\u4ef6\u540d\u53ca\u8def\u5f84\u4fdd\u5b58\u5230\u5185\u5b58\u4e2d\uff0c\u65b9\u4fbf\u540e\u7eed\u7684\u67e5\u627e\uff0cmysql\u5bfc\u51fa\u7684\u6587\u4ef6\u5fc5\u987b\u662fcsv\u683c\u5f0f\uff0c\u4e14\u6587\u4ef6\u540d\u5199\u6b7b\u662fdata.csv\uff0c\u4f60\u8981\u6539\u7684\u8bdd\u81ea\u5df1\u6539\u4e0b\u4ee3\u7801<\/p>","protected":false},"excerpt":{"rendered":"<p>\u9879\u76ee\u7a0d\u5fae\u6709\u70b9\u590d\u6742\uff0c\u4f7f\u7528Python\u5c06\u5f55\u97f3\u6587\u4ef6\u5f52\u96c6\u8d77\u6765\uff0c\u9700\u8981\u7684\u4fe1\u606f\u6709\u5206\u522b\u662f\uff0cexecl\u6587\u4ef6\uff0cmysql\u6587\u4ef6\uff0c\u5f55\u97f3\u6587\u4ef6\uff0c\u5c06\u5176\u7ec4\u88c5\u8d77\u6765\uff0c\u589e\u52a0\u4e86\u8fdb 
&hellip;<\/p>","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[5,10],"tags":[],"class_list":["post-193","post","type-post","status-publish","format-standard","hentry","category-python","category-10"],"views":428,"_links":{"self":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/193","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=193"}],"version-history":[{"count":2,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/193\/revisions"}],"predecessor-version":[{"id":195,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=\/wp\/v2\/posts\/193\/revisions\/195"}],"wp:attachment":[{"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=193"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=193"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.liaoxinghui.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=193"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}