{"id":770,"date":"2023-05-28T22:55:54","date_gmt":"2023-05-28T14:55:54","guid":{"rendered":"http:\/\/xinyixx.com\/?p=770"},"modified":"2023-05-28T22:57:06","modified_gmt":"2023-05-28T14:57:06","slug":"spider","status":"publish","type":"post","link":"https:\/\/www.xinyixx.com\/index.php\/2023\/05\/28\/spider\/","title":{"rendered":"python\u7a0b\u5e8f2\uff1a\u7f51\u9875\u722c\u866b"},"content":{"rendered":"<div class=\"wp-block-group is-vertical is-layout-flex wp-container-core-group-is-layout-8cf370e7 wp-block-group-is-layout-flex\">\n<p>\u7f51\u9875\u722c\u866b\u662f\u4e00\u79cd\u7a0b\u5e8f\uff0c\u5b83\u53ef\u4ee5\u6a21\u62df\u4eba\u7c7b\u6d4f\u89c8\u5668\u7684\u884c\u4e3a\uff0c\u5411\u76ee\u6807\u7f51\u7ad9\u53d1\u9001\u8bf7\u6c42\uff0c\u83b7\u53d6\u7f51\u9875\u5185\u5bb9\uff0c\u518d\u901a\u8fc7\u89e3\u6790\u7f51\u9875\u5185\u5bb9\u63d0\u53d6\u6709\u7528\u4fe1\u606f\u3002\u8fd9\u4e2a\u8fc7\u7a0b\u5c31\u50cf\u6211\u4eec\u5728\u6d4f\u89c8\u5668\u4e2d\u6253\u5f00\u4e00\u4e2a\u7f51\u9875\uff0c\u7136\u540e\u67e5\u770b\u7f51\u9875\u7684\u5185\u5bb9\u4e00\u6837\u3002<\/p>\n\n\n\n<p>\u4e3a\u4e86\u5b9e\u73b0\u8fd9\u4e2a\u8fc7\u7a0b\uff0c\u6211\u4eec\u9700\u8981\u4f7f\u7528\u4e00\u4e9b\u5de5\u5177\u3002\u5176\u4e2d\u4e00\u4e2a\u5e38\u7528\u7684\u5de5\u5177\u662f <code>requests<\/code> \u6a21\u5757\u3002\u8fd9\u4e2a\u6a21\u5757\u53ef\u4ee5\u5e2e\u52a9\u6211\u4eec\u53d1\u9001 HTTP \u8bf7\u6c42\uff0c\u5e76\u4e14\u81ea\u52a8\u5904\u7406\u8fde\u63a5\u6c60\u3001\u8ba4\u8bc1\u3001Cookies \u7b49\u65b9\u9762\u7684\u7ec6\u8282\u3002\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528\u8fd9\u4e2a\u6a21\u5757\u53d1\u9001 GET\u3001POST\u3001PUT\u3001DELETE \u7b49\u8bf7\u6c42\uff0c\u5e76\u4e14\u53ef\u4ee5\u81ea\u5b9a\u4e49\u8bf7\u6c42\u5934\u3001\u8bf7\u6c42\u4f53\u7b49\u4fe1\u606f\u3002<\/p>\n\n\n\n<p>\u4e3e\u4e2a\u4f8b\u5b50\uff0c\u5982\u679c\u6211\u4eec\u60f3\u8981\u4ece\u4e00\u4e2a\u7535\u5546\u7f51\u7ad9\u4e0a\u722c\u53d6\u5546\u54c1\u4fe1\u606f\uff0c\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528 
<code>requests<\/code> \u6a21\u5757\u5411\u8fd9\u4e2a\u7f51\u7ad9\u53d1\u9001\u8bf7\u6c42\uff0c\u83b7\u53d6\u7f51\u9875\u5185\u5bb9\u3002\u7136\u540e\uff0c\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528\u4e00\u4e9b\u6280\u672f\uff08\u6bd4\u5982\u6b63\u5219\u8868\u8fbe\u5f0f\u6216\u8005\u89e3\u6790\u5e93\uff09\u6765\u89e3\u6790\u7f51\u9875\u5185\u5bb9\uff0c\u63d0\u53d6\u51fa\u6211\u4eec\u9700\u8981\u7684\u5546\u54c1\u4fe1\u606f\uff08\u6bd4\u5982\u5546\u54c1\u540d\u79f0\u3001\u4ef7\u683c\u3001\u8bc4\u4ef7\u7b49\u7b49\uff09\u3002\u6700\u540e\uff0c\u6211\u4eec\u53ef\u4ee5\u5c06\u8fd9\u4e9b\u4fe1\u606f\u4fdd\u5b58\u5230\u672c\u5730\u6587\u4ef6\u6216\u8005\u6570\u636e\u5e93\u4e2d\uff0c\u4ee5\u4fbf\u540e\u7eed\u5206\u6790\u548c\u4f7f\u7528\u3002<\/p>\n\n\n\n<p>\u603b\u4e4b\uff0c\u7f51\u9875\u722c\u866b\u548c <code>requests<\/code> \u6a21\u5757\u662f\u5b9e\u73b0\u7f51\u9875\u722c\u53d6\u7684\u4e24\u4e2a\u91cd\u8981\u5de5\u5177\u3002\u5b83\u4eec\u53ef\u4ee5\u5e2e\u52a9\u6211\u4eec\u83b7\u53d6\u7f51\u9875\u5185\u5bb9\uff0c\u5e76\u4e14\u63d0\u53d6\u6709\u7528\u4fe1\u606f\u3002<\/p>\n\n\n\n<!--more-->\n<\/div>\n\n\n\n<p>\u5bf9\u4e8e\u5728windows\u600e\u4e48\u90e8\u7f72\u600e\u4e48\u8fd0\u884cpython\u5c1a\u4e0d\u719f\u6089\u7684\u7ad9\u53cb\u8bf7\u79fb\u6b65\uff1a<a href=\"http:\/\/xinyixx.com\/index.php\/2023\/05\/27\/python1\/\" target=\"_blank\" rel=\"noopener\" title=\"\u8fd9\u91cc\">\u8fd9\u91cc<\/a><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-vivid-red-color\">import requests\nfrom bs4 import BeautifulSoup\n\n# \u68c0\u67e5URL\u662f\u5426\u5408\u6cd5\ndef check_url(url):\n    try:\n        response = requests.get(url)\n        response.raise_for_status()\n        return True\n    except:\n        return False\n\n# \u68c0\u67e5\u722c\u53d6\u7c7b\u578b\u662f\u5426\u5408\u6cd5\ndef check_type(html, type):\n    soup = BeautifulSoup(html, 'html.parser')\n    items = soup.select(type)\n    return len(items) 
&gt; 0\n\n# \u63d0\u793a\u7528\u6237\u8f93\u5165\u8981\u722c\u53d6\u7684\u7f51\u7ad9\u548c\u9700\u8981\u722c\u53d6\u7684\u7c7b\u578b\nwhile True:\n    url = input('\u8bf7\u8f93\u5165\u8981\u722c\u53d6\u7684\u7f51\u7ad9URL\uff1a')\n    if check_url(url):\n        break\n    else:\n        print('URL\u65e0\u6548\uff0c\u8bf7\u91cd\u65b0\u8f93\u5165\u3002')\n\nwhile True:\n    type = input('\u8bf7\u8f93\u5165\u9700\u8981\u722c\u53d6\u7684\u7c7b\u578b\uff1a')\n    response = requests.get(url)\n    html = response.text\n    if check_type(html, type):\n        break\n    else:\n        print('\u722c\u53d6\u7c7b\u578b\u65e0\u6548\uff0c\u8bf7\u91cd\u65b0\u8f93\u5165\u3002')\n\n# \u53d1\u9001\u8bf7\u6c42\uff0c\u83b7\u53d6\u7f51\u9875\u5185\u5bb9\nresponse = requests.get(url)\nhtml = response.text\n\n# \u89e3\u6790\u7f51\u9875\u5185\u5bb9\uff0c\u83b7\u53d6\u9700\u8981\u722c\u53d6\u7684\u5185\u5bb9\nsoup = BeautifulSoup(html, 'html.parser')\nitems = soup.select(type)\n\n# \u5c06\u7ed3\u679c\u8f93\u51fa\u5230\u6587\u672c\u6587\u4ef6\nfilename = 'result.txt'\nwith open(filename, 'w', encoding='utf-8') as f:\n    for item in items:\n        f.write(str(item) + '\\n')\nprint(f'\u7ed3\u679c\u5df2\u4fdd\u5b58\u5230{filename}\u6587\u4ef6\u4e2d\u3002')<\/mark>\n<\/code><\/pre>\n\n\n\n<p>\u6309\u4e0a\u9762\u4ee3\u7801\u6d4b\u8bd5\u8f93\u51fa\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><noscript><img decoding=\"async\" width=\"1024\" height=\"794\" src=\"http:\/\/xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1024x794.png\" alt class=\"wp-image-771\" srcset=\"https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1024x794.png 1024w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-300x233.png 300w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-768x595.png 768w, 
https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1536x1191.png 1536w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825.png 1647w\" sizes=\"(max-width: 1024px) 100vw, 1024px\"><\/noscript><img decoding=\"async\" width=\"1024\" height=\"794\" src=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%201024%20794%22%3E%3C%2Fsvg%3E\" alt class=\"wp-image-771 lazyload\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%201024%20794%22%3E%3C%2Fsvg%3E 1024w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" data-srcset=\"https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1024x794.png 1024w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-300x233.png 300w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-768x595.png 768w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1536x1191.png 1536w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825.png 1647w\" data-src=\"http:\/\/xinyixx.com\/wp-content\/uploads\/2023\/05\/560b757c4e42e472d2e2b0e16a5b825-1024x794.png\"><\/figure>\n\n\n\n<p>\u80fd\u591f\u81ea\u52a8\u68c0\u6d4bURL\u548c\u722c\u53d6\u7684html\u7c7b\u578b\u662f\u5426\u6b63\u786e\uff0c\u9519\u8bef\u540e\u4f1a\u8fd4\u56de\u91cd\u65b0\u8f93\u5165\u3002\u722c\u53d6\u7c7b\u578b\u5305\u62ec\uff1adiv\/li\/a\/link\/meta\/img\/p\/ul\u7b49\u7b49\uff0c\u5728\u5f53\u524d\u6587\u4ef6\u5939\u4e0b\u751f\u6210\u6587\u672c\u6587\u4ef6result.txt\uff0c\u6211\u4eec\u6253\u5f00\u770b\u4e00\u4e0b\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><noscript><img decoding=\"async\" width=\"694\" height=\"756\" src=\"http:\/\/xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10.png\" alt 
class=\"wp-image-772\" srcset=\"https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10.png 694w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10-275x300.png 275w\" sizes=\"(max-width: 694px) 100vw, 694px\"><\/noscript><img decoding=\"async\" width=\"694\" height=\"756\" src=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%20694%20756%22%3E%3C%2Fsvg%3E\" alt class=\"wp-image-772 lazyload\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%20694%20756%22%3E%3C%2Fsvg%3E 694w\" sizes=\"(max-width: 694px) 100vw, 694px\" data-srcset=\"https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10.png 694w, https:\/\/www.xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10-275x300.png 275w\" data-src=\"http:\/\/xinyixx.com\/wp-content\/uploads\/2023\/05\/image-10.png\"><\/figure>\n\n\n\n<p>\u7b80\u5355\u7684\u5b9e\u73b0\u539f\u7406\u5c31\u662f\u4e0a\u9762\u5566\uff0c\u7136\u540e\u5c31\u7528pyinstaller\u5c01\u88c5\u751f\u6210exe\u6587\u4ef6\u5373\u53ef\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u7f51\u9875\u722c\u866b\u662f\u4e00\u79cd\u7a0b\u5e8f\uff0c\u5b83\u53ef\u4ee5\u6a21\u62df\u4eba\u7c7b\u6d4f\u89c8\u5668\u7684\u884c\u4e3a\uff0c\u5411\u76ee\u6807\u7f51\u7ad9\u53d1\u9001\u8bf7\u6c42\uff0c\u83b7\u53d6\u7f51\u9875\u5185\u5bb9\uff0c\u518d\u901a\u8fc7\u89e3\u6790\u7f51\u9875\u5185\u5bb9\u63d0\u53d6\u6709\u7528\u4fe1 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_import_markdown_pro_load_document_selector":0,"_import_markdown_pro_submit_text_textarea":"","footnotes":""},"categories":[14,10,7],"tags":[71,83,84,85],"class_list":["post-770","post","type-post","status-publish","format-standard","hentry","category-teacher","category-coding","category-software","tag-python","tag-spider","tag-worm","tag-exe","entry"],"_links":{"self":[{"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/posts\/770","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/comments?post=770"}],"version-history":[{"count":0,"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/posts\/770\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/media?parent=770"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/categories?post=770"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.xinyixx.com\/index.php\/wp-json\/wp\/v2\/tags?post=770"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}